From 884de944e616b26580830817486bb85e74f1e7c4 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 22:17:34 +0000 Subject: [PATCH 01/14] feat(api): api update --- .stats.yml | 2 +- .../types/projects/query_log_list_by_group_response.py | 6 ------ .../types/projects/query_log_list_groups_response.py | 6 ------ src/codex/types/projects/query_log_list_response.py | 6 ------ .../types/projects/query_log_retrieve_response.py | 10 ++++------ .../remediation_list_resolved_logs_response.py | 6 ------ .../remediations/expert_review_list_response.py | 2 ++ .../remediations/expert_review_retrieve_response.py | 2 ++ 8 files changed, 9 insertions(+), 31 deletions(-) diff --git a/.stats.yml b/.stats.yml index c021c17..5104bd7 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 70 -openapi_spec_hash: 9018ebfb2a9e1afa87058b3a4bd41b0b +openapi_spec_hash: 3e4a110bf3d3aca12173e6023e09f606 config_hash: aad16f20fed13ac50211fc1d0e2ea621 diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index 1a89baa..e0c732b 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -467,12 +467,6 @@ class QueryLogsByGroupQueryLog(BaseModel): expert review. Expert review will override the original guardrail decision. """ - expert_override_log_id: Optional[str] = None - """ - ID of the query log with expert review that overrode the original guardrail - decision. - """ - expert_review_created_at: Optional[datetime] = None """When the expert review was created""" diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index fe70223..fd8edad 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -462,12 +462,6 @@ class QueryLogListGroupsResponse(BaseModel): expert review. Expert review will override the original guardrail decision. """ - expert_override_log_id: Optional[str] = None - """ - ID of the query log with expert review that overrode the original guardrail - decision. - """ - expert_review_created_at: Optional[datetime] = None """When the expert review was created""" diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index dc7768f..f5d7070 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -450,12 +450,6 @@ class QueryLogListResponse(BaseModel): expert review. Expert review will override the original guardrail decision. """ - expert_override_log_id: Optional[str] = None - """ - ID of the query log with expert review that overrode the original guardrail - decision. 
- """ - expert_review_created_at: Optional[datetime] = None """When the expert review was created""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index db91943..69388b9 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -367,6 +367,8 @@ class QueryLogRetrieveResponse(BaseModel): expert_answer_id: Optional[str] = None + expert_override_log_id: Optional[str] = None + formatted_escalation_eval_scores: Optional[Dict[str, FormattedEscalationEvalScores]] = None formatted_eval_scores: Optional[Dict[str, FormattedEvalScores]] = None @@ -392,6 +394,8 @@ class QueryLogRetrieveResponse(BaseModel): issue_status: Literal["addressed", "unaddressed"] """Manual review status override for remediations.""" + log_needs_review: bool + needs_review: bool project_id: str @@ -457,12 +461,6 @@ class QueryLogRetrieveResponse(BaseModel): expert review. Expert review will override the original guardrail decision. """ - expert_override_log_id: Optional[str] = None - """ - ID of the query log with expert review that overrode the original guardrail - decision. - """ - expert_review_created_at: Optional[datetime] = None """When the expert review was created""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 9f1b77b..eef33f8 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -457,12 +457,6 @@ class QueryLog(BaseModel): expert review. Expert review will override the original guardrail decision. """ - expert_override_log_id: Optional[str] = None - """ - ID of the query log with expert review that overrode the original guardrail - decision. 
- """ - expert_review_created_at: Optional[datetime] = None """When the expert review was created""" diff --git a/src/codex/types/projects/remediations/expert_review_list_response.py b/src/codex/types/projects/remediations/expert_review_list_response.py index 99d26ab..eadb974 100644 --- a/src/codex/types/projects/remediations/expert_review_list_response.py +++ b/src/codex/types/projects/remediations/expert_review_list_response.py @@ -16,6 +16,8 @@ class ExpertReviewListResponse(BaseModel): evaluated_response: Optional[str] = None + expert_override_log_id: str + last_edited_at: datetime last_edited_by: Optional[str] = None diff --git a/src/codex/types/projects/remediations/expert_review_retrieve_response.py b/src/codex/types/projects/remediations/expert_review_retrieve_response.py index 9cb0da6..f48fac2 100644 --- a/src/codex/types/projects/remediations/expert_review_retrieve_response.py +++ b/src/codex/types/projects/remediations/expert_review_retrieve_response.py @@ -16,6 +16,8 @@ class ExpertReviewRetrieveResponse(BaseModel): evaluated_response: Optional[str] = None + expert_override_log_id: str + last_edited_at: datetime last_edited_by: Optional[str] = None From 4dec29cdf74dd3beeccf326678db7170156f0c44 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Sat, 22 Nov 2025 03:23:13 +0000 Subject: [PATCH 02/14] chore: add Python 3.14 classifier and testing --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5fb2418..2b09b3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: MacOS", From e7450638a2a002030ae88cc997b399084821bba2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 18:17:49 +0000 Subject: [PATCH 03/14] codegen metadata --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 5104bd7..220b7de 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 70 -openapi_spec_hash: 3e4a110bf3d3aca12173e6023e09f606 +openapi_spec_hash: 022a28b575651c2f21f43b1148141ce3 config_hash: aad16f20fed13ac50211fc1d0e2ea621 From 94bacaf492809bc9bc15175d272de53ad2569895 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 26 Nov 2025 18:17:48 +0000 Subject: [PATCH 04/14] feat(api): api update --- .stats.yml | 2 +- .../types/projects/query_log_list_by_group_response.py | 9 +++++++++ .../types/projects/query_log_list_groups_response.py | 9 +++++++++ src/codex/types/projects/query_log_list_response.py | 9 +++++++++ src/codex/types/projects/query_log_retrieve_response.py | 9 +++++++++ .../projects/remediation_list_resolved_logs_response.py | 9 +++++++++ 6 files changed, 46 insertions(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 220b7de..7a6c349 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 70 -openapi_spec_hash: 022a28b575651c2f21f43b1148141ce3 +openapi_spec_hash: 11279400677011ad5dc1ebba33216ae4 config_hash: aad16f20fed13ac50211fc1d0e2ea621 diff --git a/src/codex/types/projects/query_log_list_by_group_response.py 
b/src/codex/types/projects/query_log_list_by_group_response.py index e0c732b..34b722b 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -523,6 +523,15 @@ class QueryLogsByGroupQueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + system_prompt: Optional[str] = None + """ + Content of the first system message associated with this query log, if + available. + """ + + system_prompt_hash: Optional[str] = None + """SHA-256 hash of the system prompt content for quick equality checks.""" + tools: Optional[List[QueryLogsByGroupQueryLogTool]] = None """Tools to use for the LLM call. diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index fd8edad..1587af7 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -518,6 +518,15 @@ class QueryLogListGroupsResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + system_prompt: Optional[str] = None + """ + Content of the first system message associated with this query log, if + available. + """ + + system_prompt_hash: Optional[str] = None + """SHA-256 hash of the system prompt content for quick equality checks.""" + tools: Optional[List[Tool]] = None """Tools to use for the LLM call. diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index f5d7070..e71f05b 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -503,6 +503,15 @@ class QueryLogListResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + system_prompt: Optional[str] = None + """ + Content of the first system message associated with this query log, if + available. + """ + + system_prompt_hash: Optional[str] = None + """SHA-256 hash of the system prompt content for quick equality checks.""" + tools: Optional[List[Tool]] = None """Tools to use for the LLM call. diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 69388b9..6116840 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -517,6 +517,15 @@ class QueryLogRetrieveResponse(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + system_prompt: Optional[str] = None + """ + Content of the first system message associated with this query log, if + available. + """ + + system_prompt_hash: Optional[str] = None + """SHA-256 hash of the system prompt content for quick equality checks.""" + tools: Optional[List[Tool]] = None """Tools to use for the LLM call. 
diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index eef33f8..986f898 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -510,6 +510,15 @@ class QueryLog(BaseModel): primary_eval_issue_score: Optional[float] = None """Score of the primary eval issue""" + system_prompt: Optional[str] = None + """ + Content of the first system message associated with this query log, if + available. + """ + + system_prompt_hash: Optional[str] = None + """SHA-256 hash of the system prompt content for quick equality checks.""" + tools: Optional[List[QueryLogTool]] = None """Tools to use for the LLM call. From 2c971c4a93b0e407737648e83e555dc6c9b3a759 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 28 Nov 2025 03:08:28 +0000 Subject: [PATCH 05/14] fix: ensure streams are always closed --- src/codex/_streaming.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/codex/_streaming.py b/src/codex/_streaming.py index d9c4a80..e6c997e 100644 --- a/src/codex/_streaming.py +++ b/src/codex/_streaming.py @@ -54,11 +54,12 @@ def __stream__(self) -> Iterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - for sse in iterator: - yield process_data(data=sse.json(), cast_to=cast_to, response=response) - - # As we might not fully consume the response stream, we need to close it explicitly - response.close() + try: + for sse in iterator: + yield process_data(data=sse.json(), cast_to=cast_to, response=response) + finally: + # Ensure the response is closed even if the consumer doesn't read all data + response.close() def __enter__(self) -> Self: return self @@ -117,11 +118,12 @@ async def __stream__(self) -> AsyncIterator[_T]: process_data = self._client._process_response_data iterator = self._iter_events() - async for sse in iterator: - yield process_data(data=sse.json(), cast_to=cast_to, response=response) - - # As we might not fully consume the response stream, we need to close it explicitly - await response.aclose() + try: + async for sse in iterator: + yield process_data(data=sse.json(), cast_to=cast_to, response=response) + finally: + # Ensure the response is closed even if the consumer doesn't read all data + await response.aclose() async def __aenter__(self) -> Self: return self From 18285268b4eec848b2be2df65cdbdf960424f72d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 28 Nov 2025 03:09:32 +0000 Subject: [PATCH 06/14] chore(deps): mypy 1.18.1 has a regression, pin to 1.17 --- pyproject.toml | 2 +- requirements-dev.lock | 4 +++- requirements.lock | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b09b3e..c388002 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ managed = true # version pins are in requirements-dev.lock dev-dependencies = [ "pyright==1.1.399", - "mypy", + "mypy==1.17", "respx", "pytest", "pytest-asyncio", diff --git a/requirements-dev.lock b/requirements-dev.lock index d728372..1b3f514 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -72,7 +72,7 @@ mdurl==0.1.2 multidict==6.4.4 # via aiohttp # via yarl -mypy==1.14.1 +mypy==1.17.0 mypy-extensions==1.0.0 # via mypy nodeenv==1.8.0 @@ 
-81,6 +81,8 @@ nox==2023.4.22 packaging==23.2 # via nox # via pytest +pathspec==0.12.1 + # via mypy platformdirs==3.11.0 # via virtualenv pluggy==1.5.0 diff --git a/requirements.lock b/requirements.lock index 4b916da..b6cc26c 100644 --- a/requirements.lock +++ b/requirements.lock @@ -55,21 +55,21 @@ multidict==6.4.4 propcache==0.3.1 # via aiohttp # via yarl -pydantic==2.11.9 +pydantic==2.12.5 # via codex-sdk -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic sniffio==1.3.0 # via anyio # via codex-sdk -typing-extensions==4.12.2 +typing-extensions==4.15.0 # via anyio # via codex-sdk # via multidict # via pydantic # via pydantic-core # via typing-inspection -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via pydantic yarl==1.20.0 # via aiohttp From 230659a94b4921805c84224578df1324829e5d07 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 03:49:49 +0000 Subject: [PATCH 07/14] chore: update lockfile --- pyproject.toml | 14 +++--- requirements-dev.lock | 108 +++++++++++++++++++++++------------------- requirements.lock | 31 ++++++------ 3 files changed, 83 insertions(+), 70 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c388002..5837356 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,14 +7,16 @@ license = "MIT" authors = [ { name = "Cleanlab", email = "team@cleanlab.ai" }, ] + dependencies = [ - "httpx>=0.23.0, <1", - "pydantic>=1.9.0, <3", - "typing-extensions>=4.10, <5", - "anyio>=3.5.0, <5", - "distro>=1.7.0, <2", - "sniffio", + "httpx>=0.23.0, <1", + "pydantic>=1.9.0, <3", + "typing-extensions>=4.10, <5", + "anyio>=3.5.0, <5", + "distro>=1.7.0, <2", + "sniffio", ] + requires-python = ">= 3.9" classifiers = [ "Typing :: Typed", diff --git a/requirements-dev.lock b/requirements-dev.lock index 1b3f514..90dc04b 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -12,40 +12,45 @@ -e file:. 
aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.8 +aiohttp==3.13.2 # via codex-sdk # via httpx-aiohttp -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic -anyio==4.4.0 +anyio==4.12.0 # via codex-sdk # via httpx -argcomplete==3.1.2 +argcomplete==3.6.3 # via nox async-timeout==5.0.1 # via aiohttp -attrs==25.3.0 +attrs==25.4.0 # via aiohttp -certifi==2023.7.22 + # via nox +backports-asyncio-runner==1.2.0 + # via pytest-asyncio +certifi==2025.11.12 # via httpcore # via httpx -colorlog==6.7.0 +colorlog==6.10.1 + # via nox +dependency-groups==1.3.1 # via nox -dirty-equals==0.6.0 -distlib==0.3.7 +dirty-equals==0.11 +distlib==0.4.0 # via virtualenv -distro==1.8.0 +distro==1.9.0 # via codex-sdk -exceptiongroup==1.2.2 +exceptiongroup==1.3.1 # via anyio # via pytest -execnet==2.1.1 +execnet==2.1.2 # via pytest-xdist -filelock==3.12.4 +filelock==3.19.1 # via virtualenv -frozenlist==1.6.2 +frozenlist==1.8.0 # via aiohttp # via aiosignal h11==0.16.0 @@ -58,82 +63,87 @@ httpx==0.28.1 # via respx httpx-aiohttp==0.1.9 # via codex-sdk -idna==3.4 +humanize==4.13.0 + # via nox +idna==3.11 # via anyio # via httpx # via yarl -importlib-metadata==7.0.0 -iniconfig==2.0.0 +importlib-metadata==8.7.0 +iniconfig==2.1.0 # via pytest markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -multidict==6.4.4 +multidict==6.7.0 # via aiohttp # via yarl mypy==1.17.0 -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 # via mypy -nodeenv==1.8.0 +nodeenv==1.9.1 # via pyright -nox==2023.4.22 -packaging==23.2 +nox==2025.11.12 +packaging==25.0 + # via dependency-groups # via nox # via pytest pathspec==0.12.1 # via mypy -platformdirs==3.11.0 +platformdirs==4.4.0 # via virtualenv -pluggy==1.5.0 +pluggy==1.6.0 # via pytest -propcache==0.3.1 +propcache==0.4.1 # via aiohttp # via yarl -pydantic==2.11.9 +pydantic==2.12.5 # via codex-sdk -pydantic-core==2.33.2 +pydantic-core==2.41.5 # via pydantic -pygments==2.18.0 +pygments==2.19.2 + # via pytest # via rich pyright==1.1.399 -pytest==8.3.3 +pytest==8.4.2 # via pytest-asyncio # via pytest-xdist -pytest-asyncio==0.24.0 -pytest-xdist==3.7.0 -python-dateutil==2.8.2 +pytest-asyncio==1.2.0 +pytest-xdist==3.8.0 +python-dateutil==2.9.0.post0 # via time-machine -pytz==2023.3.post1 - # via dirty-equals respx==0.22.0 -rich==13.7.1 -ruff==0.9.4 -setuptools==68.2.2 - # via nodeenv -six==1.16.0 +rich==14.2.0 +ruff==0.14.7 +six==1.17.0 # via python-dateutil -sniffio==1.3.0 - # via anyio +sniffio==1.3.1 # via codex-sdk -time-machine==2.9.0 -tomli==2.0.2 +time-machine==2.19.0 +tomli==2.3.0 + # via dependency-groups # via mypy + # via nox # via pytest -typing-extensions==4.12.2 +typing-extensions==4.15.0 + # via aiosignal # via anyio # via codex-sdk + # via exceptiongroup # via multidict # via mypy # via pydantic # via pydantic-core # via pyright + # via pytest-asyncio # via typing-inspection -typing-inspection==0.4.1 + # via virtualenv +typing-inspection==0.4.2 # via pydantic -virtualenv==20.24.5 +virtualenv==20.35.4 # via nox -yarl==1.20.0 +yarl==1.22.0 # via aiohttp -zipp==3.17.0 +zipp==3.23.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index b6cc26c..2cedc49 100644 --- a/requirements.lock +++ b/requirements.lock @@ -12,28 +12,28 @@ -e file:. 
aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.8 +aiohttp==3.13.2 # via codex-sdk # via httpx-aiohttp -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -annotated-types==0.6.0 +annotated-types==0.7.0 # via pydantic -anyio==4.4.0 +anyio==4.12.0 # via codex-sdk # via httpx async-timeout==5.0.1 # via aiohttp -attrs==25.3.0 +attrs==25.4.0 # via aiohttp -certifi==2023.7.22 +certifi==2025.11.12 # via httpcore # via httpx -distro==1.8.0 +distro==1.9.0 # via codex-sdk -exceptiongroup==1.2.2 +exceptiongroup==1.3.1 # via anyio -frozenlist==1.6.2 +frozenlist==1.8.0 # via aiohttp # via aiosignal h11==0.16.0 @@ -45,31 +45,32 @@ httpx==0.28.1 # via httpx-aiohttp httpx-aiohttp==0.1.9 # via codex-sdk -idna==3.4 +idna==3.11 # via anyio # via httpx # via yarl -multidict==6.4.4 +multidict==6.7.0 # via aiohttp # via yarl -propcache==0.3.1 +propcache==0.4.1 # via aiohttp # via yarl pydantic==2.12.5 # via codex-sdk pydantic-core==2.41.5 # via pydantic -sniffio==1.3.0 - # via anyio +sniffio==1.3.1 # via codex-sdk typing-extensions==4.15.0 + # via aiosignal # via anyio # via codex-sdk + # via exceptiongroup # via multidict # via pydantic # via pydantic-core # via typing-inspection typing-inspection==0.4.2 # via pydantic -yarl==1.20.0 +yarl==1.22.0 # via aiohttp From 7742c60ecad518656a184513c5228a3447aa34c9 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:18:15 +0000 Subject: [PATCH 08/14] feat(api): api update --- .stats.yml | 2 +- src/codex/resources/projects/query_logs.py | 60 +++++++++++++++++++ src/codex/types/project_retrieve_response.py | 13 +++- .../query_log_list_by_group_params.py | 12 ++++ .../query_log_list_by_group_response.py | 21 +++++++ .../projects/query_log_list_groups_params.py | 12 ++++ .../query_log_list_groups_response.py | 9 +++ .../types/projects/query_log_list_params.py | 12 ++++ .../types/projects/query_log_list_response.py | 9 +++ .../projects/query_log_retrieve_response.py | 9 +++ ...remediation_list_resolved_logs_response.py | 9 +++ .../api_resources/projects/test_query_logs.py | 12 ++++ 12 files changed, 178 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index 7a6c349..3c7f25d 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 70 -openapi_spec_hash: 11279400677011ad5dc1ebba33216ae4 +openapi_spec_hash: 97ec07f3ab237f61ed0bbc359486cc0e config_hash: aad16f20fed13ac50211fc1d0e2ea621 diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index fec1138..d88656f 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -112,6 +112,7 @@ def list( guardrailed: Optional[bool] | Omit = omit, has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -122,6 +123,7 @@ def list( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -148,6 +150,9 @@ def list( has_tool_calls: Filter by whether the query log has tool calls + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -168,6 +173,9 @@ def list( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -198,6 +206,7 @@ def list( "guardrailed": guardrailed, "has_tool_calls": has_tool_calls, "limit": limit, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -205,6 +214,7 @@ def list( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -267,6 +277,7 @@ def list_by_group( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -278,6 +289,7 @@ def list_by_group( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -306,6 +318,9 @@ def list_by_group( needs_review: Filter logs that need review + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -328,6 +343,9 @@ def list_by_group( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -358,6 +376,7 @@ def list_by_group( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -366,6 +385,7 @@ def list_by_group( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -388,6 +408,7 @@ def list_groups( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -398,6 +419,7 @@ def list_groups( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -426,6 +448,9 @@ def list_groups( needs_review: Filter log groups that need review + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -447,6 +472,9 @@ def list_groups( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -478,6 +506,7 @@ def list_groups( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -485,6 +514,7 @@ def list_groups( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, @@ -637,6 +667,7 @@ def list( guardrailed: Optional[bool] | Omit = omit, has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -647,6 +678,7 @@ def list( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -673,6 +705,9 @@ def list( has_tool_calls: Filter by whether the query log has tool calls + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -693,6 +728,9 @@ def list( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -723,6 +761,7 @@ def list( "guardrailed": guardrailed, "has_tool_calls": has_tool_calls, "limit": limit, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -730,6 +769,7 @@ def list( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -794,6 +834,7 @@ async def list_by_group( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -805,6 +846,7 @@ async def list_by_group( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -833,6 +875,9 @@ async def list_by_group( needs_review: Filter logs that need review + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -855,6 +900,9 @@ async def list_by_group( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -885,6 +933,7 @@ async def list_by_group( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -893,6 +942,7 @@ async def list_by_group( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -915,6 +965,7 @@ def list_groups( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -925,6 +976,7 @@ def list_groups( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -953,6 +1005,9 @@ def list_groups( needs_review: Filter log groups that need review + non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -974,6 +1029,9 @@ def list_groups( tool_call_names: Filter by names of tools called in the assistant response + triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -1005,6 +1063,7 @@ def list_groups( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, + "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -1012,6 +1071,7 @@ def list_groups( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, + "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 8fe7741..75ff741 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, Optional +from typing import Dict, List, Optional from datetime import datetime from typing_extensions import Literal @@ -9,6 +9,7 @@ __all__ = [ "ProjectRetrieveResponse", "Config", + "ConfigDeterministicEval", "ConfigEvalConfig", "ConfigEvalConfigCustomEvals", "ConfigEvalConfigCustomEvalsEvals", @@ -27,6 +28,14 @@ ] +class ConfigDeterministicEval(BaseModel): + id: str + + name: str + + should_guardrail: bool + + class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): message: str """ @@ -469,6 +478,8 @@ class ConfigEvalConfig(BaseModel): class Config(BaseModel): + deterministic_evals: List[ConfigDeterministicEval] + ai_guidance_threshold: Optional[float] = None clustering_use_llm_matching: Optional[bool] = None diff --git a/src/codex/types/projects/query_log_list_by_group_params.py b/src/codex/types/projects/query_log_list_by_group_params.py index 17a260b..864e963 100644 --- a/src/codex/types/projects/query_log_list_by_group_params.py +++ b/src/codex/types/projects/query_log_list_by_group_params.py @@ -39,6 +39,12 @@ class QueryLogListByGroupParams(TypedDict, total=False): needs_review: Optional[bool] """Filter logs that need review""" + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + """ + offset: int order: Literal["asc", "desc"] @@ -76,5 +82,11 @@ class QueryLogListByGroupParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + """ + was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_by_group_response.py 
b/src/codex/types/projects/query_log_list_by_group_response.py index 34b722b..a4403f3 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -504,6 +504,12 @@ class QueryLogsByGroupQueryLog(BaseModel): itself. """ + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + UUIDs of deterministic guardrails that were checked but not triggered for this + query + """ + original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -538,6 +544,9 @@ class QueryLogsByGroupQueryLog(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """UUIDs of deterministic guardrails that were triggered for this query""" + class QueryLogsByGroup(BaseModel): query_logs: List[QueryLogsByGroupQueryLog] @@ -572,6 +581,12 @@ class Filters(BaseModel): needs_review: Optional[bool] = None """Filter logs that need review""" + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + """ + passed_evals: Optional[List[str]] = None """Filter by evals that passed""" @@ -589,6 +604,12 @@ class Filters(BaseModel): tool_call_names: Optional[List[str]] = None """Filter by names of tools called in the assistant response""" + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + """ + was_cache_hit: Optional[bool] = None """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py index ece65b1..118fbc6 100644 --- a/src/codex/types/projects/query_log_list_groups_params.py +++ b/src/codex/types/projects/query_log_list_groups_params.py @@ -39,6 +39,12 @@ class QueryLogListGroupsParams(TypedDict, total=False): needs_review: Optional[bool] """Filter log groups that need review""" + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + """ + offset: int order: Literal["asc", "desc"] @@ -74,5 +80,11 @@ class QueryLogListGroupsParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + """ + was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 1587af7..8d3c412 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -499,6 +499,12 @@ class QueryLogListGroupsResponse(BaseModel): itself. """ + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + UUIDs of deterministic guardrails that were checked but not triggered for this + query + """ + original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. 
@@ -532,3 +538,6 @@ class QueryLogListGroupsResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ + + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/query_log_list_params.py b/src/codex/types/projects/query_log_list_params.py index eb7858a..86ed0e0 100644 --- a/src/codex/types/projects/query_log_list_params.py +++ b/src/codex/types/projects/query_log_list_params.py @@ -36,6 +36,12 @@ class QueryLogListParams(TypedDict, total=False): limit: int + non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were checked but not + triggered (OR operation) + """ + offset: int order: Literal["asc", "desc"] @@ -70,5 +76,11 @@ class QueryLogListParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" + triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] + """ + Filter logs where ANY of these deterministic guardrail IDs were triggered (OR + operation) + """ + was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index e71f05b..e94cc72 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -484,6 +484,12 @@ class QueryLogListResponse(BaseModel): itself. """ + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + UUIDs of deterministic guardrails that were checked but not triggered for this + query + """ + original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -517,3 +523,6 @@ class QueryLogListResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ + + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 6116840..13baf78 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -498,6 +498,12 @@ class QueryLogRetrieveResponse(BaseModel): itself. """ + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + UUIDs of deterministic guardrails that were checked but not triggered for this + query + """ + original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -531,3 +537,6 @@ class QueryLogRetrieveResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ + + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 986f898..2564659 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -491,6 +491,12 @@ class QueryLog(BaseModel): itself. 
""" + non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """ + UUIDs of deterministic guardrails that were checked but not triggered for this + query + """ + original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -525,6 +531,9 @@ class QueryLog(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ + triggered_deterministic_guardrail_ids: Optional[List[str]] = None + """UUIDs of deterministic guardrails that were triggered for this query""" + class RemediationListResolvedLogsResponse(BaseModel): query_logs: List[QueryLog] diff --git a/tests/api_resources/projects/test_query_logs.py b/tests/api_resources/projects/test_query_logs.py index e98cb27..420cb9c 100644 --- a/tests/api_resources/projects/test_query_logs.py +++ b/tests/api_resources/projects/test_query_logs.py @@ -107,6 +107,7 @@ def test_method_list_with_all_params(self, client: Codex) -> None: guardrailed=True, has_tool_calls=True, limit=1, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -114,6 +115,7 @@ def test_method_list_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) assert_matches_type(SyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) @@ -234,6 +236,7 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: has_tool_calls=True, limit=1, needs_review=True, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -242,6 +245,7 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -309,6 +313,7 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: has_tool_calls=True, limit=1, needs_review=True, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -316,6 +321,7 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -553,6 +559,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No guardrailed=True, has_tool_calls=True, limit=1, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -560,6 +567,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) assert_matches_type(AsyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) @@ -680,6 +688,7 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod has_tool_calls=True, limit=1, needs_review=True, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", 
passed_evals=["string"], @@ -688,6 +697,7 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -755,6 +765,7 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex has_tool_calls=True, limit=1, needs_review=True, + non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -762,6 +773,7 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex search_text="search_text", sort="created_at", tool_call_names=["string"], + triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) From 7f283d7abb4b9b79de86c88745fb66ea9943cdae Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 21:17:33 +0000 Subject: [PATCH 09/14] feat(api): api update --- .stats.yml | 2 +- src/codex/resources/projects/query_logs.py | 60 ------------------- src/codex/types/project_retrieve_response.py | 13 +--- .../query_log_list_by_group_params.py | 12 ---- .../query_log_list_by_group_response.py | 21 ------- .../projects/query_log_list_groups_params.py | 12 ---- .../query_log_list_groups_response.py | 9 --- .../types/projects/query_log_list_params.py | 12 ---- .../types/projects/query_log_list_response.py | 9 --- .../projects/query_log_retrieve_response.py | 9 --- ...remediation_list_resolved_logs_response.py | 9 --- .../api_resources/projects/test_query_logs.py | 12 ---- 12 files changed, 2 insertions(+), 178 deletions(-) diff --git a/.stats.yml b/.stats.yml index 3c7f25d..7a6c349 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,3 +1,3 @@ configured_endpoints: 70 -openapi_spec_hash: 97ec07f3ab237f61ed0bbc359486cc0e +openapi_spec_hash: 11279400677011ad5dc1ebba33216ae4 config_hash: aad16f20fed13ac50211fc1d0e2ea621 diff --git a/src/codex/resources/projects/query_logs.py b/src/codex/resources/projects/query_logs.py index d88656f..fec1138 100644 --- a/src/codex/resources/projects/query_logs.py +++ b/src/codex/resources/projects/query_logs.py @@ -112,7 +112,6 @@ def list( guardrailed: Optional[bool] | Omit = omit, has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -123,7 +122,6 @@ def list( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -150,9 +148,6 @@ def list( has_tool_calls: Filter by whether the query log has tool calls - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -173,9 +168,6 @@ def list( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -206,7 +198,6 @@ def list( "guardrailed": guardrailed, "has_tool_calls": has_tool_calls, "limit": limit, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -214,7 +205,6 @@ def list( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -277,7 +267,6 @@ def list_by_group( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -289,7 +278,6 @@ def list_by_group( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -318,9 +306,6 @@ def list_by_group( needs_review: Filter logs that need review - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -343,9 +328,6 @@ def list_by_group( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -376,7 +358,6 @@ def list_by_group( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -385,7 +366,6 @@ def list_by_group( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -408,7 +388,6 @@ def list_groups( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -419,7 +398,6 @@ def list_groups( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -448,9 +426,6 @@ def list_groups( needs_review: Filter log groups that need review - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -472,9 +447,6 @@ def list_groups( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -506,7 +478,6 @@ def list_groups( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -514,7 +485,6 @@ def list_groups( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, @@ -667,7 +637,6 @@ def list( guardrailed: Optional[bool] | Omit = omit, has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -678,7 +647,6 @@ def list( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -705,9 +673,6 @@ def list( has_tool_calls: Filter by whether the query log has tool calls - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -728,9 +693,6 @@ def list( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -761,7 +723,6 @@ def list( "guardrailed": guardrailed, "has_tool_calls": has_tool_calls, "limit": limit, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -769,7 +730,6 @@ def list( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_params.QueryLogListParams, @@ -834,7 +794,6 @@ async def list_by_group( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -846,7 +805,6 @@ async def list_by_group( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -875,9 +833,6 @@ async def list_by_group( needs_review: Filter logs that need review - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -900,9 +855,6 @@ async def list_by_group( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -933,7 +885,6 @@ async def list_by_group( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -942,7 +893,6 @@ async def list_by_group( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_by_group_params.QueryLogListByGroupParams, @@ -965,7 +915,6 @@ def list_groups( has_tool_calls: Optional[bool] | Omit = omit, limit: int | Omit = omit, needs_review: Optional[bool] | Omit = omit, - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, offset: int | Omit = omit, order: Literal["asc", "desc"] | Omit = omit, passed_evals: Optional[SequenceNotStr[str]] | Omit = omit, @@ -976,7 +925,6 @@ def list_groups( search_text: Optional[str] | Omit = omit, sort: Optional[str] | Omit = omit, tool_call_names: Optional[SequenceNotStr[str]] | Omit = omit, - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] | Omit = omit, was_cache_hit: Optional[bool] | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -1005,9 +953,6 @@ def list_groups( needs_review: Filter log groups that need review - non_triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - passed_evals: Filter by evals that passed primary_eval_issue: Filter logs that have ANY of these primary evaluation issues (OR operation) @@ -1029,9 +974,6 @@ def list_groups( tool_call_names: Filter by names of tools called in the assistant response - triggered_deterministic_guardrail_ids: Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - was_cache_hit: Filter by cache hit status extra_headers: Send extra headers @@ -1063,7 +1005,6 @@ def list_groups( "has_tool_calls": has_tool_calls, "limit": limit, "needs_review": needs_review, - "non_triggered_deterministic_guardrail_ids": non_triggered_deterministic_guardrail_ids, "offset": offset, "order": order, "passed_evals": passed_evals, @@ -1071,7 +1012,6 @@ def list_groups( "search_text": search_text, "sort": sort, "tool_call_names": tool_call_names, - "triggered_deterministic_guardrail_ids": triggered_deterministic_guardrail_ids, "was_cache_hit": was_cache_hit, }, query_log_list_groups_params.QueryLogListGroupsParams, diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 75ff741..8fe7741 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Optional +from typing import Dict, Optional from datetime import datetime from typing_extensions import Literal @@ -9,7 +9,6 @@ __all__ = [ "ProjectRetrieveResponse", "Config", - "ConfigDeterministicEval", "ConfigEvalConfig", "ConfigEvalConfigCustomEvals", "ConfigEvalConfigCustomEvalsEvals", @@ -28,14 +27,6 @@ ] -class ConfigDeterministicEval(BaseModel): - id: str - - name: str - - should_guardrail: bool - - class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): message: str """ @@ -478,8 +469,6 @@ class ConfigEvalConfig(BaseModel): class Config(BaseModel): - deterministic_evals: List[ConfigDeterministicEval] - ai_guidance_threshold: Optional[float] = None clustering_use_llm_matching: Optional[bool] = None diff --git a/src/codex/types/projects/query_log_list_by_group_params.py b/src/codex/types/projects/query_log_list_by_group_params.py index 864e963..17a260b 100644 --- a/src/codex/types/projects/query_log_list_by_group_params.py +++ b/src/codex/types/projects/query_log_list_by_group_params.py @@ -39,12 +39,6 @@ class QueryLogListByGroupParams(TypedDict, total=False): needs_review: Optional[bool] """Filter logs that need review""" - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - """ - offset: int order: Literal["asc", "desc"] @@ -82,11 +76,5 @@ class QueryLogListByGroupParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - """ - was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_by_group_response.py 
b/src/codex/types/projects/query_log_list_by_group_response.py index a4403f3..34b722b 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -504,12 +504,6 @@ class QueryLogsByGroupQueryLog(BaseModel): itself. """ - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - UUIDs of deterministic guardrails that were checked but not triggered for this - query - """ - original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -544,9 +538,6 @@ class QueryLogsByGroupQueryLog(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """UUIDs of deterministic guardrails that were triggered for this query""" - class QueryLogsByGroup(BaseModel): query_logs: List[QueryLogsByGroupQueryLog] @@ -581,12 +572,6 @@ class Filters(BaseModel): needs_review: Optional[bool] = None """Filter logs that need review""" - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - """ - passed_evals: Optional[List[str]] = None """Filter by evals that passed""" @@ -604,12 +589,6 @@ class Filters(BaseModel): tool_call_names: Optional[List[str]] = None """Filter by names of tools called in the assistant response""" - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - """ - was_cache_hit: Optional[bool] = None """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_groups_params.py b/src/codex/types/projects/query_log_list_groups_params.py index 118fbc6..ece65b1 100644 --- a/src/codex/types/projects/query_log_list_groups_params.py +++ b/src/codex/types/projects/query_log_list_groups_params.py @@ -39,12 +39,6 @@ class QueryLogListGroupsParams(TypedDict, total=False): needs_review: Optional[bool] """Filter log groups that need review""" - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - """ - offset: int order: Literal["asc", "desc"] @@ -80,11 +74,5 @@ class QueryLogListGroupsParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - """ - was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 8d3c412..1587af7 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -499,12 +499,6 @@ class QueryLogListGroupsResponse(BaseModel): itself. """ - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - UUIDs of deterministic guardrails that were checked but not triggered for this - query - """ - original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. 
@@ -538,6 +532,3 @@ class QueryLogListGroupsResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ - - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/query_log_list_params.py b/src/codex/types/projects/query_log_list_params.py index 86ed0e0..eb7858a 100644 --- a/src/codex/types/projects/query_log_list_params.py +++ b/src/codex/types/projects/query_log_list_params.py @@ -36,12 +36,6 @@ class QueryLogListParams(TypedDict, total=False): limit: int - non_triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were checked but not - triggered (OR operation) - """ - offset: int order: Literal["asc", "desc"] @@ -76,11 +70,5 @@ class QueryLogListParams(TypedDict, total=False): tool_call_names: Optional[SequenceNotStr[str]] """Filter by names of tools called in the assistant response""" - triggered_deterministic_guardrail_ids: Optional[SequenceNotStr[str]] - """ - Filter logs where ANY of these deterministic guardrail IDs were triggered (OR - operation) - """ - was_cache_hit: Optional[bool] """Filter by cache hit status""" diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index e94cc72..e71f05b 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -484,12 +484,6 @@ class QueryLogListResponse(BaseModel): itself. """ - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - UUIDs of deterministic guardrails that were checked but not triggered for this - query - """ - original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -523,6 +517,3 @@ class QueryLogListResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ - - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 13baf78..6116840 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -498,12 +498,6 @@ class QueryLogRetrieveResponse(BaseModel): itself. """ - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - UUIDs of deterministic guardrails that were checked but not triggered for this - query - """ - original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -537,6 +531,3 @@ class QueryLogRetrieveResponse(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ - - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """UUIDs of deterministic guardrails that were triggered for this query""" diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 2564659..986f898 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -491,12 +491,6 @@ class QueryLog(BaseModel): itself. 
""" - non_triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """ - UUIDs of deterministic guardrails that were checked but not triggered for this - query - """ - original_assistant_response: Optional[str] = None """The original assistant response that would have been displayed to the user. @@ -531,9 +525,6 @@ class QueryLog(BaseModel): If not provided, it is assumed no tools were provided to the LLM. """ - triggered_deterministic_guardrail_ids: Optional[List[str]] = None - """UUIDs of deterministic guardrails that were triggered for this query""" - class RemediationListResolvedLogsResponse(BaseModel): query_logs: List[QueryLog] diff --git a/tests/api_resources/projects/test_query_logs.py b/tests/api_resources/projects/test_query_logs.py index 420cb9c..e98cb27 100644 --- a/tests/api_resources/projects/test_query_logs.py +++ b/tests/api_resources/projects/test_query_logs.py @@ -107,7 +107,6 @@ def test_method_list_with_all_params(self, client: Codex) -> None: guardrailed=True, has_tool_calls=True, limit=1, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -115,7 +114,6 @@ def test_method_list_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) assert_matches_type(SyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) @@ -236,7 +234,6 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: has_tool_calls=True, limit=1, needs_review=True, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -245,7 +242,6 @@ def test_method_list_by_group_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -313,7 +309,6 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: has_tool_calls=True, limit=1, needs_review=True, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -321,7 +316,6 @@ def test_method_list_groups_with_all_params(self, client: Codex) -> None: search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -559,7 +553,6 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No guardrailed=True, has_tool_calls=True, limit=1, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -567,7 +560,6 @@ async def test_method_list_with_all_params(self, async_client: AsyncCodex) -> No search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) assert_matches_type(AsyncOffsetPageQueryLogs[QueryLogListResponse], query_log, path=["response"]) @@ -688,7 +680,6 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod has_tool_calls=True, limit=1, needs_review=True, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", 
passed_evals=["string"], @@ -697,7 +688,6 @@ async def test_method_list_by_group_with_all_params(self, async_client: AsyncCod search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) @@ -765,7 +755,6 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex has_tool_calls=True, limit=1, needs_review=True, - non_triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], offset=0, order="asc", passed_evals=["string"], @@ -773,7 +762,6 @@ async def test_method_list_groups_with_all_params(self, async_client: AsyncCodex search_text="search_text", sort="created_at", tool_call_names=["string"], - triggered_deterministic_guardrail_ids=["182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"], was_cache_hit=True, ) From d64e47443ef147240de6cba892e901dcab0b2d71 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 03:31:33 +0000 Subject: [PATCH 10/14] fix(types): allow pyright to infer TypedDict types within SequenceNotStr --- src/codex/_types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/codex/_types.py b/src/codex/_types.py index 2e4695f..edc28a8 100644 --- a/src/codex/_types.py +++ b/src/codex/_types.py @@ -243,6 +243,9 @@ class HttpxSendArgs(TypedDict, total=False): if TYPE_CHECKING: # This works because str.__contains__ does not accept object (either in typeshed or at runtime) # https://github.com/hauntsaninja/useful_types/blob/5e9710f3875107d068e7679fd7fec9cfab0eff3b/useful_types/__init__.py#L285 + # + # Note: index() and count() methods are intentionally omitted to allow pyright to properly + # infer TypedDict types when dict literals are used in lists assigned to SequenceNotStr. class SequenceNotStr(Protocol[_T_co]): @overload def __getitem__(self, index: SupportsIndex, /) -> _T_co: ... @@ -251,8 +254,6 @@ def __getitem__(self, index: slice, /) -> Sequence[_T_co]: ... def __contains__(self, value: object, /) -> bool: ... def __len__(self) -> int: ... def __iter__(self) -> Iterator[_T_co]: ... - def index(self, value: Any, start: int = 0, stop: int = ..., /) -> int: ... - def count(self, value: Any, /) -> int: ... def __reversed__(self) -> Iterator[_T_co]: ... 
else: # just point this to a normal `Sequence` at runtime to avoid having to special case From 250433e37cb8ba034de2977ee6375f06390cc6c4 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 03:33:14 +0000 Subject: [PATCH 11/14] chore: add missing docstrings --- .../organization_list_members_response.py | 2 + src/codex/types/project_create_params.py | 53 ++++++++ src/codex/types/project_detect_params.py | 127 ++++++++++++++++++ src/codex/types/project_detect_response.py | 4 + src/codex/types/project_list_response.py | 55 ++++++++ src/codex/types/project_retrieve_response.py | 53 ++++++++ src/codex/types/project_return_schema.py | 53 ++++++++ src/codex/types/project_update_params.py | 53 ++++++++ src/codex/types/project_validate_params.py | 74 ++++++++++ src/codex/types/project_validate_response.py | 4 + .../types/projects/eval_create_params.py | 2 + .../types/projects/eval_list_response.py | 4 + .../types/projects/eval_update_params.py | 4 + .../query_log_list_by_group_response.py | 12 ++ .../query_log_list_groups_response.py | 10 ++ .../types/projects/query_log_list_response.py | 10 ++ .../projects/query_log_retrieve_response.py | 10 ++ ...remediation_list_resolved_logs_response.py | 10 ++ 18 files changed, 540 insertions(+) diff --git a/src/codex/types/organization_list_members_response.py b/src/codex/types/organization_list_members_response.py index 1fa593e..f37c1f1 100644 --- a/src/codex/types/organization_list_members_response.py +++ b/src/codex/types/organization_list_members_response.py @@ -9,6 +9,8 @@ class OrganizationListMembersResponseItem(BaseModel): + """Schema for public organization member information.""" + email: str name: str diff --git a/src/codex/types/project_create_params.py b/src/codex/types/project_create_params.py index 4704f63..bd84b1c 100644 --- a/src/codex/types/project_create_params.py +++ b/src/codex/types/project_create_params.py @@ -39,6 +39,8 @@ class ProjectCreateParams(TypedDict, total=False): class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -56,6 +58,11 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. + """ + criteria: Required[str] """ The evaluation criteria text that describes what aspect is being evaluated and @@ -120,10 +127,14 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): class ConfigEvalConfigCustomEvals(TypedDict, total=False): + """Configuration for custom evaluation metrics.""" + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -141,6 +152,12 @@ class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDic class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. 
+ """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -179,6 +196,8 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -196,6 +215,12 @@ class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total= class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -234,6 +259,8 @@ class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -251,6 +278,12 @@ class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedD class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -289,6 +322,8 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -306,6 +341,12 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDi class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -344,6 +385,8 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -361,6 +404,12 @@ class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. 
+ """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -399,6 +448,8 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + """Configuration for default evaluation metrics.""" + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency """A pre-configured evaluation metric from TrustworthyRAG or built into the system. @@ -436,6 +487,8 @@ class ConfigEvalConfigDefaultEvals(TypedDict, total=False): class ConfigEvalConfig(TypedDict, total=False): + """Configuration for project-specific evaluation metrics""" + custom_evals: ConfigEvalConfigCustomEvals """Configuration for custom evaluation metrics.""" diff --git a/src/codex/types/project_detect_params.py b/src/codex/types/project_detect_params.py index 8e93971..9cbf9bc 100644 --- a/src/codex/types/project_detect_params.py +++ b/src/codex/types/project_detect_params.py @@ -440,6 +440,8 @@ class ResponseChatCompletionTyped(TypedDict, total=False): class EvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -457,6 +459,11 @@ class EvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False): class EvalConfigCustomEvalsEvals(TypedDict, total=False): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. + """ + criteria: Required[str] """ The evaluation criteria text that describes what aspect is being evaluated and @@ -521,10 +528,14 @@ class EvalConfigCustomEvalsEvals(TypedDict, total=False): class EvalConfigCustomEvals(TypedDict, total=False): + """Configuration for custom evaluation metrics.""" + evals: Dict[str, EvalConfigCustomEvalsEvals] class EvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -542,6 +553,12 @@ class EvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDict, tot class EvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -580,6 +597,8 @@ class EvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): class EvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -597,6 +616,12 @@ class EvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total=False) class EvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. 
+ """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -635,6 +660,8 @@ class EvalConfigDefaultEvalsQueryEase(TypedDict, total=False): class EvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -652,6 +679,12 @@ class EvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedDict, t class EvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -690,6 +723,8 @@ class EvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): class EvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -707,6 +742,12 @@ class EvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDict, to class EvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -745,6 +786,8 @@ class EvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): class EvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -762,6 +805,12 @@ class EvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, total= class EvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -800,6 +849,8 @@ class EvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): class EvalConfigDefaultEvals(TypedDict, total=False): + """Configuration for default evaluation metrics.""" + context_sufficiency: EvalConfigDefaultEvalsContextSufficiency """A pre-configured evaluation metric from TrustworthyRAG or built into the system. @@ -837,6 +888,8 @@ class EvalConfigDefaultEvals(TypedDict, total=False): class EvalConfig(TypedDict, total=False): + """All of the evals that should be used for this query""" + custom_evals: EvalConfigCustomEvals """Configuration for custom evaluation metrics.""" @@ -1041,6 +1094,80 @@ class MessageChatCompletionDeveloperMessageParam(TypedDict, total=False): class Options(TypedDict, total=False): + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. 
+ Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the quality preset. + + For all options described below, higher settings will lead to longer runtimes and may consume more tokens internally. + You may not be able to run long prompts (or prompts with long responses) in your account, + unless your token/rate limits are increased. If you hit token limit issues, try lower/less expensive TLMOptions + to be able to run longer prompts/responses, or contact Cleanlab to increase your limits. + + The default values corresponding to each quality preset are: + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, `reasoning_effort` = `"none"`. + + By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. + You can set custom values for these arguments regardless of the quality preset specified. + + Args: + model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield better results, faster models yield faster results). + - Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-haiku". + - Recommended models for accuracy: "gpt-5", "gpt-4.1", "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". + - Recommended models for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. 
+ + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. + + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. + Measuring consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. + This parameter has no effect when `disable_trustworthiness` is True. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. + Supported similarity measures include - "semantic" (based on natural language inference), + "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), + "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. + You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. + + disable_trustworthiness (bool, default = False): if True, TLM will not compute trust scores, + useful if you only want to compute custom evaluation criteria. 
+ """ + custom_eval_criteria: Iterable[object] disable_persistence: bool diff --git a/src/codex/types/project_detect_response.py b/src/codex/types/project_detect_response.py index df03c86..ff0d6ce 100644 --- a/src/codex/types/project_detect_response.py +++ b/src/codex/types/project_detect_response.py @@ -40,6 +40,10 @@ class EvalScores(BaseModel): class GuardrailedFallback(BaseModel): + """ + Name, fallback message, fallback priority, and fallback type of the triggered guardrail with the highest fallback priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/project_list_response.py b/src/codex/types/project_list_response.py index e4ce558..d528e47 100644 --- a/src/codex/types/project_list_response.py +++ b/src/codex/types/project_list_response.py @@ -30,6 +30,8 @@ class ProjectConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -47,6 +49,11 @@ class ProjectConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. + """ + criteria: str """ The evaluation criteria text that describes what aspect is being evaluated and @@ -117,10 +124,14 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel): class ProjectConfigEvalConfigCustomEvals(BaseModel): + """Configuration for custom evaluation metrics.""" + evals: Optional[Dict[str, ProjectConfigEvalConfigCustomEvalsEvals]] = None class ProjectConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -138,6 +149,12 @@ class ProjectConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(B class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -182,6 +199,8 @@ class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): class ProjectConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -199,6 +218,12 @@ class ProjectConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel) class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. 
@@ -243,6 +268,8 @@ class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel): class ProjectConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -260,6 +287,12 @@ class ProjectConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -304,6 +337,8 @@ class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -321,6 +356,12 @@ class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback( class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -365,6 +406,8 @@ class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): class ProjectConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -382,6 +425,12 @@ class ProjectConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(Base class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -426,6 +475,8 @@ class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): class ProjectConfigEvalConfigDefaultEvals(BaseModel): + """Configuration for default evaluation metrics.""" + context_sufficiency: Optional[ProjectConfigEvalConfigDefaultEvalsContextSufficiency] = None """A pre-configured evaluation metric from TrustworthyRAG or built into the system. 
@@ -463,6 +514,8 @@ class ProjectConfigEvalConfigDefaultEvals(BaseModel): class ProjectConfigEvalConfig(BaseModel): + """Configuration for project-specific evaluation metrics""" + custom_evals: Optional[ProjectConfigEvalConfigCustomEvals] = None """Configuration for custom evaluation metrics.""" @@ -522,6 +575,8 @@ class Project(BaseModel): class Filters(BaseModel): + """Applied filters for the projects list request""" + query: Optional[str] = None diff --git a/src/codex/types/project_retrieve_response.py b/src/codex/types/project_retrieve_response.py index 8fe7741..abc05ad 100644 --- a/src/codex/types/project_retrieve_response.py +++ b/src/codex/types/project_retrieve_response.py @@ -28,6 +28,8 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -45,6 +47,11 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): class ConfigEvalConfigCustomEvalsEvals(BaseModel): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. + """ + criteria: str """ The evaluation criteria text that describes what aspect is being evaluated and @@ -115,10 +122,14 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): class ConfigEvalConfigCustomEvals(BaseModel): + """Configuration for custom evaluation metrics.""" + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -136,6 +147,12 @@ class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(BaseMode class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -180,6 +197,8 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -197,6 +216,12 @@ class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel): class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -241,6 +266,8 @@ class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -258,6 +285,12 @@ class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(BaseMo class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. 
+ + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -302,6 +335,8 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -319,6 +354,12 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(BaseMod class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -363,6 +404,8 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -380,6 +423,12 @@ class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(BaseModel): class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -424,6 +473,8 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): class ConfigEvalConfigDefaultEvals(BaseModel): + """Configuration for default evaluation metrics.""" + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None """A pre-configured evaluation metric from TrustworthyRAG or built into the system. @@ -461,6 +512,8 @@ class ConfigEvalConfigDefaultEvals(BaseModel): class ConfigEvalConfig(BaseModel): + """Configuration for project-specific evaluation metrics""" + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None """Configuration for custom evaluation metrics.""" diff --git a/src/codex/types/project_return_schema.py b/src/codex/types/project_return_schema.py index 423d0ce..07a3a9b 100644 --- a/src/codex/types/project_return_schema.py +++ b/src/codex/types/project_return_schema.py @@ -28,6 +28,8 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -45,6 +47,11 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(BaseModel): class ConfigEvalConfigCustomEvalsEvals(BaseModel): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. 
+ """ + criteria: str """ The evaluation criteria text that describes what aspect is being evaluated and @@ -115,10 +122,14 @@ class ConfigEvalConfigCustomEvalsEvals(BaseModel): class ConfigEvalConfigCustomEvals(BaseModel): + """Configuration for custom evaluation metrics.""" + evals: Optional[Dict[str, ConfigEvalConfigCustomEvalsEvals]] = None class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -136,6 +147,12 @@ class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(BaseMode class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -180,6 +197,8 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel): class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -197,6 +216,12 @@ class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(BaseModel): class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -241,6 +266,8 @@ class ConfigEvalConfigDefaultEvalsQueryEase(BaseModel): class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -258,6 +285,12 @@ class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(BaseMo class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -302,6 +335,8 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel): class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -319,6 +354,12 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(BaseMod class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. 
@@ -363,6 +404,8 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel): class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -380,6 +423,12 @@ class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(BaseModel): class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + display_name: str """Human-friendly name for display. @@ -424,6 +473,8 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel): class ConfigEvalConfigDefaultEvals(BaseModel): + """Configuration for default evaluation metrics.""" + context_sufficiency: Optional[ConfigEvalConfigDefaultEvalsContextSufficiency] = None """A pre-configured evaluation metric from TrustworthyRAG or built into the system. @@ -461,6 +512,8 @@ class ConfigEvalConfigDefaultEvals(BaseModel): class ConfigEvalConfig(BaseModel): + """Configuration for project-specific evaluation metrics""" + custom_evals: Optional[ConfigEvalConfigCustomEvals] = None """Configuration for custom evaluation metrics.""" diff --git a/src/codex/types/project_update_params.py b/src/codex/types/project_update_params.py index 3557c2d..68cb0d3 100644 --- a/src/codex/types/project_update_params.py +++ b/src/codex/types/project_update_params.py @@ -37,6 +37,8 @@ class ProjectUpdateParams(TypedDict, total=False): class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -54,6 +56,11 @@ class ConfigEvalConfigCustomEvalsEvalsGuardrailedFallback(TypedDict, total=False class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): + """A custom evaluation metric created by users. + + The TLMEvalSchema are mutable and stored in the database. + """ + criteria: Required[str] """ The evaluation criteria text that describes what aspect is being evaluated and @@ -118,10 +125,14 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False): class ConfigEvalConfigCustomEvals(TypedDict, total=False): + """Configuration for custom evaluation metrics.""" + evals: Dict[str, ConfigEvalConfigCustomEvalsEvals] class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -139,6 +150,12 @@ class ConfigEvalConfigDefaultEvalsContextSufficiencyGuardrailedFallback(TypedDic class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. 
+ """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -177,6 +194,8 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -194,6 +213,12 @@ class ConfigEvalConfigDefaultEvalsQueryEaseGuardrailedFallback(TypedDict, total= class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -232,6 +257,8 @@ class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -249,6 +276,12 @@ class ConfigEvalConfigDefaultEvalsResponseGroundednessGuardrailedFallback(TypedD class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -287,6 +320,8 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -304,6 +339,12 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulnessGuardrailedFallback(TypedDi class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. + """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -342,6 +383,8 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False): class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -359,6 +402,12 @@ class ConfigEvalConfigDefaultEvalsTrustworthinessGuardrailedFallback(TypedDict, class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): + """A pre-configured evaluation metric from TrustworthyRAG or built into the system. + + The evaluation criteria and identifiers are immutable and system-managed, + while other properties like thresholds and priorities can be configured. 
+ """ + eval_key: Required[str] """ Unique key for eval metric - currently maps to the TrustworthyRAG name property @@ -397,6 +446,8 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False): class ConfigEvalConfigDefaultEvals(TypedDict, total=False): + """Configuration for default evaluation metrics.""" + context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency """A pre-configured evaluation metric from TrustworthyRAG or built into the system. @@ -434,6 +485,8 @@ class ConfigEvalConfigDefaultEvals(TypedDict, total=False): class ConfigEvalConfig(TypedDict, total=False): + """Configuration for project-specific evaluation metrics""" + custom_evals: ConfigEvalConfigCustomEvals """Configuration for custom evaluation metrics.""" diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py index 0efa430..1ea5392 100644 --- a/src/codex/types/project_validate_params.py +++ b/src/codex/types/project_validate_params.py @@ -644,6 +644,80 @@ class MessageChatCompletionDeveloperMessageParam(TypedDict, total=False): class Options(TypedDict, total=False): + """ + Typed dict of advanced configuration options for the Trustworthy Language Model. + Many of these configurations are determined by the quality preset selected + (learn about quality presets in the TLM [initialization method](./#class-tlm)). + Specifying TLMOptions values directly overrides any default values set from the quality preset. + + For all options described below, higher settings will lead to longer runtimes and may consume more tokens internally. + You may not be able to run long prompts (or prompts with long responses) in your account, + unless your token/rate limits are increased. If you hit token limit issues, try lower/less expensive TLMOptions + to be able to run longer prompts/responses, or contact Cleanlab to increase your limits. + + The default values corresponding to each quality preset are: + - **best:** `num_consistency_samples` = 8, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **high:** `num_consistency_samples` = 4, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **medium:** `num_consistency_samples` = 0, `num_self_reflections` = 3, `reasoning_effort` = `"high"`. + - **low:** `num_consistency_samples` = 0, `num_self_reflections` = 3, `reasoning_effort` = `"none"`. + - **base:** `num_consistency_samples` = 0, `num_self_reflections` = 1, `reasoning_effort` = `"none"`. + + By default, TLM uses the: "medium" `quality_preset`, "gpt-4.1-mini" base `model`, and `max_tokens` is set to 512. + You can set custom values for these arguments regardless of the quality preset specified. + + Args: + model ({"gpt-5", "gpt-5-mini", "gpt-5-nano", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o4-mini", "o3", "gpt-4.5-preview", "gpt-4o-mini", "gpt-4o", "o3-mini", "o1", "o1-mini", "gpt-4", "gpt-3.5-turbo-16k", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-sonnet-v2", "claude-3.5-sonnet", "claude-3.5-haiku", "claude-3-haiku", "nova-micro", "nova-lite", "nova-pro"}, default = "gpt-4.1-mini"): Underlying base LLM to use (better models yield better results, faster models yield faster results). + - Models still in beta: "o3", "o1", "o4-mini", "o3-mini", "o1-mini", "gpt-4.5-preview", "claude-opus-4-0", "claude-sonnet-4-0", "claude-3.7-sonnet", "claude-3.5-haiku". + - Recommended models for accuracy: "gpt-5", "gpt-4.1", "o4-mini", "o3", "claude-opus-4-0", "claude-sonnet-4-0". 
+ - Recommended models for low latency/costs: "gpt-4.1-nano", "nova-micro". + + log (list[str], default = []): optionally specify additional logs or metadata that TLM should return. + For instance, include "explanation" here to get explanations of why a response is scored with low trustworthiness. + + custom_eval_criteria (list[dict[str, Any]], default = []): optionally specify custom evalution criteria beyond the built-in trustworthiness scoring. + The expected input format is a list of dictionaries, where each dictionary has the following keys: + - name: Name of the evaluation criteria. + - criteria: Instructions specifying the evaluation criteria. + + max_tokens (int, default = 512): the maximum number of tokens that can be generated in the response from `TLM.prompt()` as well as during internal trustworthiness scoring. + If you experience token/rate-limit errors, try lowering this number. + For OpenAI models, this parameter must be between 64 and 4096. For Claude models, this parameter must be between 64 and 512. + + reasoning_effort ({"none", "low", "medium", "high"}, default = "high"): how much internal LLM calls are allowed to reason (number of thinking tokens) + when generating alternative possible responses and reflecting on responses during trustworthiness scoring. + Reduce this value to reduce runtimes. Higher values may improve trust scoring. + + num_self_reflections (int, default = 3): the number of different evaluations to perform where the LLM reflects on the response, a factor affecting trust scoring. + The maximum number currently supported is 3. Lower values can reduce runtimes. + Reflection helps quantify aleatoric uncertainty associated with challenging prompts and catches responses that are noticeably incorrect/bad upon further analysis. + This parameter has no effect when `disable_trustworthiness` is True. + + num_consistency_samples (int, default = 8): the amount of internal sampling to measure LLM response consistency, a factor affecting trust scoring. + Must be between 0 and 20. Lower values can reduce runtimes. + Measuring consistency helps quantify the epistemic uncertainty associated with + strange prompts or prompts that are too vague/open-ended to receive a clearly defined 'good' response. + TLM measures consistency via the degree of contradiction between sampled responses that the model considers plausible. + This parameter has no effect when `disable_trustworthiness` is True. + + similarity_measure ({"semantic", "string", "embedding", "embedding_large", "code", "discrepancy"}, default = "discrepancy"): how the + trustworthiness scoring's consistency algorithm measures similarity between alternative responses considered plausible by the model. + Supported similarity measures include - "semantic" (based on natural language inference), + "embedding" (based on vector embedding similarity), "embedding_large" (based on a larger embedding model), + "code" (based on model-based analysis designed to compare code), "discrepancy" (based on model-based analysis of possible discrepancies), + and "string" (based on character/word overlap). Set this to "string" for minimal runtimes. + This parameter has no effect when `num_consistency_samples = 0`. + + num_candidate_responses (int, default = 1): how many alternative candidate responses are internally generated in `TLM.prompt()`. + `TLM.prompt()` scores the trustworthiness of each candidate response, and then returns the most trustworthy one. 
+ You can auto-improve responses by increasing this parameter, but at higher runtimes/costs. + This parameter must be between 1 and 20. It has no effect on `TLM.score()`. + When this parameter is 1, `TLM.prompt()` simply returns a standard LLM response and does not attempt to auto-improve it. + This parameter has no effect when `disable_trustworthiness` is True. + + disable_trustworthiness (bool, default = False): if True, TLM will not compute trust scores, + useful if you only want to compute custom evaluation criteria. + """ + custom_eval_criteria: Iterable[object] disable_persistence: bool diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py index b9166c2..895db6f 100644 --- a/src/codex/types/project_validate_response.py +++ b/src/codex/types/project_validate_response.py @@ -74,6 +74,10 @@ class EvalScores(BaseModel): class GuardrailedFallback(BaseModel): + """ + Name, fallback message, fallback priority, and fallback type of the triggered guardrail with the highest fallback priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/eval_create_params.py b/src/codex/types/projects/eval_create_params.py index d4ec41e..d319f92 100644 --- a/src/codex/types/projects/eval_create_params.py +++ b/src/codex/types/projects/eval_create_params.py @@ -73,6 +73,8 @@ class EvalCreateParams(TypedDict, total=False): class GuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/eval_list_response.py b/src/codex/types/projects/eval_list_response.py index 2aa0d75..47bdd3d 100644 --- a/src/codex/types/projects/eval_list_response.py +++ b/src/codex/types/projects/eval_list_response.py @@ -9,6 +9,8 @@ class EvalGuardrailedFallback(BaseModel): + """message, priority, type""" + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -96,6 +98,8 @@ class Eval(BaseModel): class EvalListResponse(BaseModel): + """Schema for paginated evals response.""" + evals: List[Eval] total_count: int diff --git a/src/codex/types/projects/eval_update_params.py b/src/codex/types/projects/eval_update_params.py index 7da4e1e..87dc940 100644 --- a/src/codex/types/projects/eval_update_params.py +++ b/src/codex/types/projects/eval_update_params.py @@ -83,6 +83,8 @@ class CustomEvalCreateOrUpdateSchema(TypedDict, total=False): class CustomEvalCreateOrUpdateSchemaGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be @@ -137,6 +139,8 @@ class DefaultEvalUpdateSchema(TypedDict, total=False): class DefaultEvalUpdateSchemaGuardrailedFallback(TypedDict, total=False): + """message, priority, type""" + message: Required[str] """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/query_log_list_by_group_response.py b/src/codex/types/projects/query_log_list_by_group_response.py index 34b722b..4638a3f 100644 --- a/src/codex/types/projects/query_log_list_by_group_response.py +++ b/src/codex/types/projects/query_log_list_by_group_response.py @@ -84,6 +84,12 @@ class QueryLogsByGroupQueryLogFormattedNonGuardrailEvalScores(BaseModel): class QueryLogsByGroupQueryLogContext(BaseModel): + """Represents a document in RAG contex. 
+ + This schema is designed to be flexible while maintaining structure for RAG systems. + It supports both simple string content and rich document metadata. + """ + content: str """The actual content/text of the document.""" @@ -142,6 +148,10 @@ class QueryLogsByGroupQueryLogEvaluatedResponseToolCall(BaseModel): class QueryLogsByGroupQueryLogGuardrailedFallback(BaseModel): + """ + Name, fallback message, priority, and type for for the triggered guardrail with the highest priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be @@ -546,6 +556,8 @@ class QueryLogsByGroup(BaseModel): class Filters(BaseModel): + """Applied filters for the query""" + custom_metadata_dict: Optional[object] = None created_at_end: Optional[datetime] = None diff --git a/src/codex/types/projects/query_log_list_groups_response.py b/src/codex/types/projects/query_log_list_groups_response.py index 1587af7..c5c2a4d 100644 --- a/src/codex/types/projects/query_log_list_groups_response.py +++ b/src/codex/types/projects/query_log_list_groups_response.py @@ -81,6 +81,12 @@ class FormattedNonGuardrailEvalScores(BaseModel): class Context(BaseModel): + """Represents a document in RAG contex. + + This schema is designed to be flexible while maintaining structure for RAG systems. + It supports both simple string content and rich document metadata. + """ + content: str """The actual content/text of the document.""" @@ -139,6 +145,10 @@ class EvaluatedResponseToolCall(BaseModel): class GuardrailedFallback(BaseModel): + """ + Name, fallback message, priority, and type for for the triggered guardrail with the highest priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/query_log_list_response.py b/src/codex/types/projects/query_log_list_response.py index e71f05b..b558081 100644 --- a/src/codex/types/projects/query_log_list_response.py +++ b/src/codex/types/projects/query_log_list_response.py @@ -81,6 +81,12 @@ class FormattedNonGuardrailEvalScores(BaseModel): class Context(BaseModel): + """Represents a document in RAG contex. + + This schema is designed to be flexible while maintaining structure for RAG systems. + It supports both simple string content and rich document metadata. + """ + content: str """The actual content/text of the document.""" @@ -139,6 +145,10 @@ class EvaluatedResponseToolCall(BaseModel): class GuardrailedFallback(BaseModel): + """ + Name, fallback message, priority, and type for for the triggered guardrail with the highest priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/query_log_retrieve_response.py b/src/codex/types/projects/query_log_retrieve_response.py index 6116840..5df2108 100644 --- a/src/codex/types/projects/query_log_retrieve_response.py +++ b/src/codex/types/projects/query_log_retrieve_response.py @@ -81,6 +81,12 @@ class FormattedNonGuardrailEvalScores(BaseModel): class Context(BaseModel): + """Represents a document in RAG contex. + + This schema is designed to be flexible while maintaining structure for RAG systems. + It supports both simple string content and rich document metadata. 
+ """ + content: str """The actual content/text of the document.""" @@ -139,6 +145,10 @@ class EvaluatedResponseToolCall(BaseModel): class GuardrailedFallback(BaseModel): + """ + Name, fallback message, priority, and type for for the triggered guardrail with the highest priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be diff --git a/src/codex/types/projects/remediation_list_resolved_logs_response.py b/src/codex/types/projects/remediation_list_resolved_logs_response.py index 986f898..d96d129 100644 --- a/src/codex/types/projects/remediation_list_resolved_logs_response.py +++ b/src/codex/types/projects/remediation_list_resolved_logs_response.py @@ -82,6 +82,12 @@ class QueryLogFormattedNonGuardrailEvalScores(BaseModel): class QueryLogContext(BaseModel): + """Represents a document in RAG contex. + + This schema is designed to be flexible while maintaining structure for RAG systems. + It supports both simple string content and rich document metadata. + """ + content: str """The actual content/text of the document.""" @@ -140,6 +146,10 @@ class QueryLogEvaluatedResponseToolCall(BaseModel): class QueryLogGuardrailedFallback(BaseModel): + """ + Name, fallback message, priority, and type for for the triggered guardrail with the highest priority + """ + message: str """ Fallback message to use if this eval fails and causes the response to be From c8986ce9fa0eae5726ba6cb6692dfa11c60284f5 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Dec 2025 03:24:10 +0000 Subject: [PATCH 12/14] chore(internal): add missing files argument to base client --- src/codex/_base_client.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/codex/_base_client.py b/src/codex/_base_client.py index e6febf3..1ce4a39 100644 --- a/src/codex/_base_client.py +++ b/src/codex/_base_client.py @@ -1247,9 +1247,12 @@ def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + ) return self.request(cast_to, opts) def put( @@ -1767,9 +1770,12 @@ async def patch( *, cast_to: Type[ResponseT], body: Body | None = None, + files: RequestFiles | None = None, options: RequestOptions = {}, ) -> ResponseT: - opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct( + method="patch", url=path, json_data=body, files=to_httpx_files(files), **options + ) return await self.request(cast_to, opts) async def put( From 9f17615353be5ee705ea2f4713d9dc790b2ecb3b Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 03:47:20 +0000 Subject: [PATCH 13/14] chore: speedup initial import --- src/codex/_client.py | 226 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 179 insertions(+), 47 deletions(-) diff --git a/src/codex/_client.py b/src/codex/_client.py index 308ce9a..1bfcb7e 100644 --- a/src/codex/_client.py +++ b/src/codex/_client.py @@ -3,7 +3,7 @@ from __future__ import annotations import os -from typing import Any, Dict, Mapping, cast +from typing import TYPE_CHECKING, Any, Dict, Mapping, cast from typing_extensions import Self, Literal, 
override import httpx @@ -21,8 +21,8 @@ not_given, ) from ._utils import is_given, get_async_library +from ._compat import cached_property from ._version import __version__ -from .resources import health from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import APIStatusError from ._base_client import ( @@ -30,9 +30,13 @@ SyncAPIClient, AsyncAPIClient, ) -from .resources.users import users -from .resources.projects import projects -from .resources.organizations import organizations + +if TYPE_CHECKING: + from .resources import users, health, projects, organizations + from .resources.health import HealthResource, AsyncHealthResource + from .resources.users.users import UsersResource, AsyncUsersResource + from .resources.projects.projects import ProjectsResource, AsyncProjectsResource + from .resources.organizations.organizations import OrganizationsResource, AsyncOrganizationsResource __all__ = [ "ENVIRONMENTS", @@ -54,13 +58,6 @@ class Codex(SyncAPIClient): - health: health.HealthResource - organizations: organizations.OrganizationsResource - users: users.UsersResource - projects: projects.ProjectsResource - with_raw_response: CodexWithRawResponse - with_streaming_response: CodexWithStreamedResponse - # client options auth_token: str | None api_key: str | None @@ -138,12 +135,37 @@ def __init__( _strict_response_validation=_strict_response_validation, ) - self.health = health.HealthResource(self) - self.organizations = organizations.OrganizationsResource(self) - self.users = users.UsersResource(self) - self.projects = projects.ProjectsResource(self) - self.with_raw_response = CodexWithRawResponse(self) - self.with_streaming_response = CodexWithStreamedResponse(self) + @cached_property + def health(self) -> HealthResource: + from .resources.health import HealthResource + + return HealthResource(self) + + @cached_property + def organizations(self) -> OrganizationsResource: + from .resources.organizations import OrganizationsResource + + return OrganizationsResource(self) + + @cached_property + def users(self) -> UsersResource: + from .resources.users import UsersResource + + return UsersResource(self) + + @cached_property + def projects(self) -> ProjectsResource: + from .resources.projects import ProjectsResource + + return ProjectsResource(self) + + @cached_property + def with_raw_response(self) -> CodexWithRawResponse: + return CodexWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CodexWithStreamedResponse: + return CodexWithStreamedResponse(self) @property @override @@ -298,13 +320,6 @@ def _make_status_error( class AsyncCodex(AsyncAPIClient): - health: health.AsyncHealthResource - organizations: organizations.AsyncOrganizationsResource - users: users.AsyncUsersResource - projects: projects.AsyncProjectsResource - with_raw_response: AsyncCodexWithRawResponse - with_streaming_response: AsyncCodexWithStreamedResponse - # client options auth_token: str | None api_key: str | None @@ -382,12 +397,37 @@ def __init__( _strict_response_validation=_strict_response_validation, ) - self.health = health.AsyncHealthResource(self) - self.organizations = organizations.AsyncOrganizationsResource(self) - self.users = users.AsyncUsersResource(self) - self.projects = projects.AsyncProjectsResource(self) - self.with_raw_response = AsyncCodexWithRawResponse(self) - self.with_streaming_response = AsyncCodexWithStreamedResponse(self) + @cached_property + def health(self) -> AsyncHealthResource: + from .resources.health import 
AsyncHealthResource + + return AsyncHealthResource(self) + + @cached_property + def organizations(self) -> AsyncOrganizationsResource: + from .resources.organizations import AsyncOrganizationsResource + + return AsyncOrganizationsResource(self) + + @cached_property + def users(self) -> AsyncUsersResource: + from .resources.users import AsyncUsersResource + + return AsyncUsersResource(self) + + @cached_property + def projects(self) -> AsyncProjectsResource: + from .resources.projects import AsyncProjectsResource + + return AsyncProjectsResource(self) + + @cached_property + def with_raw_response(self) -> AsyncCodexWithRawResponse: + return AsyncCodexWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCodexWithStreamedResponse: + return AsyncCodexWithStreamedResponse(self) @property @override @@ -542,35 +582,127 @@ def _make_status_error( class CodexWithRawResponse: + _client: Codex + def __init__(self, client: Codex) -> None: - self.health = health.HealthResourceWithRawResponse(client.health) - self.organizations = organizations.OrganizationsResourceWithRawResponse(client.organizations) - self.users = users.UsersResourceWithRawResponse(client.users) - self.projects = projects.ProjectsResourceWithRawResponse(client.projects) + self._client = client + + @cached_property + def health(self) -> health.HealthResourceWithRawResponse: + from .resources.health import HealthResourceWithRawResponse + + return HealthResourceWithRawResponse(self._client.health) + + @cached_property + def organizations(self) -> organizations.OrganizationsResourceWithRawResponse: + from .resources.organizations import OrganizationsResourceWithRawResponse + + return OrganizationsResourceWithRawResponse(self._client.organizations) + + @cached_property + def users(self) -> users.UsersResourceWithRawResponse: + from .resources.users import UsersResourceWithRawResponse + + return UsersResourceWithRawResponse(self._client.users) + + @cached_property + def projects(self) -> projects.ProjectsResourceWithRawResponse: + from .resources.projects import ProjectsResourceWithRawResponse + + return ProjectsResourceWithRawResponse(self._client.projects) class AsyncCodexWithRawResponse: + _client: AsyncCodex + def __init__(self, client: AsyncCodex) -> None: - self.health = health.AsyncHealthResourceWithRawResponse(client.health) - self.organizations = organizations.AsyncOrganizationsResourceWithRawResponse(client.organizations) - self.users = users.AsyncUsersResourceWithRawResponse(client.users) - self.projects = projects.AsyncProjectsResourceWithRawResponse(client.projects) + self._client = client + + @cached_property + def health(self) -> health.AsyncHealthResourceWithRawResponse: + from .resources.health import AsyncHealthResourceWithRawResponse + + return AsyncHealthResourceWithRawResponse(self._client.health) + + @cached_property + def organizations(self) -> organizations.AsyncOrganizationsResourceWithRawResponse: + from .resources.organizations import AsyncOrganizationsResourceWithRawResponse + + return AsyncOrganizationsResourceWithRawResponse(self._client.organizations) + + @cached_property + def users(self) -> users.AsyncUsersResourceWithRawResponse: + from .resources.users import AsyncUsersResourceWithRawResponse + + return AsyncUsersResourceWithRawResponse(self._client.users) + + @cached_property + def projects(self) -> projects.AsyncProjectsResourceWithRawResponse: + from .resources.projects import AsyncProjectsResourceWithRawResponse + + return 
AsyncProjectsResourceWithRawResponse(self._client.projects) class CodexWithStreamedResponse: + _client: Codex + def __init__(self, client: Codex) -> None: - self.health = health.HealthResourceWithStreamingResponse(client.health) - self.organizations = organizations.OrganizationsResourceWithStreamingResponse(client.organizations) - self.users = users.UsersResourceWithStreamingResponse(client.users) - self.projects = projects.ProjectsResourceWithStreamingResponse(client.projects) + self._client = client + + @cached_property + def health(self) -> health.HealthResourceWithStreamingResponse: + from .resources.health import HealthResourceWithStreamingResponse + + return HealthResourceWithStreamingResponse(self._client.health) + + @cached_property + def organizations(self) -> organizations.OrganizationsResourceWithStreamingResponse: + from .resources.organizations import OrganizationsResourceWithStreamingResponse + + return OrganizationsResourceWithStreamingResponse(self._client.organizations) + + @cached_property + def users(self) -> users.UsersResourceWithStreamingResponse: + from .resources.users import UsersResourceWithStreamingResponse + + return UsersResourceWithStreamingResponse(self._client.users) + + @cached_property + def projects(self) -> projects.ProjectsResourceWithStreamingResponse: + from .resources.projects import ProjectsResourceWithStreamingResponse + + return ProjectsResourceWithStreamingResponse(self._client.projects) class AsyncCodexWithStreamedResponse: + _client: AsyncCodex + def __init__(self, client: AsyncCodex) -> None: - self.health = health.AsyncHealthResourceWithStreamingResponse(client.health) - self.organizations = organizations.AsyncOrganizationsResourceWithStreamingResponse(client.organizations) - self.users = users.AsyncUsersResourceWithStreamingResponse(client.users) - self.projects = projects.AsyncProjectsResourceWithStreamingResponse(client.projects) + self._client = client + + @cached_property + def health(self) -> health.AsyncHealthResourceWithStreamingResponse: + from .resources.health import AsyncHealthResourceWithStreamingResponse + + return AsyncHealthResourceWithStreamingResponse(self._client.health) + + @cached_property + def organizations(self) -> organizations.AsyncOrganizationsResourceWithStreamingResponse: + from .resources.organizations import AsyncOrganizationsResourceWithStreamingResponse + + return AsyncOrganizationsResourceWithStreamingResponse(self._client.organizations) + + @cached_property + def users(self) -> users.AsyncUsersResourceWithStreamingResponse: + from .resources.users import AsyncUsersResourceWithStreamingResponse + + return AsyncUsersResourceWithStreamingResponse(self._client.users) + + @cached_property + def projects(self) -> projects.AsyncProjectsResourceWithStreamingResponse: + from .resources.projects import AsyncProjectsResourceWithStreamingResponse + + return AsyncProjectsResourceWithStreamingResponse(self._client.projects) Client = Codex From f6d2803b6b897aef905c8ecd3f5e032be5e8a892 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 03:47:38 +0000 Subject: [PATCH 14/14] release: 0.1.0-alpha.35 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 27 +++++++++++++++++++++++++++ pyproject.toml | 2 +- src/codex/_version.py | 2 +- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 36b2aff..f996350 100644 --- a/.release-please-manifest.json +++ 
b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.34" + ".": "0.1.0-alpha.35" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 7782cb1..95cf32e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## 0.1.0-alpha.35 (2025-12-17) + +Full Changelog: [v0.1.0-alpha.34...v0.1.0-alpha.35](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.34...v0.1.0-alpha.35) + +### Features + +* **api:** api update ([7f283d7](https://github.com/cleanlab/codex-python/commit/7f283d7abb4b9b79de86c88745fb66ea9943cdae)) +* **api:** api update ([7742c60](https://github.com/cleanlab/codex-python/commit/7742c60ecad518656a184513c5228a3447aa34c9)) +* **api:** api update ([94bacaf](https://github.com/cleanlab/codex-python/commit/94bacaf492809bc9bc15175d272de53ad2569895)) +* **api:** api update ([884de94](https://github.com/cleanlab/codex-python/commit/884de944e616b26580830817486bb85e74f1e7c4)) + + +### Bug Fixes + +* ensure streams are always closed ([2c971c4](https://github.com/cleanlab/codex-python/commit/2c971c4a93b0e407737648e83e555dc6c9b3a759)) +* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([d64e474](https://github.com/cleanlab/codex-python/commit/d64e47443ef147240de6cba892e901dcab0b2d71)) + + +### Chores + +* add missing docstrings ([250433e](https://github.com/cleanlab/codex-python/commit/250433e37cb8ba034de2977ee6375f06390cc6c4)) +* add Python 3.14 classifier and testing ([4dec29c](https://github.com/cleanlab/codex-python/commit/4dec29cdf74dd3beeccf326678db7170156f0c44)) +* **deps:** mypy 1.18.1 has a regression, pin to 1.17 ([1828526](https://github.com/cleanlab/codex-python/commit/18285268b4eec848b2be2df65cdbdf960424f72d)) +* **internal:** add missing files argument to base client ([c8986ce](https://github.com/cleanlab/codex-python/commit/c8986ce9fa0eae5726ba6cb6692dfa11c60284f5)) +* speedup initial import ([9f17615](https://github.com/cleanlab/codex-python/commit/9f17615353be5ee705ea2f4713d9dc790b2ecb3b)) +* update lockfile ([230659a](https://github.com/cleanlab/codex-python/commit/230659a94b4921805c84224578df1324829e5d07)) + ## 0.1.0-alpha.34 (2025-11-19) Full Changelog: [v0.1.0-alpha.33...v0.1.0-alpha.34](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.33...v0.1.0-alpha.34) diff --git a/pyproject.toml b/pyproject.toml index 5837356..d65296d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codex-sdk" -version = "0.1.0-alpha.34" +version = "0.1.0-alpha.35" description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead." dynamic = ["readme"] license = "MIT" diff --git a/src/codex/_version.py b/src/codex/_version.py index c2ea81e..0bddca9 100644 --- a/src/codex/_version.py +++ b/src/codex/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "codex" -__version__ = "0.1.0-alpha.34" # x-release-please-version +__version__ = "0.1.0-alpha.35" # x-release-please-version
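
The `Options` TypedDict documented in src/codex/types/project_validate_params.py above describes the TLMOptions fields, their allowed ranges, and the per-preset defaults, but no usage example appears anywhere in this patch series. The sketch below is illustrative only: the keys and default values are taken directly from that docstring, while the commented-out call site (the `options` parameter of the project validate endpoint, the client constructor, and its other arguments) is an assumption for illustration, not something these patches define.

    # Illustrative TLMOptions-style dict; keys and defaults mirror the Options docstring.
    tlm_options = {
        "model": "gpt-4.1-mini",            # default base model per the docstring
        "max_tokens": 512,                  # default token budget (64-4096 for OpenAI models)
        "reasoning_effort": "high",         # one of "none", "low", "medium", "high"
        "num_self_reflections": 3,          # maximum currently supported is 3
        "num_consistency_samples": 4,       # 0-20; higher improves trust scoring but raises runtime
        "similarity_measure": "discrepancy",  # default consistency similarity measure
        "log": ["explanation"],             # request explanations for low trustworthiness scores
        "custom_eval_criteria": [
            {"name": "conciseness", "criteria": "Evaluate whether the response is concise."}
        ],
    }

    # Hypothetical call site (assumed, not shown in this patch series):
    # from codex import Codex
    # client = Codex()
    # result = client.projects.validate(..., options=tlm_options)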