From d57ce3629b0e8332636ed2094b617f6d62fde437 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Fri, 27 Jun 2025 17:57:25 +0200 Subject: [PATCH 01/82] risk analyzer scripts and changes required --- examples/risk_analyzer.py | 36 ++++++++------- .../labeler/risk_labeler.py | 4 -- src/bigdata_research_tools/themes.py | 24 ---------- .../workflows/risk_analyzer.py | 45 ++++++++++--------- src/bigdata_research_tools/workflows/utils.py | 10 ++++- 5 files changed, 53 insertions(+), 66 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index cf33fca..1d046e0 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -3,17 +3,16 @@ from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection -from bigdata_research_tools.visuals import create_risk_exposure_dashboard from bigdata_research_tools.workflows.risk_analyzer import RiskAnalyzer - +from bigdata_research_tools.visuals import create_thematic_exposure_dashboard def risk_analyzer_example( - risk_scenario: str, + risk_scenario: str, llm_model: str = "openai::gpt-4o-mini", - keywords: list = ["Tariffs"], - control_entities: dict = {"place": ["Canada", "Mexico"]}, - focus: str = "", - export_path: str = "risk_analyzer_results.xlsx", + keywords: list = ['Tariffs'], + control_entities: dict = {'place':['Canada', 'Mexico']}, + focus: str = '', + export_path: str = "risk_analyzer", ) -> Dict: GRID_watchlist_ID = "44118802-9104-4265-b97a-2e6d88d74893" @@ -28,8 +27,8 @@ def risk_analyzer_example( llm_model=llm_model, main_theme=risk_scenario, companies=companies, - start_date="2025-01-01", - end_date="2025-01-31", + start_date='2025-01-01', + end_date='2025-01-31', keywords=keywords, document_type=DocumentType.NEWS, control_entities=control_entities, @@ -52,13 +51,16 @@ def risk_analyzer_example( logging.basicConfig() logging.getLogger("bigdata_research_tools").setLevel(logging.INFO) - x = risk_analyzer_example( - "US Import Tariffs against Canada and Mexico", - focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.", - ) - + x = risk_analyzer_example("US Import Tariffs against Canada and Mexico", focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.") + # custom_config = { + # 'company_column': 'Company', + # 'heatmap_colorscale': 'Plasma', + # 'dashboard_height': 1800, + # 'top_themes_count': 5, + # 'main_title': 'Custom Thematic Analysis Dashboard' + # } df = x["df_company"] - fig, industry_fig = create_risk_exposure_dashboard(df, n_companies=15) - fig.show(renderer="browser") # Shows the main dashboard - industry_fig.show(renderer="browser") # Shows the industry analysis + # fig, industry_fig = create_thematic_exposure_dashboard(df, n_companies=15, config=custom_config) + # fig.show(renderer="browser") # Shows the main dashboard + # industry_fig.show(renderer="browser") # Shows the industry analysis print(df.head(10)) # Display the first 10 rows of the DataFrame diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index facc22e..5a4ab67 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -229,7 +229,3 @@ def replace_company_placeholders(row: Series, col_name: str = 'motivation') -> s text = [t.replace(f"{get_other_entity_placeholder()}_{entity_id}", entity_name) for t in text] return text - -# Function to map risk_factor to risk_category -def map_risk_category(risk_factor, mapping): - return mapping.get(risk_factor, 'Not Applicable') diff --git a/src/bigdata_research_tools/themes.py b/src/bigdata_research_tools/themes.py index 279bde8..49f2f20 100644 --- a/src/bigdata_research_tools/themes.py +++ b/src/bigdata_research_tools/themes.py @@ -320,30 +320,6 @@ def traverse(node, parent_label=None): traverse(self) return mapping - - def _to_dict(self) -> dict: - """ - Recursively convert the ThemeTree to a dictionary suitable for JSON serialization. - """ - return { - "label": self.label, - "node": self.node, - "summary": self.summary, - "children": [child._to_dict() for child in self.children] if self.children else [], - "keywords": self.keywords, - } - - def save_json(self, filepath: str, **kwargs) -> None: - """ - Save the ThemeTree as a JSON dictionary to the specified file. - - Args: - filepath (str): Path to the output JSON file. - **kwargs: Additional keyword arguments passed to json.dump. - """ - with open(filepath, "w", encoding="utf-8") as f: - json.dump(self._to_dict(), f, ensure_ascii=False, indent=2, **kwargs) - def generate_theme_tree( main_theme: str, diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 1ab2308..f3e123b 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -11,10 +11,11 @@ from bigdata_research_tools.excel import check_excel_dependencies from bigdata_research_tools.themes import ThemeTree -from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category +from bigdata_research_tools.labeler.risk_labeler import RiskLabeler from bigdata_research_tools.workflows.utils import ( get_scored_df, save_to_excel, + map_risk_category ) from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.themes import generate_risk_tree @@ -161,7 +162,6 @@ def label_search_results(self, df_sentences, terminal_labels, risk_tree: ThemeTr """ prompt_fields = self._add_prompt_fields(df_sentences, additional_prompt_fields) - # Label the search results with our theme labels ## To Do: generalize the labeler or pass it as an argument # to allow for different labelers to be used. @@ -204,7 +204,7 @@ def generate_results(self, df_labeled: DataFrame, word_range: Tuple[int, int] = return df_company, df_industry df_company = get_scored_df( - df_labeled, index_columns=["Company", "Ticker", "Sector", "Industry"], pivot_column="Sub-Scenario" + df_labeled, index_columns=["Company", "Ticker", "Industry"], pivot_column="Sub-Scenario" ) df_industry = get_scored_df( df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario" @@ -218,30 +218,34 @@ def generate_results(self, df_labeled: DataFrame, word_range: Tuple[int, int] = return df_company, df_industry, motivation_df - def save_results(self, df_labeled: DataFrame, df_company: DataFrame, df_industry: DataFrame, motivation_df: DataFrame, risk_tree: ThemeTree, export_path: str): + def save_results(self, df: DataFrame, df_labeled: DataFrame, df_company: DataFrame, df_industry: DataFrame, df_motivation: DataFrame, export_path: str): + + ## TO DO: Change to Save to Excel """ Save the results to Excel files if export_path is provided. Args: + df (DataFrame): The DataFrame with the search results. df_labeled (DataFrame): The DataFrame with the labeled search results. df_company (DataFrame): The DataFrame with the output by company. df_industry (DataFrame): The DataFrame with the output by industry. export_path (str): The path to export the results to. """ - if export_path: - save_to_excel( - file_path=export_path, - tables={ - "Semantic Labels": (df_labeled, (0, 0)), - "By Company": (df_company, (2, 5)), - "By Industry": (df_industry, (2, 2)), - "Motivations": (motivation_df, (0, 0)) - }, + if export_path and not check_excel_dependencies(): + logger.error( + "`excel` optional dependencies are not installed. " + "You can run `pip install bigdata_research_tools[excel]` to install them. " + "Consider installing them to save the Thematic Screener result into the " + f"path `{export_path}`." ) - ## Save risk tree to json - risk_tree.save_json(export_path.replace('.xlsx', '_mindmap.json')) - else: - logger.warning("No export path provided. Results will not be saved.") + df.to_csv(export_path+'_df.csv', index=False) + df_labeled.to_csv(export_path+'_df_labeled.csv', index=False) + df_company.to_csv(export_path+'_df_company.csv', index=False) + df_industry.to_csv(export_path+'_df_industry.csv', index=False) + df_motivation.to_csv(export_path+'_df_motivation.csv', index=False) + + #print(f"Full results have been exported to: {export_path}") + def screen_companies( self, @@ -312,14 +316,15 @@ def screen_companies( 'headline'] ) - df_company, df_industry, df_motivation = self.generate_results(df_labeled, word_range) + df_company, df_industry, df_motivation = self.generate_results(df_labeled) # Export to Excel if path provided if export_path: - self.save_results(df_labeled, df_company, df_industry, df_motivation, risk_tree, export_path = export_path) + self.save_results(df, df_labeled, df_company, df_industry, df_motivation, export_path = export_path) - except Exception as e: + except Exception: execution_result = "error" + raise else: execution_result = "success" finally: diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index a676254..578aabb 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -90,4 +90,12 @@ def save_to_excel( (df, sheet_name, position) for sheet_name, (df, position) in tables.items() ] - excel_manager.save_workbook(excel_args, file_path) \ No newline at end of file +<<<<<<< HEAD + excel_manager.save_workbook(excel_args, file_path) +======= + excel_manager.save_workbook(excel_args, file_path) + +# Function to map risk_factor to risk_category +def map_risk_category(risk_factor, mapping): + return mapping.get(risk_factor, 'Not Applicable') +>>>>>>> 2358ddc (risk analyzer scripts and changes required) From 7d402dbdf3b138dee2a3e029eea419af0e6d8886 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 30 Jun 2025 15:08:16 +0200 Subject: [PATCH 02/82] addressing comments and using excel manager to save --- examples/risk_analyzer.py | 2 +- .../labeler/risk_labeler.py | 4 ++ .../workflows/risk_analyzer.py | 40 ++++++++----------- src/bigdata_research_tools/workflows/utils.py | 4 -- 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 1d046e0..c64c5e5 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -12,7 +12,7 @@ def risk_analyzer_example( keywords: list = ['Tariffs'], control_entities: dict = {'place':['Canada', 'Mexico']}, focus: str = '', - export_path: str = "risk_analyzer", + export_path: str = "risk_analyzer_results.xlsx", ) -> Dict: GRID_watchlist_ID = "44118802-9104-4265-b97a-2e6d88d74893" diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 5a4ab67..facc22e 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -229,3 +229,7 @@ def replace_company_placeholders(row: Series, col_name: str = 'motivation') -> s text = [t.replace(f"{get_other_entity_placeholder()}_{entity_id}", entity_name) for t in text] return text + +# Function to map risk_factor to risk_category +def map_risk_category(risk_factor, mapping): + return mapping.get(risk_factor, 'Not Applicable') diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index f3e123b..4680ecd 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -11,11 +11,10 @@ from bigdata_research_tools.excel import check_excel_dependencies from bigdata_research_tools.themes import ThemeTree -from bigdata_research_tools.labeler.risk_labeler import RiskLabeler +from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category from bigdata_research_tools.workflows.utils import ( get_scored_df, save_to_excel, - map_risk_category ) from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.themes import generate_risk_tree @@ -218,34 +217,28 @@ def generate_results(self, df_labeled: DataFrame, word_range: Tuple[int, int] = return df_company, df_industry, motivation_df - def save_results(self, df: DataFrame, df_labeled: DataFrame, df_company: DataFrame, df_industry: DataFrame, df_motivation: DataFrame, export_path: str): - - ## TO DO: Change to Save to Excel + def save_results(self, df_labeled: DataFrame, df_company: DataFrame, df_industry: DataFrame, motivation_df: DataFrame, export_path: str): """ Save the results to Excel files if export_path is provided. Args: - df (DataFrame): The DataFrame with the search results. df_labeled (DataFrame): The DataFrame with the labeled search results. df_company (DataFrame): The DataFrame with the output by company. df_industry (DataFrame): The DataFrame with the output by industry. export_path (str): The path to export the results to. """ - if export_path and not check_excel_dependencies(): - logger.error( - "`excel` optional dependencies are not installed. " - "You can run `pip install bigdata_research_tools[excel]` to install them. " - "Consider installing them to save the Thematic Screener result into the " - f"path `{export_path}`." + if export_path: + save_to_excel( + file_path=export_path, + tables={ + "Semantic Labels": (df_labeled, (0, 0)), + "By Company": (df_company, (2, 4)), + "By Industry": (df_industry, (2, 2)), + "Motivations": (motivation_df, (0, 0)) + }, ) - df.to_csv(export_path+'_df.csv', index=False) - df_labeled.to_csv(export_path+'_df_labeled.csv', index=False) - df_company.to_csv(export_path+'_df_company.csv', index=False) - df_industry.to_csv(export_path+'_df_industry.csv', index=False) - df_motivation.to_csv(export_path+'_df_motivation.csv', index=False) - - #print(f"Full results have been exported to: {export_path}") - + else: + logger.warning("No export path provided. Results will not be saved to Excel.") def screen_companies( self, @@ -316,15 +309,14 @@ def screen_companies( 'headline'] ) - df_company, df_industry, df_motivation = self.generate_results(df_labeled) + df_company, df_industry, df_motivation = self.generate_results(df_labeled, word_range) # Export to Excel if path provided if export_path: - self.save_results(df, df_labeled, df_company, df_industry, df_motivation, export_path = export_path) + self.save_results(df_labeled, df_company, df_industry, df_motivation, export_path = export_path) - except Exception: + except Exception as e: execution_result = "error" - raise else: execution_result = "success" finally: diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index 578aabb..b2185a9 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -90,12 +90,8 @@ def save_to_excel( (df, sheet_name, position) for sheet_name, (df, position) in tables.items() ] -<<<<<<< HEAD - excel_manager.save_workbook(excel_args, file_path) -======= excel_manager.save_workbook(excel_args, file_path) # Function to map risk_factor to risk_category def map_risk_category(risk_factor, mapping): return mapping.get(risk_factor, 'Not Applicable') ->>>>>>> 2358ddc (risk analyzer scripts and changes required) From b025ceaf4f7ab5e268322fcb5db62ef901771501 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 1 Jul 2025 13:48:52 +0200 Subject: [PATCH 03/82] Refactor settings to utils --- src/bigdata_research_tools/excel.py | 2 +- src/bigdata_research_tools/{settings.py => utils.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/bigdata_research_tools/{settings.py => utils.py} (100%) diff --git a/src/bigdata_research_tools/excel.py b/src/bigdata_research_tools/excel.py index e4efc86..b092171 100644 --- a/src/bigdata_research_tools/excel.py +++ b/src/bigdata_research_tools/excel.py @@ -9,7 +9,7 @@ import pandas as pd -from bigdata_research_tools.settings import ( +from bigdata_research_tools.utils import ( check_libraries_installed, get_resources_path, ) diff --git a/src/bigdata_research_tools/settings.py b/src/bigdata_research_tools/utils.py similarity index 100% rename from src/bigdata_research_tools/settings.py rename to src/bigdata_research_tools/utils.py From 4f56e38235289b7b611e0001f4a98c09557de082 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 1 Jul 2025 16:50:05 +0200 Subject: [PATCH 04/82] Refactor excel related functions into the excel module --- examples/query_builder.py | 6 ++++ src/bigdata_research_tools/excel.py | 26 +++++++++++++++ .../workflows/narrative_miner.py | 3 +- .../workflows/risk_analyzer.py | 7 ++-- .../workflows/thematic_screener.py | 7 ++-- src/bigdata_research_tools/workflows/utils.py | 33 ------------------- 6 files changed, 37 insertions(+), 45 deletions(-) diff --git a/examples/query_builder.py b/examples/query_builder.py index 762152b..7277def 100644 --- a/examples/query_builder.py +++ b/examples/query_builder.py @@ -6,6 +6,12 @@ build_batched_query, ) from bigdata_client import Bigdata + +from dotenv import load_dotenv + +# Load environment variables for authentication +print(f"Environment variables loaded: {load_dotenv()}") + bigdata = Bigdata() # Configure logging logging.basicConfig( diff --git a/src/bigdata_research_tools/excel.py b/src/bigdata_research_tools/excel.py index b092171..fc62d1d 100644 --- a/src/bigdata_research_tools/excel.py +++ b/src/bigdata_research_tools/excel.py @@ -187,3 +187,29 @@ def _format_special_columns(self, sheet) -> None: for cell in sheet[last_col_letter][self.row_offset - 2 :]: cell.font = Font(size=12, bold=True) cell.border = self.thick_border + + +def save_to_excel( + file_path: str, + tables: dict[str, tuple[pd.DataFrame, tuple[int, int]]], +) -> None: + """ + Save multiple DataFrames to an Excel file using ExcelManager. + + Args: + file_path: Destination path for the Excel file. + tables: A dict mapping sheet names to (DataFrame, position) tuples. + + Returns: + None. + """ + if not file_path or not check_excel_dependencies(): + return + + excel_manager = ExcelManager() + + excel_args = [ + (df, sheet_name, position) for sheet_name, (df, position) in tables.items() + ] + + excel_manager.save_workbook(excel_args, file_path) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index c4cf10f..f078c5e 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -6,10 +6,9 @@ from pandas import merge from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace -from bigdata_research_tools.excel import check_excel_dependencies +from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.narrative_labeler import NarrativeLabeler from bigdata_research_tools.search import search_narratives -from bigdata_research_tools.workflows.utils import save_to_excel logger: Logger = getLogger(__name__) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 4680ecd..bb231f9 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -8,14 +8,11 @@ from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace -from bigdata_research_tools.excel import check_excel_dependencies +from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.themes import ThemeTree from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category -from bigdata_research_tools.workflows.utils import ( - get_scored_df, - save_to_excel, -) +from bigdata_research_tools.workflows.utils import get_scored_df from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.themes import generate_risk_tree diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index f32108f..aacefd9 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -8,14 +8,11 @@ from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace -from bigdata_research_tools.excel import check_excel_dependencies +from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.themes import generate_theme_tree -from bigdata_research_tools.workflows.utils import ( - get_scored_df, - save_to_excel -) +from bigdata_research_tools.workflows.utils import get_scored_df logger: Logger = getLogger(__name__) diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index b2185a9..ebb71d2 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -4,11 +4,8 @@ from typing import List -from bigdata_client.models.search import DocumentType from pandas import DataFrame -from bigdata_research_tools.excel import ExcelManager, check_excel_dependencies - from IPython.display import display, HTML def display_output_chunks_dataframe(final_df): @@ -65,33 +62,3 @@ def get_scored_df( drop=True ) return df_pivot - - -def save_to_excel( - file_path: str, - tables: dict[str, tuple[DataFrame, tuple[int, int]]], -) -> None: - """ - Save multiple DataFrames to an Excel file using ExcelManager. - - Args: - file_path: Destination path for the Excel file. - tables: A dict mapping sheet names to (DataFrame, position) tuples. - - Returns: - None. - """ - if not file_path or not check_excel_dependencies(): - return - - excel_manager = ExcelManager() - - excel_args = [ - (df, sheet_name, position) for sheet_name, (df, position) in tables.items() - ] - - excel_manager.save_workbook(excel_args, file_path) - -# Function to map risk_factor to risk_category -def map_risk_category(risk_factor, mapping): - return mapping.get(risk_factor, 'Not Applicable') From 6567010bd680932ef7298ef65afb58eb44d5015c Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 1 Jul 2025 16:57:09 +0200 Subject: [PATCH 05/82] Remove unneded copyright notices --- src/bigdata_research_tools/excel.py | 6 ------ src/bigdata_research_tools/labeler/labeler.py | 6 ------ src/bigdata_research_tools/labeler/narrative_labeler.py | 6 ------ src/bigdata_research_tools/labeler/risk_labeler.py | 5 ----- src/bigdata_research_tools/labeler/screener_labeler.py | 6 ------ src/bigdata_research_tools/search/query_builder.py | 5 ----- src/bigdata_research_tools/themes.py | 8 -------- 7 files changed, 42 deletions(-) diff --git a/src/bigdata_research_tools/excel.py b/src/bigdata_research_tools/excel.py index fc62d1d..e80a347 100644 --- a/src/bigdata_research_tools/excel.py +++ b/src/bigdata_research_tools/excel.py @@ -1,9 +1,3 @@ -""" -Module for managing Excel workbook operations. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -""" - from logging import Logger, getLogger from typing import List, Tuple diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index f760e92..f464d4f 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -1,9 +1,3 @@ -""" -Module for managing labeling operations. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -""" - from itertools import zip_longest from json import JSONDecodeError, dumps, loads from logging import Logger, getLogger diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 031ccbe..6333949 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -1,9 +1,3 @@ -""" -Module for managing labeling operations. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -""" - from logging import Logger, getLogger from typing import List, Optional diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index facc22e..05d5ebf 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -1,8 +1,3 @@ -""" -Module for managing labeling operations. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -""" from logging import Logger, getLogger from typing import List, Optional, Any, Dict diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index ed258a1..e3857b5 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -1,9 +1,3 @@ -""" -Module for managing labeling operations. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -""" - from logging import Logger, getLogger from typing import List, Optional diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 17b6073..5141e63 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -1,8 +1,3 @@ -""" -Copyright (C) 2025 RavenPack | Bigdata.com. All rights reserved. -Author: Alessandro Bouchs (abouchs@ravenpack.com), Jelena Starovic (jstarovic@ravenpack.com) -""" - from dataclasses import dataclass from itertools import chain,zip_longest from typing import List, Optional, Tuple, Type, Dict diff --git a/src/bigdata_research_tools/themes.py b/src/bigdata_research_tools/themes.py index 49f2f20..d7ee5b2 100644 --- a/src/bigdata_research_tools/themes.py +++ b/src/bigdata_research_tools/themes.py @@ -1,11 +1,3 @@ -""" -Module that includes all functions to create or extract -information related to the sub-theme tree structure. - -Copyright (C) 2024, RavenPack | Bigdata.com. All rights reserved. -Author: Jelena Starovic (jstarovic@ravenpack.com) -""" - import ast from dataclasses import dataclass from typing import Any, Dict, List, Optional From 7bf0cf3120aeb59d37d968fbf5edcd532b45269d Mon Sep 17 00:00:00 2001 From: jaldana Date: Mon, 7 Jul 2025 08:51:56 +0200 Subject: [PATCH 06/82] PX-550 Refactor themes submodule in a more generic tree structure --- CHANGELOG.md | 11 ++ .../{themes.py => tree.py} | 83 ++++----- .../workflows/narrative_miner.py | 7 +- .../workflows/risk_analyzer.py | 164 ++++++++++++------ .../workflows/thematic_screener.py | 14 +- 5 files changed, 171 insertions(+), 108 deletions(-) rename src/bigdata_research_tools/{themes.py => tree.py} (85%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2354a2c..29f1f13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.99.0] - 2025-07-03 +Preparation for a first stable release. + +### Changed +- Refactor the `themes` submodule to `tree`, allowing for a more generic tree structure + that can be re-used accross different workflows. +- Moved `workflows.utils.save_to_excel` to `excel.save_to_excel`, grouping all optional + features related to Excel in one place. +- Renamed `settings` to `utils` to better reflect its purpose as a utility module and free + the name for future use. + ## [0.15.1] - 2025-06-12 ### Changed diff --git a/src/bigdata_research_tools/themes.py b/src/bigdata_research_tools/tree.py similarity index 85% rename from src/bigdata_research_tools/themes.py rename to src/bigdata_research_tools/tree.py index d7ee5b2..2a21b54 100644 --- a/src/bigdata_research_tools/themes.py +++ b/src/bigdata_research_tools/tree.py @@ -6,12 +6,12 @@ import json from bigdata_research_tools.llm import LLMEngine +from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus from bigdata_research_tools.prompts.themes import ( compose_themes_system_prompt_base, compose_themes_system_prompt_focus, compose_themes_system_prompt_onestep, ) -from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus themes_default_llm_model_config: Dict[str, Any] = { "provider": "openai", @@ -28,28 +28,28 @@ @dataclass -class ThemeTree: +class SemanticTree: """ - A hierarchical tree structure rooted in a main theme, branching into distinct sub-themes - that guide the analyst's research process. + A hierarchical tree structure where each node represents a semantically meaningful unit, or node, that guide the analyst's research process. - Each node in the tree provides a unique identifier, a descriptive label, and a summary - explaining its relevance. + Each node in the tree provides a unique identifier, a descriptive label, and a summary (and optionally keywords) + explaining its relevance. Nodes can have child nodes, allowing the tree + to represent nested or related concepts, categories, or classifications. Args: - label (str): The name of the theme or sub-theme. + label (str): The name of the current node. node (int): A unique identifier for the node. - summary (str): A brief explanation of the node’s relevance. For the root node - (main theme), this describes the overall theme; for sub-nodes, it explains their - connection to the parent theme. - children (Optional[List[ThemeTree]]): A list of child nodes representing sub-themes. - keywords (Optional[List[str]]): A list of keywords summarizing the main theme. Currently used by RiskAnalyzer to ensure branches are relevant. + summary (str): A brief explanation of the node's relevance. For the root node, + this describes the overall relevance of the tree; for sub-nodes, it explains their + connection to the parent node. + children (Optional[List[SemanticTree]]): A list of child nodes representing sub-units. + keywords (Optional[List[str]]): A list of keywords summarizing the current node. """ label: str node: int summary: str = None - children: List["ThemeTree"] = None + children: List["SemanticTree"] = None keywords: Optional[List[str]] = None def __post_init__(self): @@ -59,24 +59,24 @@ def __str__(self) -> str: return self.as_string() @staticmethod - def from_dict(tree_dict: dict) -> "ThemeTree": + def from_dict(tree_dict: dict) -> "SemanticTree": """ - Create a ThemeTree object from a dictionary. + Create a SemanticTree object from a dictionary. Args: - tree_dict (dict): A dictionary representing the ThemeTree structure. + tree_dict (dict): A dictionary representing the SemanticTree structure. Returns: - ThemeTree: The ThemeTree object generated from the dictionary. + SemanticTree: The SemanticTree object generated from the dictionary. """ # Handle case sensitivity in keys tree_dict = dict_keys_to_lowercase(tree_dict) - theme_tree = ThemeTree(**tree_dict) - theme_tree.children = [ - ThemeTree.from_dict(child) for child in tree_dict.get("children", []) + tree = SemanticTree(**tree_dict) + tree.children = [ + SemanticTree.from_dict(child) for child in tree_dict.get("children", []) ] - return theme_tree + return tree def as_string(self, prefix: str = "") -> str: """ @@ -111,7 +111,7 @@ def get_label_summaries(self) -> Dict[str, str]: Extract the label summaries from the tree. Returns: - dict[str, str]: Dictionary with all the labels of the ThemeTree as keys and their associated summaries as values. + dict[str, str]: Dictionary with all the labels of the SemanticTree as keys and their associated summaries as values. """ label_summary = {self.label: self.summary} for child in self.children: @@ -120,7 +120,7 @@ def get_label_summaries(self) -> Dict[str, str]: def get_summaries(self) -> List[str]: """ - Extract the node summaries from a ThemeTree. + Extract the node summaries from a SemanticTree. Returns: list[str]: List of all 'summary' values in the tree, including its children. @@ -135,7 +135,7 @@ def get_terminal_label_summaries(self) -> Dict[str, str]: Extract the items (labels, summaries) from terminal nodes of the tree. Returns: - dict[str, str]: Dictionary with the labels of the ThemeTree as keys and + dict[str, str]: Dictionary with the labels of the SemanticTree as keys and their associated summaries as values, only using terminal nodes. """ label_summary = {} @@ -210,7 +210,7 @@ def _visualize_graphviz(self) -> "graphviz.Digraph": import graphviz except ImportError: raise ImportError( - "Missing optional dependency for theme visualization, " + "Missing optional dependency for tree visualization, " "please install `bigdata_research_tools[graphviz]` to enable them." ) @@ -223,7 +223,7 @@ def _visualize_graphviz(self) -> "graphviz.Digraph": splines="curved", ) - def add_nodes(node): + def add_nodes(node: SemanticTree): # Determine if the node is a terminal (leaf) node is_terminal = not node.children @@ -276,11 +276,11 @@ def _visualize_plotly(self) -> None: import plotly.express as px except ImportError: raise ImportError( - "Missing optional dependency for theme visualization, " + "Missing optional dependency for tree visualization, " "please install `bigdata_research_tools[plotly]` to enable them." ) - def extract_labels(node: ThemeTree, parent_label=""): + def extract_labels(node: SemanticTree, parent_label=""): labels.append(node.label) parents.append(parent_label) for child in node.children: @@ -293,7 +293,7 @@ def extract_labels(node: ThemeTree, parent_label=""): df = DataFrame({"labels": labels, "parents": parents}) fig = px.treemap(df, names="labels", parents="parents") fig.show() - + def get_label_to_parent_mapping(self) -> dict: """ Returns a mapping from each leaf node label to its parent node label. @@ -317,9 +317,9 @@ def generate_theme_tree( main_theme: str, focus: str = "", llm_model_config: Dict[str, Any] = None, -) -> ThemeTree: +) -> SemanticTree: """ - Generate a `ThemeTree` class from a main theme and focus. + Generate a `SemanticTree` class from a main theme and focus. Args: main_theme (str): The primary theme to analyze. @@ -337,7 +337,7 @@ def generate_theme_tree( - `seed` (int) Returns: - ThemeTree: The generated theme tree. + SemanticTree: The generated theme tree. """ ll_model_config = llm_model_config or themes_default_llm_model_config model_str = f"{ll_model_config['provider']}::{ll_model_config['model']}" @@ -380,7 +380,7 @@ def generate_theme_tree( tree_dict = ast.literal_eval(tree_str) - return ThemeTree.from_dict(tree_dict) + return SemanticTree.from_dict(tree_dict) def dict_keys_to_lowercase(d: Dict[str, Any]) -> Dict[str, Any]: @@ -404,10 +404,10 @@ def dict_keys_to_lowercase(d: Dict[str, Any]) -> Dict[str, Any]: def stringify_label_summaries(label_summaries: Dict[str, str]) -> List[str]: """ - Convert the label summaries of a ThemeTree into a list of strings. + Convert the label summaries of a SemanticTree into a list of strings. Args: - label_summaries (dict[str, str]): A dictionary of label summaries of ThemeTree. + label_summaries (dict[str, str]): A dictionary of label summaries of SemanticTree. Expected format: {label: summary}. Returns: List[str]: A list of strings, each one containing a label and its summary, i.e. @@ -415,13 +415,14 @@ def stringify_label_summaries(label_summaries: Dict[str, str]) -> List[str]: """ return [f"{label}: {summary}" for label, summary in label_summaries.items()] + def generate_risk_tree( main_theme: str, focus: str = "", llm_model_config: Dict[str, Any] = None, -) -> ThemeTree: +) -> SemanticTree: """ - Generate a `ThemeTree` class from a main theme and analyst focus. + Generate a `SemanticTree` class from a main theme and analyst focus. Args: main_theme (str): The primary theme to analyze. @@ -439,7 +440,7 @@ def generate_risk_tree( - `seed` (int) Returns: - ThemeTree: The generated theme tree. + SemanticTree: The generated theme tree. """ ll_model_config = llm_model_config or themes_default_llm_model_config model_str = f"{ll_model_config['provider']}::{ll_model_config['model']}" @@ -447,8 +448,10 @@ def generate_risk_tree( system_prompt = compose_risk_system_prompt_focus(main_theme, focus) - tree_str = llm.get_response([{"role": "system", "content": system_prompt}], **ll_model_config["kwargs"]) + tree_str = llm.get_response( + [{"role": "system", "content": system_prompt}], **ll_model_config["kwargs"] + ) tree_dict = ast.literal_eval(tree_str) - return ThemeTree.from_dict(tree_dict) \ No newline at end of file + return SemanticTree.from_dict(tree_dict) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index f078c5e..888bc4a 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -2,13 +2,13 @@ from typing import Dict, List, Optional from bigdata_client.models.search import DocumentType -from bigdata_research_tools.client import init_bigdata_client from pandas import merge -from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.narrative_labeler import NarrativeLabeler from bigdata_research_tools.search import search_narratives +from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace logger: Logger = getLogger(__name__) @@ -41,7 +41,7 @@ def __init__( If not provided, the search is run across all available sources. rerank_threshold: Enable the cross-encoder by setting the value between [0, 1]. """ - + self.llm_model = llm_model self.narrative_sentences = narrative_sentences self.sources = sources @@ -102,6 +102,7 @@ def mine_narratives( batch_size=batch_size, scope=self.document_type, current_trace=current_trace, + fiscal_year=self.fiscal_year, bigdata_client=bigdata_client, ) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index bb231f9..1118a07 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -3,18 +3,16 @@ from bigdata_client.models.entities import Company from bigdata_client.models.search import DocumentType -from bigdata_research_tools.client import init_bigdata_client from pandas import DataFrame, merge -from bigdata_research_tools.portfolio.motivation import Motivation -from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel -from bigdata_research_tools.themes import ThemeTree - from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category -from bigdata_research_tools.workflows.utils import get_scored_df +from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.search.screener_search import search_by_companies -from bigdata_research_tools.themes import generate_risk_tree +from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.tree import SemanticTree, generate_risk_tree +from bigdata_research_tools.workflows.utils import get_scored_df logger: Logger = getLogger(__name__) @@ -73,12 +71,12 @@ def __init__( self.focus = focus def create_taxonomy(self): - """ Create a risk taxonomy based on the main theme and focus. + """Create a risk taxonomy based on the main theme and focus. Returns: - ThemeTree: The generated risk tree. + SemanticTree: The generated risk tree. List[str]: A list of risk summaries for the terminal nodes. List[str]: A list of terminal labels for the risk categories. - """ + """ risk_tree = generate_risk_tree( main_theme=self.main_theme, focus=self.focus, @@ -88,10 +86,16 @@ def create_taxonomy(self): terminal_labels = risk_tree.get_terminal_labels() return risk_tree, risk_summaries, terminal_labels - - def retrieve_results(self,sentences: List[str], freq: str = "3M", document_limit: int = 10, batch_size: int = 10) -> DataFrame: - """ Retrieve search results based on the provided sentences and parameters. - Args: + + def retrieve_results( + self, + sentences: List[str], + freq: str = "3M", + document_limit: int = 10, + batch_size: int = 10, + ) -> DataFrame: + """Retrieve search results based on the provided sentences and parameters. + Args: sentences (List[str]): List of sentences to search for. freq (str): The frequency of the date ranges. Supported values: - 'Y': Yearly intervals. @@ -102,8 +106,9 @@ def retrieve_results(self,sentences: List[str], freq: str = "3M", document_limit document_limit (int): The maximum number of documents to return per Bigdata query. batch_size (int): The number of entities to include in each batched query. Returns: - DataFrame: A DataFrame containing the search results with relevant information. """ - + DataFrame: A DataFrame containing the search results with relevant information. + """ + ## To Do: import the search class and make search_by_companies a class method df_sentences = search_by_companies( companies=self.companies, @@ -122,8 +127,10 @@ def retrieve_results(self,sentences: List[str], freq: str = "3M", document_limit ) return df_sentences - - def _add_prompt_fields(self, df_sentences: DataFrame, additional_prompt_fields: Optional[List] = None) -> List[Dict]: + + def _add_prompt_fields( + self, df_sentences: DataFrame, additional_prompt_fields: Optional[List] = None + ) -> List[Dict]: """ Add additional fields from the DataFrame for the labeling prompt. @@ -142,15 +149,21 @@ def _add_prompt_fields(self, df_sentences: DataFrame, additional_prompt_fields: return df_sentences[additional_prompt_fields].to_dict(orient="records") else: return [] - - def label_search_results(self, df_sentences, terminal_labels, risk_tree: ThemeTree, additional_prompt_fields: Optional[List] = None): + + def label_search_results( + self, + df_sentences, + terminal_labels, + risk_tree: SemanticTree, + additional_prompt_fields: Optional[List] = None, + ): """ Label the search results with our theme labels. Args: df_sentences (DataFrame): The DataFrame containing the search results. terminal_labels (List[str]): The terminal labels for the risk categories. - risk_tree (ThemeTree): The ThemeTree object containing the risk taxonomy. + risk_tree (SemanticTree): The SemanticTree object containing the risk taxonomy. prompt_fields (Dict): Additional fields to be used in the labeling prompt. Returns: @@ -166,7 +179,8 @@ def label_search_results(self, df_sentences, terminal_labels, risk_tree: ThemeTr main_theme=self.main_theme, labels=terminal_labels, texts=df_sentences["masked_text"].tolist(), - textsconfig = prompt_fields) + textsconfig=prompt_fields, + ) # Merge and process results df = merge(df_sentences, df_labels, left_index=True, right_index=True) @@ -178,21 +192,36 @@ def label_search_results(self, df_sentences, terminal_labels, risk_tree: ThemeTr ## to do: generalize the extra fields generation logic to allow for different fields to be added - df['risk_factor'] = df['label'].apply(lambda x: map_risk_category(x, label_to_parent)) + df["risk_factor"] = df["label"].apply( + lambda x: map_risk_category(x, label_to_parent) + ) - df = df.loc[df.risk_factor.notnull() | df.risk_factor.ne('Not Applicable')].copy() + df = df.loc[ + df.risk_factor.notnull() | df.risk_factor.ne("Not Applicable") + ].copy() - df['channel'] = df.apply( - lambda row: row['risk_factor'] + '/' + row['label'], axis=1) - - df['theme'] = self.main_theme + df["channel"] = df.apply( + lambda row: row["risk_factor"] + "/" + row["label"], axis=1 + ) - df_clean = labeler.post_process_dataframe(df, extra_fields = {'channel': 'Risk Channel', 'risk_factor': 'Risk Factor', 'quotes':'Highlights'}, extra_columns=['Risk Channel', 'Risk Factor', 'Highlights']) + df["theme"] = self.main_theme + + df_clean = labeler.post_process_dataframe( + df, + extra_fields={ + "channel": "Risk Channel", + "risk_factor": "Risk Factor", + "quotes": "Highlights", + }, + extra_columns=["Risk Channel", "Risk Factor", "Highlights"], + ) return df, df_clean - - def generate_results(self, df_labeled: DataFrame, word_range: Tuple[int, int] = (50, 100)): - """ Generate the Pivot Tables with factor Scores for companies and industries.""" + + def generate_results( + self, df_labeled: DataFrame, word_range: Tuple[int, int] = (50, 100) + ): + """Generate the Pivot Tables with factor Scores for companies and industries.""" df_company, df_industry = DataFrame(), DataFrame() if df_labeled.empty: @@ -200,21 +229,30 @@ def generate_results(self, df_labeled: DataFrame, word_range: Tuple[int, int] = return df_company, df_industry df_company = get_scored_df( - df_labeled, index_columns=["Company", "Ticker", "Industry"], pivot_column="Sub-Scenario" + df_labeled, + index_columns=["Company", "Ticker", "Industry"], + pivot_column="Sub-Scenario", ) df_industry = get_scored_df( df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario" ) motivation_generator = Motivation(model=self.llm_model) motivation_df = motivation_generator.generate_company_motivations( - df=df_labeled.rename(columns={'Sub-Scenario': 'Theme'}), - theme_name=self.main_theme, - word_range=word_range - ) + df=df_labeled.rename(columns={"Sub-Scenario": "Theme"}), + theme_name=self.main_theme, + word_range=word_range, + ) return df_company, df_industry, motivation_df - - def save_results(self, df_labeled: DataFrame, df_company: DataFrame, df_industry: DataFrame, motivation_df: DataFrame, export_path: str): + + def save_results( + self, + df_labeled: DataFrame, + df_company: DataFrame, + df_industry: DataFrame, + motivation_df: DataFrame, + export_path: str, + ): """ Save the results to Excel files if export_path is provided. @@ -231,11 +269,13 @@ def save_results(self, df_labeled: DataFrame, df_company: DataFrame, df_industry "Semantic Labels": (df_labeled, (0, 0)), "By Company": (df_company, (2, 4)), "By Industry": (df_industry, (2, 2)), - "Motivations": (motivation_df, (0, 0)) + "Motivations": (motivation_df, (0, 0)), }, ) else: - logger.warning("No export path provided. Results will not be saved to Excel.") + logger.warning( + "No export path provided. Results will not be saved to Excel." + ) def screen_companies( self, @@ -264,7 +304,7 @@ def screen_companies( - df_labeled: The DataFrame with the labeled search results. - df_company: The DataFrame with the output by company. - df_industry: The DataFrame with the output by industry. - - risk_tree: The ThemeTree created for the screening. + - risk_tree: The SemanticTree created for the screening. """ if export_path and not check_excel_dependencies(): @@ -274,7 +314,7 @@ def screen_companies( "Consider installing them to save the Thematic Screener result into the " f"path `{export_path}`." ) - + bigdata_client = init_bigdata_client() current_trace = Trace( event_name=TraceEventNames.RISK_ANALYZER, @@ -287,8 +327,8 @@ def screen_companies( workflow_start_date=Trace.get_time_now(), ) - try: - risk_tree, risk_summaries, terminal_labels = self.create_taxonomy() + try: + risk_tree, risk_summaries, terminal_labels = self.create_taxonomy() df_sentences = self.retrieve_results( sentences=risk_summaries, @@ -296,21 +336,31 @@ def screen_companies( document_limit=document_limit, batch_size=batch_size, ) - + df, df_labeled = self.label_search_results( df_sentences=df_sentences, terminal_labels=terminal_labels, risk_tree=risk_tree, - additional_prompt_fields=['entity_sector', - 'entity_industry', - 'headline'] + additional_prompt_fields=[ + "entity_sector", + "entity_industry", + "headline", + ], ) - df_company, df_industry, df_motivation = self.generate_results(df_labeled, word_range) + df_company, df_industry, df_motivation = self.generate_results( + df_labeled, word_range + ) # Export to Excel if path provided if export_path: - self.save_results(df_labeled, df_company, df_industry, df_motivation, export_path = export_path) + self.save_results( + df_labeled, + df_company, + df_industry, + df_motivation, + export_path=export_path, + ) except Exception as e: execution_result = "error" @@ -321,9 +371,9 @@ def screen_companies( current_trace.result = execution_result # noqa send_trace(bigdata_client, current_trace) return { - "df_labeled": df_labeled, - "df_company": df_company, - "df_industry": df_industry, - "df_motivation": df_motivation, - "risk_tree": risk_tree, - } + "df_labeled": df_labeled, + "df_company": df_company, + "df_industry": df_industry, + "df_motivation": df_motivation, + "risk_tree": risk_tree, + } diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index aacefd9..8472271 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -3,15 +3,15 @@ from bigdata_client.models.entities import Company from bigdata_client.models.search import DocumentType -from bigdata_research_tools.client import init_bigdata_client from pandas import DataFrame, merge -from bigdata_research_tools.portfolio.motivation import Motivation -from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler +from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.search.screener_search import search_by_companies -from bigdata_research_tools.themes import generate_theme_tree +from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.tree import generate_theme_tree from bigdata_research_tools.workflows.utils import get_scored_df logger: Logger = getLogger(__name__) @@ -173,9 +173,7 @@ def screen_companies( motivation_generator = Motivation(model=self.llm_model) motivation_df = motivation_generator.generate_company_motivations( - df=df, - theme_name=self.main_theme, - word_range=word_range + df=df, theme_name=self.main_theme, word_range=word_range ) # Export to Excel if path provided @@ -186,7 +184,7 @@ def screen_companies( "Semantic Labels": (df, (0, 0)), "By Company": (df_company, (2, 4)), "By Industry": (df_industry, (2, 2)), - "Motivations": (motivation_df, (0, 0)) + "Motivations": (motivation_df, (0, 0)), }, ) except Exception: From 354aa69754cb9dc8efaa50e478f000260eeb9a23 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 16 Sep 2025 10:38:13 +0000 Subject: [PATCH 07/82] proposed fixes to id-only logic --- .../search/query_builder.py | 5 +-- .../search/screener_search.py | 31 ++++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 17b6073..5bf3be2 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -212,8 +212,9 @@ def _get_entity_ids( if entity is not None: if entity_type in (Entity, ReportingEntity): entity = entity_type(entity.id) - - entity_ids.append(entity) + entity_ids.append(entity) + else: + entity_ids.append(Entity(entity.id)) return entity_ids diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index e23ffec..73dbe36 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -157,7 +157,7 @@ def search_by_companies( results, entities = filter_search_results(results) # Filter entities to only include COMPANY entities - entities = filter_company_entities(entities) + entities, topics = filter_company_entities(entities) # Determine whether to filter by companies based on document type # For filings and transcripts, we don't need to filter as we use reporting entities @@ -172,6 +172,7 @@ def search_by_companies( results=results, entities=entities, companies=companies if needs_company_filtering else None, + topics=topics, document_type=scope, ) except Exception: @@ -203,13 +204,16 @@ def filter_company_entities( entity for entity in entities if hasattr(entity, "entity_type") and getattr(entity, "entity_type") == "COMP" - ] + ], [entity for entity in entities + if hasattr(entity, 'entity_type') and + getattr(entity, "entity_type") != 'COMP'] def process_screener_search_results( results: List[Document], entities: List[ListQueryComponent], companies: Optional[List[Company]] = None, document_type: DocumentType = DocumentType.NEWS, + topics: Optional[List[ListQueryComponent]] = None, ) -> DataFrame: """ Build a unified DataFrame from search results for any document type. @@ -240,10 +244,16 @@ def process_screener_search_results( - text: str - other_entities: str - entities: List[Dict[str, Any]] + - topics: List[Dict[str, Any]] + - source_name: str (if applicable) + - source_rank: int (if applicable) + - url: str (if applicable) - masked_text: str - other_entities_map: List[Tuple[int, str]] """ entity_key_map = {entity.id: entity for entity in entities} + topic_key_map = {entity.id: entity for entity in topics} + company_ids = {company.id for company in companies} if companies else None rows = [] for result in tqdm(results, desc=f"Processing {document_type} results..."): @@ -268,6 +278,14 @@ def process_screener_search_results( for entity in chunk.entities if entity.key in entity_key_map ] + chunk_topics = [{'key': entity.key, + 'name': (topic_key_map[entity.key].name if entity.key in topic_key_map else None), + 'entity_type': (topic_key_map[entity.key].entity_type if entity.key in topic_key_map else None), + #'country': (topic_key_map[entity.key].country if entity.key in topic_key_map else None), + 'start': entity.start, + 'end': entity.end} + for entity in chunk.entities + if entity.key in topic_key_map] if not chunk_entities: continue # Skip if no entities are mapped @@ -306,6 +324,7 @@ def process_screener_search_results( e["name"] for e in other_entities ), "entities": chunk_entities, + 'topics': chunk_topics, } ) else: @@ -317,7 +336,7 @@ def process_screener_search_results( continue # Skip if entity is not found # # if entity isn't in our original watchlist, skip - if companies and entity_key not in companies: + if company_ids and chunk_entity["key"] not in company_ids: continue # Exclude the entity from other entities @@ -345,7 +364,11 @@ def process_screener_search_results( e["name"] for e in other_entities ), "entities": chunk_entities, - } + 'topics': chunk_topics, + 'source_name': result.source.name, + 'source_rank': result.source.rank, + 'url': result.url + } ) if not rows: From bf020cdd1804fb4374ad20c6237373d0f89cba06 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 16 Sep 2025 12:57:55 +0200 Subject: [PATCH 08/82] Fix imports --- src/bigdata_research_tools/tree.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index b90bb20..413d04b 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -8,11 +8,7 @@ from bigdata_research_tools.llm import LLMEngine from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus -from bigdata_research_tools.prompts.themes import ( - compose_themes_system_prompt_base, - compose_themes_system_prompt_focus, - compose_themes_system_prompt_onestep, -) +from bigdata_research_tools.prompts.themes import compose_themes_system_prompt themes_default_llm_model_config: Dict[str, Any] = { "provider": "openai", From e6c5a058f8ef5fa5ffc25aaa5096dc9636864484 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 23 Sep 2025 09:42:20 +0200 Subject: [PATCH 09/82] Fix docstring of screen_companies in risk analyzer --- src/bigdata_research_tools/workflows/risk_analyzer.py | 1 + uv.lock | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 08008ca..5bf5df7 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -312,6 +312,7 @@ def screen_companies( - df_labeled: The DataFrame with the labeled search results. - df_company: The DataFrame with the output by company. - df_industry: The DataFrame with the output by industry. + - df_motivation: The DataFrame with the generated motivations. - risk_tree: The SemanticTree created for the screening. """ diff --git a/uv.lock b/uv.lock index aa4947c..9c6bee5 100644 --- a/uv.lock +++ b/uv.lock @@ -318,7 +318,7 @@ wheels = [ [[package]] name = "bigdata-research-tools" -version = "0.20.1" +version = "0.99.0" source = { virtual = "." } dependencies = [ { name = "bigdata-client" }, From 457b64f3d2bfbb937d3866177bd526f534791922 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 10:08:02 +0200 Subject: [PATCH 10/82] Rename freq parameter to frequency for consistency with all workflows --- CHANGELOG.md | 3 ++- README.md | 2 +- docs/cookbooks/miners.rst | 12 ++++++------ .../search/narrative_search.py | 6 +++--- src/bigdata_research_tools/search/query_builder.py | 14 +++++++------- .../search/screener_search.py | 8 ++++---- .../workflows/narrative_miner.py | 8 ++++---- .../workflows/risk_analyzer.py | 8 ++++---- .../workflows/thematic_screener.py | 2 +- 9 files changed, 32 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7101904..22c596c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.99.0] - 2025-07-03 +## [0.99.0] - Unreleased Preparation for a first stable release. ### Changed @@ -17,6 +17,7 @@ Preparation for a first stable release. features related to Excel in one place. - Renamed `settings` to `utils` to better reflect its purpose as a utility module and free the name for future use. +- Renamed all `freq` parameters to `frequency` for better clarity and consistency accross workflows. ## [0.20.1] - 2025-09-16 diff --git a/README.md b/README.md index 5a4a8db..46e8cab 100644 --- a/README.md +++ b/README.md @@ -354,7 +354,7 @@ Parameters to run the analysis end-to-end. |-----------|------|---------|-------------| | `document_limit` | `int` | `10` | Documents per query (see [Document Limit Guide](#document-limit))| | `batch_size` | `int` | `10` | Batch size for processing (see [Batch Size Parameter Guide](#batch-size))| -| `freq` | `str` | `"3M"` | Date range frequency (see [Frequency Parameter Guide](#frequency))| +| `frequency` | `str` | `"3M"` | Date range frequency (see [Frequency Parameter Guide](#frequency))| | `export_path` | `str` | `None` | Excel export path | #### Return Values diff --git a/docs/cookbooks/miners.rst b/docs/cookbooks/miners.rst index 596ef4a..ea1fad8 100644 --- a/docs/cookbooks/miners.rst +++ b/docs/cookbooks/miners.rst @@ -4042,21 +4042,21 @@ transcripts, and SEC filings: print("Mining news narratives...") news_results = news_miner.mine_narratives( document_limit=100, - freq='W', # Weekly frequency + frequency='W', # Weekly frequency export_path=news_results_path ) print("Mining earnings call transcripts...") transcripts_results = transcripts_miner.mine_narratives( document_limit=100, - freq='M', # Monthly frequency (earnings are quarterly) + frequency='M', # Monthly frequency (earnings are quarterly) export_path=transcripts_results_path ) print("Mining SEC filings...") filings_results = filings_miner.mine_narratives( document_limit=100, - freq='M', # Monthly frequency (filings are quarterly) + frequency='M', # Monthly frequency (filings are quarterly) export_path=filings_results_path ) @@ -4376,13 +4376,13 @@ overall source scores. # Narrative Analysis Functions - def prepare_narrative_data(df, freq='W'): + def prepare_narrative_data(df, frequency='W'): """ Prepare narrative data for visualization by creating time series of narrative counts, converting to z-scores, and applying smoothing. """ pivot_df = pd.pivot_table(df, index='Date', columns='Label', aggfunc='size', fill_value=0) - resampled_df = pivot_df.resample(freq).sum() + resampled_df = pivot_df.resample(frequency).sum() # Calculate z-scores for each narrative zscore_df = pd.DataFrame() @@ -4602,7 +4602,7 @@ and a narrative breakdown chart for news media. def visualize_news_narrative_breakdown(): """Create a stacked area chart showing the breakdown of specific narratives in news with unique colors""" # Prepare news narrative data - news_narratives = prepare_narrative_data(news_df, freq='W') + news_narratives = prepare_narrative_data(news_df, frequency='W') # Filter to only include the top narratives max_values = news_narratives.max() diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index a97fa05..d40f633 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -30,7 +30,7 @@ def search_narratives( sources: Optional[List[str]] = None, keywords: Optional[List[str]] = None, control_entities: Optional[List[str]] = None, - freq: str = "M", + frequency: str = "M", sort_by: SortBy = SortBy.RELEVANCE, rerank_threshold: Optional[float] = None, document_limit: int = 50, @@ -53,7 +53,7 @@ def search_narratives( If None, no keyword queries are created. control_entities (Optional[List[str]]): A list of control entity IDs for creating co-mentions queries. If None, no control queries are created. - freq (str): The frequency of the date ranges. Defaults to 'M'. + frequency (str): The frequency of the date ranges. Defaults to 'M'. sort_by (SortBy): The sorting criterion for the search results. Defaults to SortBy.RELEVANCE. rerank_threshold (Optional[float]): The threshold for reranking the search results. @@ -91,7 +91,7 @@ def search_narratives( ) # Create list of date ranges - date_ranges = create_date_ranges(start_date, end_date, freq) + date_ranges = create_date_ranges(start_date, end_date, frequency) no_queries = len(batched_query) no_dates = len(date_ranges) diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 5141e63..53d92e2 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -397,7 +397,7 @@ def _expand_queries( return queries_expanded def create_date_intervals( - start_date: str, end_date: str, freq: str + start_date: str, end_date: str, frequency: str ) -> List[Tuple[pd.Timestamp, pd.Timestamp]]: """ Generates date intervals based on a specified frequency within a given start and end date range. @@ -407,7 +407,7 @@ def create_date_intervals( The start date in 'YYYY-MM-DD' format. end_date (str): The end date in 'YYYY-MM-DD' format. - freq (str): + frequency (str): The frequency for intervals. Supported values: - 'Y': Yearly intervals. - 'M': Monthly intervals. @@ -443,11 +443,11 @@ def create_date_intervals( # Adjust frequency for yearly and monthly to use appropriate start markers # 'AS' for year start, 'MS' for month start - adjusted_freq = freq.replace("Y", "AS").replace("M", "MS") + adjusted_freq = frequency.replace("Y", "AS").replace("M", "MS") # Generate date range based on the adjusted frequency try: - date_range = pd.date_range(start=start_date, end=end_date, freq=adjusted_freq) + date_range = pd.date_range(start=start_date, end=end_date, frequency=adjusted_freq) except ValueError: raise ValueError("Invalid frequency. Use 'Y', 'M', 'W', or 'D'.") @@ -475,7 +475,7 @@ def create_date_intervals( def create_date_ranges( - start_date: str, end_date: str, freq: str + start_date: str, end_date: str, frequency: str ) -> List[AbsoluteDateRange]: """ Generates a list of `AbsoluteDateRange` objects based on the specified frequency. @@ -485,7 +485,7 @@ def create_date_ranges( The start date in 'YYYY-MM-DD' format. end_date (str): The end date in 'YYYY-MM-DD' format. - freq (str): + frequency (str): The frequency for dividing the date range. Supported values: - 'Y': Yearly. - 'M': Monthly. @@ -502,5 +502,5 @@ def create_date_ranges( 2. Converts each interval (start and end tuple) into an `AbsoluteDateRange` object. 3. Returns a list of these `AbsoluteDateRange` objects. """ - intervals = create_date_intervals(start_date, end_date, freq=freq) + intervals = create_date_intervals(start_date, end_date, frequency=frequency) return [AbsoluteDateRange(start, end) for start, end in intervals] diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index e23ffec..89d2426 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -36,7 +36,7 @@ def search_by_companies( sources: Optional[List[str]] = None, keywords: Optional[List[str]] = None, control_entities: Optional[Dict] = None, - freq: str = "M", + frequency: str = "M", sort_by: SortBy = SortBy.RELEVANCE, rerank_threshold: Optional[float] = None, document_limit: int = 50, @@ -59,7 +59,7 @@ def search_by_companies( keywords (List[str]): A list of keywords for constructing keyword queries. If None, no keyword queries are created. control_entities (Dict): A dictionary of control entities of different types for creating co-mentions queries. - freq (str): The frequency of the date ranges. Defaults to '3M'. + frequency (str): The frequency of the date ranges. Defaults to '3M'. sort_by (SortBy): The sorting criterion for the search results. Defaults to SortBy.RELEVANCE. rerank_threshold (Optional[float]): The threshold for reranking the search results. @@ -103,7 +103,7 @@ def search_by_companies( end_date=end_date, rerank_threshold=rerank_threshold, llm_model=None, - frequency=freq, + frequency=frequency, workflow_start_date=Trace.get_time_now(), ) @@ -136,7 +136,7 @@ def search_by_companies( ) # Create list of date ranges - date_ranges = create_date_ranges(start_date, end_date, freq) + date_ranges = create_date_ranges(start_date, end_date, frequency) no_queries = len(batched_query) no_dates = len(date_ranges) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 496acd5..acf80dd 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -56,7 +56,7 @@ def mine_narratives( self, document_limit: int = 10, batch_size: int = 10, - freq: str = "3M", + frequency: str = "3M", export_path: Optional[str] = None, ) -> Dict: """ @@ -65,7 +65,7 @@ def mine_narratives( Args: document_limit: Maximum number of documents to analyze. batch_size: Size of batches for processing. - freq: Frequency for analysis ('M' for monthly). + frequency: Frequency for analysis ('M' for monthly). export_path: Optional path to export results to an Excel file. Returns: @@ -87,7 +87,7 @@ def mine_narratives( end_date=self.end_date, rerank_threshold=self.rerank_threshold, llm_model=self.llm_model, - frequency=freq, + frequency=frequency, workflow_start_date=Trace.get_time_now(), ) try: @@ -99,7 +99,7 @@ def mine_narratives( rerank_threshold=self.rerank_threshold, start_date=self.start_date, end_date=self.end_date, - freq=freq, + frequency=frequency, document_limit=document_limit, batch_size=batch_size, scope=self.document_type, diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 5bf5df7..9a48705 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -95,14 +95,14 @@ def create_taxonomy(self): def retrieve_results( self, sentences: List[str], - freq: str = "3M", + frequency: str = "3M", document_limit: int = 10, batch_size: int = 10, ) -> DataFrame: """Retrieve search results based on the provided sentences and parameters. Args: sentences (List[str]): List of sentences to search for. - freq (str): The frequency of the date ranges. Supported values: + frequency (str): The frequency of the date ranges. Supported values: - 'Y': Yearly intervals. - 'M': Monthly intervals. - 'W': Weekly intervals. @@ -126,7 +126,7 @@ def retrieve_results( fiscal_year=self.fiscal_year, sources=self.sources, rerank_threshold=self.rerank_threshold, - freq=freq, + frequency=frequency, document_limit=document_limit, batch_size=batch_size, ) @@ -345,7 +345,7 @@ def screen_companies( self.notify_observers(f"Searching companies for risk exposure") df_sentences = self.retrieve_results( sentences=risk_summaries, - freq=frequency, + frequency=frequency, document_limit=document_limit, batch_size=batch_size, ) diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 0709611..b390fbe 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -141,7 +141,7 @@ def screen_companies( fiscal_year=self.fiscal_year, sources=self.sources, rerank_threshold=self.rerank_threshold, - freq=frequency, + frequency=frequency, document_limit=document_limit, batch_size=batch_size, current_trace=current_trace, From af697d0002d0e8e9bb174b59b548ce2458dc4297 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 11:16:37 +0200 Subject: [PATCH 11/82] Change build system to properly install package locally Adapt CI pipeline to not trigger with beta versions --- .github/workflows/release.yml | 2 ++ pyproject.toml | 11 ++++------- uv.lock | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3ab2135..cedd9b9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,6 +4,8 @@ on: push: tags: - 'v*' + tags-ignore: + - 'v*-beta*' jobs: release: diff --git a/pyproject.toml b/pyproject.toml index 6c83cb8..53936c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bigdata-research-tools" -version = "0.99.0" +version = "1.0.0-beta-0" description = "Bigdata.com API High-Efficiency Tools at Scale" readme = "README.md" authors = [{ name = "Bigdata.com", email = "support@ravenpack.com" }] @@ -53,9 +53,6 @@ dev = [ "pre-commit>=4.1.0", ] -# Build configuration to include resource files -[tool.setuptools] -packages = {find = {where = ["src"]}} - -[tool.setuptools.package-data] -"bigdata_research_tools" = ["res/*.png", "res/*.jpg", "res/*"] +[build-system] +requires = ["uv_build>=0.8.22,<0.9.0"] +build-backend = "uv_build" \ No newline at end of file diff --git a/uv.lock b/uv.lock index 9c6bee5..7e81d07 100644 --- a/uv.lock +++ b/uv.lock @@ -318,8 +318,8 @@ wheels = [ [[package]] name = "bigdata-research-tools" -version = "0.99.0" -source = { virtual = "." } +version = "1.0.0b0" +source = { editable = "." } dependencies = [ { name = "bigdata-client" }, { name = "graphviz" }, From 8dddce9f45c780a25c11ec29c7dc07908d28e734 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 11:21:50 +0200 Subject: [PATCH 12/82] Fix CI filters and update changelog --- .github/workflows/release.yml | 3 +-- CHANGELOG.md | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cedd9b9..ab0b624 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,8 +4,7 @@ on: push: tags: - 'v*' - tags-ignore: - - 'v*-beta*' + - '!v*-beta*' jobs: release: diff --git a/CHANGELOG.md b/CHANGELOG.md index 22c596c..601cbbe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,9 @@ Preparation for a first stable release. the name for future use. - Renamed all `freq` parameters to `frequency` for better clarity and consistency accross workflows. +### Fixed +- Changed build system to use `uv_build` instead of `setuptools` to avoid issues with package data inclusion. + ## [0.20.1] - 2025-09-16 ### Fix From bebe1f81aa4a19a07a67ecea0114b54450d4c34a Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 11:55:28 +0200 Subject: [PATCH 13/82] Remove old, outdated docs, documentation now lives at https://docs.bigdata.com --- .readthedocs.yaml | 36 - docs/_static/bigdata_dark.svg | 27 - docs/_static/bigdata_light.svg | 26 - docs/bigdata_sdk/index.rst | 9 - docs/changelog_link.md | 2 - docs/conf.py | 131 - docs/cookbooks/index.rst | 24 - docs/cookbooks/miners.rst | 12674 ------------------------------- docs/index.rst | 39 - docs/reference/index.rst | 12 - docs/reference/miners.rst | 10 - docs/reference/screeners.rst | 10 - docs/reference/search.rst | 7 - docs/reference/themes.rst | 13 - 14 files changed, 13020 deletions(-) delete mode 100644 .readthedocs.yaml delete mode 100644 docs/_static/bigdata_dark.svg delete mode 100644 docs/_static/bigdata_light.svg delete mode 100644 docs/bigdata_sdk/index.rst delete mode 100644 docs/changelog_link.md delete mode 100644 docs/conf.py delete mode 100644 docs/cookbooks/index.rst delete mode 100644 docs/cookbooks/miners.rst delete mode 100644 docs/index.rst delete mode 100644 docs/reference/index.rst delete mode 100644 docs/reference/miners.rst delete mode 100644 docs/reference/screeners.rst delete mode 100644 docs/reference/search.rst delete mode 100644 docs/reference/themes.rst diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 41ffbfc..0000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# .readthedocs.yaml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Build offline documentation -formats: - - htmlzip - -# Set the OS, Python version and other tools you might need -build: - os: ubuntu-22.04 - tools: - python: "3.11" - jobs: - post_create_environment: - # Install uv - - pip install uv - - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH - - uv sync --extra docs - - uv pip freeze > docs/requirements.txt - # Tell poetry to not use a virtual environment - post_install: - # Install dependencies with 'docs' dependency group - # - python -m poetry install --with docs - # Just for debugging - - uv run make create-docs - -# Build documentation in the "docs/" directory with Sphinx -sphinx: - configuration: docs/conf.py -python: - install: - - requirements: docs/requirements.txt diff --git a/docs/_static/bigdata_dark.svg b/docs/_static/bigdata_dark.svg deleted file mode 100644 index 72b58a4..0000000 --- a/docs/_static/bigdata_dark.svg +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/_static/bigdata_light.svg b/docs/_static/bigdata_light.svg deleted file mode 100644 index 64436a5..0000000 --- a/docs/_static/bigdata_light.svg +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/bigdata_sdk/index.rst b/docs/bigdata_sdk/index.rst deleted file mode 100644 index 9c31446..0000000 --- a/docs/bigdata_sdk/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _bigdata_sdk: - -Bigdata SDK -=========== -.. raw:: html - - - -If you are not redirected automatically, please click [here](https://sdk.bigdata.com/en/latest/). \ No newline at end of file diff --git a/docs/changelog_link.md b/docs/changelog_link.md deleted file mode 100644 index 8261b35..0000000 --- a/docs/changelog_link.md +++ /dev/null @@ -1,2 +0,0 @@ -```{include} ../CHANGELOG.md -``` \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 7dd2bda..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,131 +0,0 @@ -import os -import shutil -import sys -from pathlib import Path - -import sphinx - -# import sphinxcontrib.spelling - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "Bigdata Research Tools" -copyright = "2024, RavenPack" -author = "RavenPack" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -templates_path = ["_templates"] -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -# html_theme = "alabaster" -html_theme = "furo" -html_static_path = ["_static"] -# Logo not needed here for furo. -# html_logo = "_static/bigdata.svg" -html_theme_options = { - "light_logo": "bigdata_light.svg", - "dark_logo": "bigdata_dark.svg", - "sidebar_hide_name": True, - "light_css_variables": { - "color-brand-primary": "#0387FE", - "color-brand-content": "#0387FE", - }, - "dark_css_variables": { - "color-brand-primary": "#00A9FE", - "color-brand-content": "#00A9FE", - }, -} -pygments_style = "sphinx" -pygments_dark_style = "monokai" - - -PROJECT_DIRECTORY = Path(__file__).parent.parent / "src" - -sys.path.insert(0, str(PROJECT_DIRECTORY)) - -# version = '.'.join(__version_info__[:2]) -# release = __version__ - -# -- General configuration --------------------------------------------------- -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx.ext.mathjax", - # Disabling autosectionlabel because it does strange things in markdown documents - # "sphinx.ext.autosectionlabel", - # 'nbsphinx', - "sphinx.ext.autosummary", - "sphinx.ext.intersphinx", - "sphinxcontrib.autodoc_pydantic", - "sphinxcontrib.spelling", - "myst_parser", - "sphinx_new_tab_link", # Extension that forces every link to open in a new tab. This is a req for the current ones. - "sphinx_copybutton", # Extension to provide a copy button in the code samples - "sphinx_reredirects", - "sphinx_togglebutton", -] - -# sphinx_reredirects -redirects = {"getting_started/overview.html": "./index.html"} - - -# suppress_warnings = ['autosectionlabel.*', # nbsphinx and austosectionlabel do not play well together -# 'app.add_node', # using multiple builders in custom Sphinx objects throws a bunch of these -# 'app.add_directive', -# 'app.add_role',] - -suppress_warnings = [ - "myst.xref_missing" # This warning happens whenever links referencing sections are used in rst files. But the links work fine. -] -# -- References (BibTex) ----------------------------------------------------- -bibtex_bibfiles = ["wecopttool_refs.bib"] -bibtex_encoding = "utf-8-sig" -bibtex_default_style = "alpha" -bibtex_reference_style = "label" -bibtex_foot_reference_style = "foot" - - -# -- API documentation ------------------------------------------------------- -napoleon_numpy_docstring = True -napoleon_include_init_with_doc = True -add_module_names = False -html_show_sourcelink = False -autodoc_typehints = "description" -autodoc_type_aliases = { - "ArrayLike": "ArrayLike", - "FloatOrArray": "FloatOrArray", - "TStateFunction": "StateFunction", - "TWEC": "WEC", - "TPTO": "PTO", - "TEFF": "Callable[[ArrayLike, ArrayLike], ArrayLike]", - "TForceDict": "dict[str, StateFunction]", - "TIForceDict": "Mapping[str, StateFunction]", - "DataArray": "DataArray", - "Dataset": "Dataset", - "Figure": "Figure", - "Axes": "Axes", -} -autodoc_class_signature = "separated" -highlight_language = "python3" -rst_prolog = """ -.. role:: python(code) - :language: python -""" -autodoc_default_options = {"exclude-members": "__new__"} -autosummary_ignore_module_all = False -autosummary_imported_members = True -autodoc_pydantic_model_show_config_summary = False - -spelling_warning = True -spelling_word_list_filename = "spelling_wordlist.txt" -copybutton_exclude = ".linenos, .gp, .go" -copybutton_prompt_text = "(bigdata_venv) $" -copybutton_prompt_text = ">>> " diff --git a/docs/cookbooks/index.rst b/docs/cookbooks/index.rst deleted file mode 100644 index 98fa818..0000000 --- a/docs/cookbooks/index.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. _cookbooks: - -Cookbooks -========= - -This section provides practical cookbooks—concise, example-driven guides that demonstrate how to use functions and classes from the bigdata-research-tools package to build your own -research workflows. - -Each cookbook walks through a different workflow, showing how to work with the SDK’s building blocks—its classes, methods, and functions—to process data, interpret the output, and apply the results in a research context. - -Narrative Miner ---------------- -Researchers, analysts, and decision-makers face the challenge of monitoring how key narratives evolve over time in an overwhelming sea of text data. The Narrative Miner provides a systematic approach to: - -* Define a set of narratives you wish to track -* Search and label transcripts, news, and filings data using the pre-defined narratives -* Compare narrative evolution across the different document types and time periods - -Whether you're conducting academic research, market intelligence, policy analysis, or strategic planning, the Narrative Miner offers tools to transform unstructured text into structured insights about how stories evolve and spread. - -.. toctree:: - :maxdepth: 1 - - miners.rst \ No newline at end of file diff --git a/docs/cookbooks/miners.rst b/docs/cookbooks/miners.rst deleted file mode 100644 index ea1fad8..0000000 --- a/docs/cookbooks/miners.rst +++ /dev/null @@ -1,12674 +0,0 @@ -Narrative Miners: Uncover the Stories That Drive Markets -======================================================== - -Introduction ------------- - -This notebook demonstrates how advanced narrative mining reveals -evolving market stories across multiple document types. We will track -the “AI Bubble Concerns” narrative as it emerges and evolves across -news, earnings calls, and regulatory filings – highlighting the -difference between public discourse and corporate communications. - -The bigdata-research-tools package provides a specialised class, -NarrativeMiner, for narrative mining. We can specify the document_type -parameter to be DocumentType.NEWS, DocumentType.TRANSCRIPTS, -DocumentType.FILINGS to be able to - -- Analyze web-based news content -- Examine earnings call and event transcripts -- Explore SEC Filings from EDGAR - -Each Narrative Miner instance follows the same workflow: - -1. Define narrative labels which encompass a theme -2. Retrieve content using BigData’s search capabilities -3. Label content with LLMs to identify narrative matches -4. Analyze the results to reveal patterns and insights - -Setup and Imports ------------------ - -Below is the Python code required for setting up our environment and -importing necessary libraries. - -.. code:: python - - from IPython.display import display, HTML, IFrame - import pandas as pd - import matplotlib.pyplot as plt - import seaborn as sns - import numpy as np - from datetime import datetime - import os - from scipy.ndimage import gaussian_filter1d - import plotly - import plotly.graph_objects as go - import warnings - from IPython.display import Image, display - import plotly.io as pio - pio.renderers.default = 'notebook' - plotly.offline.init_notebook_mode() - - from bigdata_client import Bigdata - from bigdata_client.daterange import RollingDateRange - from bigdata_research_tools.workflows import NarrativeMiner - from bigdata_client.models.sources import Source - from bigdata_client.models.search import DocumentType - - - -.. raw:: html - - - - - - -Define Output Paths -------------------- - -We define the output paths for our narrative mining results. - -.. code:: python - - # Define output file paths for our results - output_dir = "output" - os.makedirs(output_dir, exist_ok=True) - - news_results_path = f"{output_dir}/ai_bubble_news.xlsx" - transcripts_results_path = f"{output_dir}/ai_bubble_transcripts.xlsx" - filings_results_path = f"{output_dir}/ai_bubble_filings.xlsx" - visualization_path = f"{output_dir}/ai_bubble_narratives.html" - -Load Environment Variables --------------------------- - -Make sure you have added your BigData API credentials to a .env file. -Then load them as follows: - -.. code:: python - - # Load environment variables for BigData credentials - from dotenv import load_dotenv - load_dotenv('.env') - - BIGDATA_USERNAME = os.getenv("BIGDATA_USERNAME") - BIGDATA_PASSWORD = os.getenv("BIGDATA_PASSWORD") - bigdata = Bigdata(BIGDATA_USERNAME, BIGDATA_PASSWORD) - -Define Narratives ------------------ - -We define specific narratives related to the AI bubble concerns: - -.. code:: python - - ai_bubble_narratives = [ - "Tech valuations have detached from fundamental earnings potential", - "AI investments show classic signs of irrational exuberance", - "Market is positioning AI as revolutionary without proven ROI", - "Current AI investments may not generate predicted financial returns", - "Tech CEOs acknowledge AI implementation challenges amid high expectations", - "Analysts are questioning the timeline for AI-driven profits", - "Companies are spending billions on unproven AI technology", - "AI infrastructure costs are rising but revenue gains remain uncertain", - "Venture capital is flooding AI startups at unsustainable valuations", - "Regulatory concerns could derail AI market growth projections", - "Public discourse about AI capabilities exceeds technical realities", - "AI talent acquisition costs have created an unsustainable bubble", - "Corporate executives privately express concerns about AI ROI timelines", - "AI market projections rely on aggressive and unproven assumptions", - "Industry veterans drawing parallels to previous tech bubbles" - ] - -Configure the Narrative Miners ------------------------------- - -Create narrative miners for each document type. In this example, we -select MT Newswires as the news source. Note that you should curate your -own set of trusted sources - -.. code:: python - - # Common parameters across narrative miners - common_params = { - "narrative_sentences": ai_bubble_narratives, - "llm_model": "openai::gpt-4o-mini", - "start_date": "2023-03-01", - "end_date": "2025-03-28", - "rerank_threshold": 0.7, - } - - # Choose CNBC as a news source - tech_news_sources = bigdata.knowledge_graph.find_sources("MT Newswires") - tech_news_ids = [source.id for source in tech_news_sources if "MT Newswires" == source.name] - - # Create narrative miners for each document type - news_miner = NarrativeMiner(sources= tech_news_ids, document_type=DocumentType.NEWS, **common_params, fiscal_year=None) - transcripts_miner = NarrativeMiner(**common_params, sources=None, document_type=DocumentType.TRANSCRIPTS, fiscal_year=2024) - filings_miner = NarrativeMiner(**common_params, sources=None, document_type=DocumentType.FILINGS, fiscal_year=2024) - -Run Narrative Mining Across Sources ------------------------------------ - -Execute the narrative mining processes for news, earnings call -transcripts, and SEC filings: - -.. code:: python - - # Run Narrative Mining Across Sources - print("Mining news narratives...") - news_results = news_miner.mine_narratives( - document_limit=100, - frequency='W', # Weekly frequency - export_path=news_results_path - ) - - print("Mining earnings call transcripts...") - transcripts_results = transcripts_miner.mine_narratives( - document_limit=100, - frequency='M', # Monthly frequency (earnings are quarterly) - export_path=transcripts_results_path - ) - - print("Mining SEC filings...") - filings_results = filings_miner.mine_narratives( - document_limit=100, - frequency='M', # Monthly frequency (filings are quarterly) - export_path=filings_results_path - ) - -Load and Process Results ------------------------- - -Load the exported Excel files, clean the data, and display a summary. - -.. toggle:: Load results function - - .. code:: python - - # Load and Process Results - def load_results(file_path, source_type): - """ - Load and clean narrative mining results. - - Parameters: - file_path (str): Path to the Excel file containing mining results. - source_type (str): Type of data source (News, Earnings Call, SEC Filing). - - Returns: - pd.DataFrame: Cleaned dataframe with source type label. - """ - df = pd.read_excel(file_path, header=1).reset_index(drop=True) - df = df.drop(columns=[col for col in df.columns if 'Unnamed' in str(col)]) - df['Date'] = pd.to_datetime(df['Date']) - df['Source_Type'] = source_type # Add source type column - print(f"Loaded {len(df)} narrative records from {source_type}") - return df - - # Load results from all three document types with labeling - news_df = load_results(news_results_path, "News Media") - transcripts_df = load_results(transcripts_results_path, "Earnings Calls") - filings_df = load_results(filings_results_path, "SEC Filings") - - # Create a summary of the dataset sizes - source_summary = pd.DataFrame({ - 'Source Type': ['News Media', 'Earnings Calls', 'SEC Filings'], - 'Record Count': [len(news_df), len(transcripts_df), len(filings_df)], - 'Date Range': [ - f"{news_df['Date'].min().strftime('%Y-%m-%d')} to {news_df['Date'].max().strftime('%Y-%m-%d')}", - f"{transcripts_df['Date'].min().strftime('%Y-%m-%d')} to {transcripts_df['Date'].max().strftime('%Y-%m-%d')}", - f"{filings_df['Date'].min().strftime('%Y-%m-%d')} to {filings_df['Date'].max().strftime('%Y-%m-%d')}" - ], - 'Unique Narratives': [ - news_df['Label'].nunique(), - transcripts_df['Label'].nunique(), - filings_df['Label'].nunique() - ] - }) - - # Display the summary table - display(source_summary) - - # Display samples from each source - print("\n======= SAMPLE NEWS NARRATIVES =======") - display(news_df[['Date', 'Headline', 'Label', 'Quote']].head(3)) - - print("\n======= SAMPLE EARNINGS CALL NARRATIVES =======") - display(transcripts_df[['Date', 'Headline', 'Label', 'Quote']].head(3)) - - print("\n======= SAMPLE SEC FILING NARRATIVES =======") - display(filings_df[['Date', 'Headline', 'Label', 'Quote']].head(3)) - - -.. parsed-literal:: - - Loaded 64 narrative records from News Media - Loaded 641 narrative records from Earnings Calls - Loaded 744 narrative records from SEC Filings - - - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Source TypeRecord CountDate RangeUnique Narratives
0News Media642023-03-07 to 2025-01-1411
1Earnings Calls6412023-04-27 to 2025-03-2615
2SEC Filings7442023-05-05 to 2025-03-2813
-
- - -.. parsed-literal:: - - - ======= SAMPLE NEWS NARRATIVES ======= - - - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DateHeadlineLabelQuote
02023-03-07Salesforce Discloses Generative Artificial Int...Companies are spending billions on unproven AI...Microsoft (MSFT) has been "leading the pack" t...
12023-04-24Equity Markets Struggle for Direction Ahead of...Tech valuations have detached from fundamental..."Stretched valuations, too-optimistic consensu...
22023-04-24Appetite for Equities Mixed Ahead of Mega-Cap ...Tech valuations have detached from fundamental..."Stretched valuations, too-optimistic consensu...
-
- - -.. parsed-literal:: - - - ======= SAMPLE EARNINGS CALL NARRATIVES ======= - - - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DateHeadlineLabelQuote
02023-04-27Amazon.com Inc: Q1 2023 Earnings Call on Apr 2...Corporate executives privately express concern...Our recent Announcement on large language mode...
12023-06-01Elastic N.V.: Q4 2023 Earnings Call on Jun 1, ...Public discourse about AI capabilities exceeds...And if you look back a few years, you guys bui...
22023-06-01Elastic N.V.: Q4 2023 Earnings Call on Jun 1, ...Market is positioning AI as revolutionary with...For example, In enterprise search, we expanded...
-
- - -.. parsed-literal:: - - - ======= SAMPLE SEC FILING NARRATIVES ======= - - - -.. raw:: html - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DateHeadlineLabelQuote
02023-05-05MARCHEX INC files FORM 10-Q for Q1, FY 2023 on...Current AI investments may not generate predic...We operate primarily in domestic markets. Inno...
12023-05-05TRUPANION, INC. files FORM 10-Q for Q1, FY 202...Tech CEOs acknowledge AI implementation challe...In addition, we use artificial intelligence an...
22023-05-08Viant Technology Inc. files FORM 10-Q for Q1, ...Current AI investments may not generate predic...involve educating our customers about the use,...
-
- - -Narrative Analysis Functions ----------------------------- - -Define functions to prepare the narrative time series data and calculate -overall source scores. - -.. toggle:: Narrative Analysis Functions - - .. code:: python - - # Narrative Analysis Functions - - def prepare_narrative_data(df, frequency='W'): - """ - Prepare narrative data for visualization by creating time series of narrative counts, - converting to z-scores, and applying smoothing. - """ - pivot_df = pd.pivot_table(df, index='Date', columns='Label', aggfunc='size', fill_value=0) - resampled_df = pivot_df.resample(frequency).sum() - - # Calculate z-scores for each narrative - zscore_df = pd.DataFrame() - for column in resampled_df.columns: - mean = resampled_df[column].mean() - std = resampled_df[column].std() - if std == 0: - zscore_df[column] = 0 - else: - zscore_df[column] = (resampled_df[column] - mean) / std - - # Apply smoothing using Gaussian filter - smoothed_df = pd.DataFrame(index=zscore_df.index) - for column in zscore_df.columns: - smoothed_data = gaussian_filter1d(zscore_df[column].fillna(0).values, sigma=2) - smoothed_df[column] = smoothed_data - return smoothed_df - - def calculate_source_scores(df): - """ - Calculate overall narrative scores (z-scores) across all narratives by source. - """ - date_counts = df.groupby('Date').size() - weekly_counts = date_counts.resample('W').sum().fillna(0) - mean = weekly_counts.mean() - std = weekly_counts.std() - if std == 0: - zscore = weekly_counts * 0 - else: - zscore = (weekly_counts - mean) / std - smoothed = gaussian_filter1d(zscore.fillna(0).values, sigma=2) - return pd.Series(smoothed, index=zscore.index) - -Creating Narrative Visualizations ---------------------------------- - -Define functions to create a comparative visualization across sources -and a narrative breakdown chart for news media. - -.. code:: python - - def visualize_cross_source_narratives(): - """Create a comparative visualization of narrative prevalence across sources with fixed annotation arrows""" - # Prepare data for each source - news_score = calculate_source_scores(news_df) - transcript_score = calculate_source_scores(transcripts_df) - filing_score = calculate_source_scores(filings_df) - - # Align indices across all sources - all_dates = sorted(set(news_score.index) | - set(transcript_score.index) | - set(filing_score.index)) - - # Create dataframe with aligned dates - comparison_df = pd.DataFrame(index=all_dates) - comparison_df['News Media'] = pd.Series(news_score) - comparison_df['Earnings Calls'] = pd.Series(transcript_score) - comparison_df['SEC Filings'] = pd.Series(filing_score) - comparison_df = comparison_df.sort_index().fillna(method='ffill').fillna(0) - - # Create visualization - fig = go.Figure() - - # Add traces for each source - source_colors = { - 'News Media': '#FF6B6B', - 'Earnings Calls': '#4ECDC4', - 'SEC Filings': '#6A0572' - } - - for source, color in source_colors.items(): - fig.add_trace( - go.Scatter( - x=comparison_df.index, - y=comparison_df[source], - mode='lines', - name=source, - line=dict(width=3, color=color), - hovertemplate=( - f"{source}
" + - "Date: %{x|%B %d, %Y}
" + - "Intensity: %{y:.2f}" - ) - ) - ) - - # Find important points for annotation placement - peak_news = comparison_df['News Media'].idxmax() - peak_earnings = comparison_df['Earnings Calls'].idxmax() - peak_filings = comparison_df['SEC Filings'].idxmax() - - # Create annotations with fixed arrows - annotations = [ - # Peak news annotation - dict( - x=peak_news, - y=comparison_df.loc[peak_news, 'News Media'], - text="Peak news coverage
of AI bubble concerns", - showarrow=True, - arrowhead=2, - ax=10, - ay=-30, - bgcolor="rgba(255, 255, 255, 0.8)", - bordercolor="#FF6B6B", - borderwidth=2, - font=dict(size=10), - xanchor="right" - ), - - # Earnings calls annotation - dict( - x=peak_earnings, - y=comparison_df.loc[peak_earnings, 'Earnings Calls'], - text="Executives address
bubble concerns on
earnings calls", - showarrow=True, - arrowhead=2, - ax=90, - ay=-20, - bgcolor="rgba(255, 255, 255, 0.8)", - bordercolor="#4ECDC4", - borderwidth=2, - font=dict(size=10), - xanchor="right" - ) - ] - - # Customize the layout - fig.update_layout( - title={ - 'text': 'AI Bubble Narrative: Media vs. Corporate Communications', - 'y': 0.95, - 'x': 0.5, - 'xanchor': 'center', - 'yanchor': 'top', - 'font': dict(size=18, color='#1f1f1f') - }, - xaxis=dict( - title='', - gridcolor='rgba(100, 100, 100, 0.2)', - tickangle=-45, - tickformat='%b %Y', - tickfont=dict(color='#1f1f1f', size=10), - showgrid=True - ), - yaxis=dict( - title=dict(text='Narrative Intensity (z-score)', font=dict(color='#1f1f1f')), - tickfont=dict(color='#1f1f1f'), - gridcolor='rgba(100, 100, 100, 0.2)', - zerolinecolor='rgba(0, 0, 0, 0.4)', - range=[-1.5, 3.5], - automargin=True - ), - hovermode='closest', - legend=dict( - orientation='h', - yanchor='top', - y=-0.2, - xanchor='center', - x=0.5, - font=dict(size=12, color='#1f1f1f'), - bgcolor='rgba(255,255,255,0.8)' - ), - annotations=annotations, - margin=dict(l=50, r=50, t=70, b=120), - template='plotly', - plot_bgcolor='rgba(255,255,255,1)', - paper_bgcolor='rgba(255,255,255,1)', - height=600, - showlegend=True - ) - - # Add horizontal reference line - fig.add_shape( - type='line', - x0=comparison_df.index.min(), - y0=0, - x1=comparison_df.index.max(), - y1=0, - line=dict( - color='#666666', - width=1, - dash='dash' - ) - ) - - # Add time period selectors - fig.update_xaxes( - rangeselector=dict( - buttons=list([ - dict(count=1, label="1m", step="month", stepmode="backward"), - dict(count=3, label="3m", step="month", stepmode="backward"), - dict(count=6, label="6m", step="month", stepmode="backward"), - dict(step="all") - ]), - bgcolor='rgba(150,150,150,0.2)' - ) - ) - - fig.add_annotation( - x=peak_filings, # X position at peak date - y=comparison_df.loc[peak_filings, 'SEC Filings'], - text="Peak mentions in
SEC filings", - showarrow=True, - arrowhead=2, - ax=0, - ay=60, - bgcolor="rgba(255, 255, 255, 0.8)", - bordercolor="#6A0572", - borderwidth=2, - font=dict(size=10), - xanchor="center", - yanchor="top" # Anchor at top of text box - ) - - return fig - - def visualize_news_narrative_breakdown(): - """Create a stacked area chart showing the breakdown of specific narratives in news with unique colors""" - # Prepare news narrative data - news_narratives = prepare_narrative_data(news_df, frequency='W') - - # Filter to only include the top narratives - max_values = news_narratives.max() - top_narratives = max_values.nlargest(10).index.tolist() - filtered_narratives = news_narratives[top_narratives] - - # Create an expanded color palette with distinct colors for up to 10 narratives - # Each color is uniquely chosen to have good contrast with adjacent colors - distinct_colors = [ - '#FF6B6B', # Red - '#4ECDC4', # Teal - '#FFD166', # Gold - '#6A0572', # Purple - '#2D7DD2', # Blue - '#97CC04', # Green - '#F45D01', # Orange - '#8367C7', # Lavender - '#C44536', # Brick - '#2A6041' # Forest green - ] - - # Create figure - fig = go.Figure() - - # Add traces for each narrative with its unique color - for i, column in enumerate(filtered_narratives.columns): - color = distinct_colors[i] # Each narrative gets its own unique color - fig.add_trace( - go.Scatter( - x=filtered_narratives.index, - y=filtered_narratives[column], - mode='lines', - name=column, - line=dict(width=0.5, color=color), - stackgroup='one', - fillcolor=color, - hovertemplate=( - "%{fullData.name}
" + - "Date: %{x|%B %d, %Y}
" + - "Z-Score: %{y:.2f}" - ) - ) - ) - - # Customize layout - fig.update_layout( - title={ - 'text': 'Breakdown of AI Bubble Narratives in News Media', - 'y': 0.95, - 'x': 0.5, - 'xanchor': 'center', - 'yanchor': 'top', - 'font': dict(size=18, color='#1f1f1f') - }, - xaxis=dict( - title='', - gridcolor='rgba(100, 100, 100, 0.2)', - tickangle=-45, - tickformat='%b %Y', - tickfont=dict(color='#1f1f1f', size=11), - showgrid=True - ), - yaxis=dict( - title=dict(text='Narrative Prevalence (z-score)', font=dict(color='#1f1f1f')), - tickfont=dict(color='#1f1f1f'), - gridcolor='rgba(100, 100, 100, 0.2)', - zerolinecolor='rgba(0, 0, 0, 0.4)' - ), - hovermode='closest', - # Use horizontal legend below the chart with improved formatting - legend=dict( - orientation='h', # Horizontal orientation for better space usage - yanchor='top', - y=-0.25, # Further below the chart - xanchor='center', - x=0.5, # Centered - font=dict(size=13), # Larger font size - bgcolor='rgba(255,255,255,0.95)', # More opaque background - bordercolor='rgba(100,100,100,0.5)', # More visible border - borderwidth=1, - itemsizing='constant', # Fixed size markers - itemwidth=40, # Wider legend items - traceorder='normal' - ), - margin=dict(l=50, r=50, t=80, b=280), # Increased bottom margin for legend - template='plotly', - plot_bgcolor='rgba(255,255,255,1)', - paper_bgcolor='rgba(255,255,255,1)', - height=750, # Increased height to accommodate legend - showlegend=True - ) - - # Add time period selectors - fig.update_xaxes( - rangeselector=dict( - buttons=list([ - dict(count=1, label="1m", step="month", stepmode="backward"), - dict(count=3, label="3m", step="month", stepmode="backward"), - dict(count=6, label="6m", step="month", stepmode="backward"), - dict(step="all", label="All") - ]), - bgcolor='rgba(150,150,150,0.2)' - ) - ) - - return fig - -Create and Save Visualizations ------------------------------- - -Generate and view the visualizations: - -.. code:: python - - # Create and Save Visualizations - warnings.filterwarnings("ignore", message=".*'method'.*", category=FutureWarning) - - # Create the comparative source visualization - fig = visualize_cross_source_narratives() - fig.show() - - - # Create the narrative breakdown visualization - fig = visualize_news_narrative_breakdown() - fig.show() - - - -.. raw:: html - -
-
- - - -.. raw:: html - -
-
- - -Extract and Print Key Insights ------------------------------- - -Extract key insights from the narrative mining data and display them. - -.. toggle:: Extract insights - - .. code:: python - - # Key Insights from Narrative Mining - def extract_narrative_insights(): - """ - Extract key insights from our narrative mining data. - """ - news_score = calculate_source_scores(news_df) - transcript_score = calculate_source_scores(transcripts_df) - filing_score = calculate_source_scores(filings_df) - - peak_news_month = news_score.idxmax().strftime('%B %Y') - peak_transcript_month = transcript_score.idxmax().strftime('%B %Y') - peak_filing_month = filing_score.idxmax().strftime('%B %Y') - - news_narrative_counts = news_df['Label'].value_counts() - transcript_narrative_counts = transcripts_df['Label'].value_counts() - filing_narrative_counts = filings_df['Label'].value_counts() - - top_news_narrative = news_narrative_counts.index[0] - top_transcript_narrative = transcript_narrative_counts.index[0] - top_filing_narrative = filing_narrative_counts.index[0] - - total_news_mentions = len(news_df) - total_transcript_mentions = len(transcripts_df) - total_filing_mentions = len(filings_df) - - news_peaks = pd.Series(news_score).nlargest(3) - transcript_peaks = pd.Series(transcript_score).nlargest(3) - filing_peaks = pd.Series(filing_score).nlargest(3) - - avg_lag_days = [] - for news_date in news_peaks.index: - closest_filing_date = min(filing_peaks.index, key=lambda x: abs((x - news_date).days)) - lag_days = (closest_filing_date - news_date).days - avg_lag_days.append(lag_days) - avg_lag = np.mean(avg_lag_days) - - return { - "peak_news_month": peak_news_month, - "peak_transcript_month": peak_transcript_month, - "peak_filing_month": peak_filing_month, - "top_news_narrative": top_news_narrative, - "top_transcript_narrative": top_transcript_narrative, - "top_filing_narrative": top_filing_narrative, - "total_news_mentions": total_news_mentions, - "total_transcript_mentions": total_transcript_mentions, - "total_filing_mentions": total_filing_mentions, - "avg_lag_days": int(avg_lag) - } - - insights = extract_narrative_insights() - - print("## AI Bubble Narrative Key Insights\n") - print(f"Peak month for news coverage: {insights['peak_news_month']}") - print(f"Peak month for earnings call mentions: {insights['peak_transcript_month']}") - print(f"Peak month for regulatory filing mentions: {insights['peak_filing_month']}") - print(f"\nDominant narrative in news: \"{insights['top_news_narrative']}\"") - print(f"Dominant narrative in earnings calls: \"{insights['top_transcript_narrative']}\"") - print(f"Dominant narrative in regulatory filings: \"{insights['top_filing_narrative']}\"") - print(f"\nTotal narrative mentions in news: {insights['total_news_mentions']}") - print(f"Total mentions in earnings calls: {insights['total_transcript_mentions']}") - print(f"Total mentions in regulatory filings: {insights['total_filing_mentions']}") - print(f"\nAverage lag between peaks in news coverage peaks and SEC filings: {insights['avg_lag_days']} days") - - -.. parsed-literal:: - - ## AI Bubble Narrative Key Insights - - Peak month for news coverage: November 2023 - Peak month for earnings call mentions: December 2023 - Peak month for regulatory filing mentions: August 2024 - - Dominant narrative in news: "Analysts are questioning the timeline for AI-driven profits" - Dominant narrative in earnings calls: "Tech CEOs acknowledge AI implementation challenges amid high expectations" - Dominant narrative in regulatory filings: "Current AI investments may not generate predicted financial returns" - - Total narrative mentions in news: 64 - Total mentions in earnings calls: 641 - Total mentions in regulatory filings: 744 - - Average lag between peaks in news coverage peaks and SEC filings: 280 days - - -Conclusion ----------- - -The Narrative Miners reveal important patterns in how the AI bubble -narrative evolved across information sources: - -Timing and Intensity Variations: - -- News media (red line) shows a major spike in AI bubble concerns - around October 2023, reaching the highest peak on the chart, followed - by mostly below-average coverage until a smaller resurgence in late - 2024. -- Earnings calls (teal line) show cyclical attention to bubble - concerns, with executives addressing the topic most prominently in - early 2024, followed by diminishing attention through 2025. -- SEC filings (purple line) demonstrate the most volatile pattern, with - multiple significant spikes in early/mid-2024 and again in late - 2024/early 2025, suggesting ongoing regulatory concern. - -Narrative Progression: - -- Media coverage led the initial bubble narrative in late 2023, - potentially triggering the corporate response visible in early 2024 - earnings calls. -- Corporate executives’ discussions peaked early but diminished over - time, while SEC filing mentions increased in frequency and intensity - throughout 2024-2025. -- By early 2025, SEC filings show the strongest ongoing concerns about - an AI bubble, while both media coverage and earnings call mentions - have declined significantly. - -Enjoy exploring and extending your narrative analysis! diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 8831aa3..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -Bigdata Research Tools Documentation -====================================== - -👋 Welcome to the documentation for Bigdata Research Tools! - -Bigdata Research Tools is a Python library designed to automate research workflows. It provides plug-and-play functions for building customized, chain-of-thought research processes with minimal effort. This package includes modular, ready-to-use components that integrate seamlessly with the Bigdata API, enabling efficient data analysis and insight generation. - -Python versions -^^^^^^^^^^^^^^^ -Bigdata Research Tools supports Python 3.9 or higher. - -Cookbooks -^^^^^^^^^ -The Cookbooks section provides practical examples and tutorials to help you get the most out of Bigdata Research Tools. - -API Reference Guide -^^^^^^^^^^^^^^^^^^^ - -Refer to :ref:`api_research_tools` for detailed information on the library's modules, -classes, and functions. - - -**Happy coding!** 🚀 - ----- - -Contents --------- - -.. toctree:: - :maxdepth: 2 - - - cookbooks/index.rst - reference/index.rst - changelog_link.md - bigdata_sdk/index.rst - Terms of Service - Privacy Policy diff --git a/docs/reference/index.rst b/docs/reference/index.rst deleted file mode 100644 index 55c133b..0000000 --- a/docs/reference/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -.. _api_research_tools: - -API Reference -============= - -.. toctree:: - :maxdepth: 2 - - miners.rst - screeners.rst - themes.rst - search.rst diff --git a/docs/reference/miners.rst b/docs/reference/miners.rst deleted file mode 100644 index ab5b302..0000000 --- a/docs/reference/miners.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _miners: - -Miners (Experimental) -======================= - - -.. autoclass:: bigdata_research_tools.workflows.NarrativeMiner - :members: - :exclude-members: __init__ - :undoc-members: \ No newline at end of file diff --git a/docs/reference/screeners.rst b/docs/reference/screeners.rst deleted file mode 100644 index 80748bc..0000000 --- a/docs/reference/screeners.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. _screeners: - -Screeners (Experimental) -======================== - - -.. autoclass:: bigdata_research_tools.workflows.ThematicScreener - :members: - :exclude-members: __init__ - :undoc-members: diff --git a/docs/reference/search.rst b/docs/reference/search.rst deleted file mode 100644 index 44375ef..0000000 --- a/docs/reference/search.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _search_2: - -Search -======================= - - -.. autofunction:: bigdata_research_tools.search.run_search diff --git a/docs/reference/themes.rst b/docs/reference/themes.rst deleted file mode 100644 index ad831a5..0000000 --- a/docs/reference/themes.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. _themes: - -Themes (Experimental) -======================= - - -.. autoclass:: bigdata_research_tools.themes.ThemeTree - :members: - :exclude-members: __init__, as_string - -.. autofunction:: bigdata_research_tools.themes.generate_theme_tree - -.. autofunction:: bigdata_research_tools.themes.stringify_label_summaries From 3750f8e40a03601b599e8520a32948fa931da707 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 12:08:47 +0200 Subject: [PATCH 14/82] Add formater, linter and type checker --- Makefile | 30 +-- README.md | 9 + src/bigdata_research_tools/__init__.py | 2 +- src/bigdata_research_tools/labeler/labeler.py | 1 - .../labeler/narrative_labeler.py | 4 +- .../labeler/risk_labeler.py | 67 +++--- .../labeler/screener_labeler.py | 13 +- src/bigdata_research_tools/llm/azure.py | 4 +- src/bigdata_research_tools/llm/bedrock.py | 51 +++-- src/bigdata_research_tools/llm/utils.py | 2 +- .../portfolio/__init__.py | 12 +- .../portfolio/motivation.py | 147 ++++++------- .../portfolio/portfolio_constructor.py | 186 +++++++++-------- src/bigdata_research_tools/prompts/labeler.py | 7 +- .../prompts/motivation.py | 21 +- src/bigdata_research_tools/prompts/risk.py | 2 +- src/bigdata_research_tools/prompts/themes.py | 4 +- src/bigdata_research_tools/search/__init__.py | 6 +- .../search/narrative_search.py | 10 +- .../search/query_builder.py | 195 ++++++++++-------- .../search/screener_search.py | 21 +- src/bigdata_research_tools/search/search.py | 10 +- .../search/search_utils.py | 17 +- src/bigdata_research_tools/tracing.py | 1 + src/bigdata_research_tools/utils/distance.py | 21 +- src/bigdata_research_tools/utils/observer.py | 11 +- .../visuals/risk_visuals.py | 4 +- .../visuals/thematic_visuals.py | 6 +- src/bigdata_research_tools/watchlists.py | 24 ++- src/bigdata_research_tools/workflows/base.py | 4 +- .../workflows/narrative_miner.py | 17 +- .../workflows/risk_analyzer.py | 45 ++-- .../workflows/thematic_screener.py | 59 ++++-- src/bigdata_research_tools/workflows/utils.py | 11 +- tests/test_llm/test_base.py | 8 +- tests/test_llm/test_bedrock.py | 34 ++- tests/test_llm/test_init.py | 1 + tests/test_llm/test_openai.py | 17 +- tests/test_llm/test_utils.py | 4 +- tests/test_utils/test_distance.py | 44 ++-- tests/test_utils/test_observer.py | 23 ++- 41 files changed, 677 insertions(+), 478 deletions(-) diff --git a/Makefile b/Makefile index 9fea510..9601be8 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,16 @@ -# Minimal makefile for Sphinx documentation -# +.PHONY: tests lint format -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = docs -BUILDDIR = build +tests: + @uv run -m pytest --cov --cov-config=.coveragerc --cov-report term --cov-report xml:./coverage-reports/coverage.xml -s tests/* -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +lint: + @uvx ruff check --extend-select I --fix src/bigdata_research_tools/ tests/ -.PHONY: help Makefile +lint-check: + @uvx ruff check --extend-select I src/bigdata_research_tools/ tests/ -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +format: + @uvx ruff format src/bigdata_research_tools/ tests/ - -create-docs: - @uv run $(MAKE) clean html +type-check: + @uvx ty check src/bigdata_research_tools/ tests/ \ No newline at end of file diff --git a/README.md b/README.md index 46e8cab..a6c51dd 100644 --- a/README.md +++ b/README.md @@ -1198,6 +1198,15 @@ INFO:__main__: Bloomberg: 8 documents --- + +## Tooling +This project uses [ruff](https://docs.astral.sh/ruff/) for linting and formatting and [ty](https://docs.astral.sh/ty/) for a type checker. To ensure your code adheres to the project's style guidelines, run the following commands before committing your changes: +```bash +make type-check +make lint +make format +``` + ## License This software is licensed for use solely under the terms agreed upon in the diff --git a/src/bigdata_research_tools/__init__.py b/src/bigdata_research_tools/__init__.py index a950dc0..9a7799f 100644 --- a/src/bigdata_research_tools/__init__.py +++ b/src/bigdata_research_tools/__init__.py @@ -1,7 +1,7 @@ """ Bigdata.com Research Tools """ -from importlib.metadata import version +from importlib.metadata import version __version__: str = version("bigdata-research-tools") diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index e37bb1f..8955bec 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -56,7 +56,6 @@ def _deserialize_label_responses( """ response_mapping = {} for response in responses: - if not response or not isinstance(response, dict): continue diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index cdc6de4..0e014e3 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -149,6 +149,6 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: ] sort_columns = ["Date", "Time Period", "Document ID", "Headline", "Chunk Text"] - df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) - + df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) + return df diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 05d5ebf..4738b26 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -1,6 +1,5 @@ - from logging import Logger, getLogger -from typing import List, Optional, Any, Dict +from typing import Any, Dict, List, Optional from pandas import DataFrame, Series @@ -11,8 +10,8 @@ ) from bigdata_research_tools.prompts.labeler import ( get_other_entity_placeholder, - get_target_entity_placeholder, get_risk_system_prompt, + get_target_entity_placeholder, ) logger: Logger = getLogger(__name__) @@ -81,7 +80,9 @@ def get_labels( return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_columns: List[str]) -> DataFrame: + def post_process_dataframe( + self, df: DataFrame, extra_fields: dict, extra_columns: List[str] + ) -> DataFrame: """ Post-process the labeled DataFrame. @@ -148,31 +149,33 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y") df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d") - df["Document ID"] = df["document_id"] if "document_id" in df.columns else df["rp_document_id"] - + df["Document ID"] = ( + df["document_id"] if "document_id" in df.columns else df["rp_document_id"] + ) + columns_map = { - "entity_name": "Company", - "entity_sector": "Sector", - "entity_industry": "Industry", - "entity_country": "Country", - "entity_ticker": "Ticker", - "headline": "Headline", - "text": "Quote", - "motivation": "Motivation", - "label": "Sub-Scenario" - } + "entity_name": "Company", + "entity_sector": "Sector", + "entity_industry": "Industry", + "entity_country": "Country", + "entity_ticker": "Ticker", + "headline": "Headline", + "text": "Quote", + "motivation": "Motivation", + "label": "Sub-Scenario", + } if extra_fields: columns_map.update(extra_fields) if "quotes" in extra_fields.keys(): if "quotes" in df.columns: - df["quotes"] = df.apply(replace_company_placeholders, axis=1, col_name = 'quotes') + df["quotes"] = df.apply( + replace_company_placeholders, axis=1, col_name="quotes" + ) else: print("quotes column not in df") - df = df.rename( - columns=columns_map - ) + df = df.rename(columns=columns_map) # Select and order columns export_columns = [ @@ -189,15 +192,14 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column "Motivation", "Sub-Scenario", ] - + if extra_columns: export_columns += extra_columns return df[export_columns] -def replace_company_placeholders(row: Series, col_name: str = 'motivation') -> str: - +def replace_company_placeholders(row: Series, col_name: str = "motivation") -> str: """ Replace company placeholders in text. @@ -215,16 +217,25 @@ def replace_company_placeholders(row: Series, col_name: str = 'motivation') -> s if row.get("other_entities_map"): for entity_id, entity_name in row["other_entities_map"]: text = text.replace( - f"{get_other_entity_placeholder()}_{entity_id}", entity_name) - + f"{get_other_entity_placeholder()}_{entity_id}", entity_name + ) + elif isinstance(text, list): - text = [t.replace(get_target_entity_placeholder(), row["entity_name"]) for t in text] + text = [ + t.replace(get_target_entity_placeholder(), row["entity_name"]) for t in text + ] if row.get("other_entities_map"): for entity_id, entity_name in row["other_entities_map"]: - text = [t.replace(f"{get_other_entity_placeholder()}_{entity_id}", entity_name) for t in text] + text = [ + t.replace( + f"{get_other_entity_placeholder()}_{entity_id}", entity_name + ) + for t in text + ] return text + # Function to map risk_factor to risk_category def map_risk_category(risk_factor, mapping): - return mapping.get(risk_factor, 'Not Applicable') + return mapping.get(risk_factor, "Not Applicable") diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index e3857b5..65d11a6 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -171,9 +171,16 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "Theme", ] - sort_columns = ["Date", "Time Period", "Company", "Document ID", "Headline", "Quote"] - df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) - + sort_columns = [ + "Date", + "Time Period", + "Company", + "Document ID", + "Headline", + "Quote", + ] + df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) + return df diff --git a/src/bigdata_research_tools/llm/azure.py b/src/bigdata_research_tools/llm/azure.py index 233dfb7..ec73e3f 100644 --- a/src/bigdata_research_tools/llm/azure.py +++ b/src/bigdata_research_tools/llm/azure.py @@ -69,7 +69,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st ) return chat_completion.choices[0].message.content - except Exception as e: + except Exception: if attempt == max_retries - 1: raise await asyncio.sleep(delay) @@ -195,7 +195,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: ) return chat_completion.choices[0].message.content - except Exception as e: + except Exception: if attempt == max_retries - 1: raise time.sleep(delay) diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index f3cff74..aef06d5 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -13,7 +13,7 @@ class AsyncBedrockProvider(AsyncLLMProvider): - # Asynchronous boto3 is tricky, for now use the synchronous client, this will not + # Asynchronous boto3 is tricky, for now use the synchronous client, this will not # provide the benefits from async, but will at least let our workflows run for now def __init__(self, model: str, region: str = None): super().__init__(model) @@ -52,13 +52,16 @@ def _get_bedrock_input( response_prefix = "" for message in chat_history: if message["role"] != "system": - formatted_history.append({"role": message["role"], "content": [{"text": message["content"]}]}) + formatted_history.append( + {"role": message["role"], "content": [{"text": message["content"]}]} + ) else: system.append({"text": message["content"]}) - if "response_format" in kwargs and kwargs["response_format"].get("type") == "json": - formatted_history.append( - {"role": "assistant", "content": [{"text": "{"}]} - ) + if ( + "response_format" in kwargs + and kwargs["response_format"].get("type") == "json" + ): + formatted_history.append({"role": "assistant", "content": [{"text": "{"}]}) response_prefix = "{" # https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html model_kwargs = { @@ -91,7 +94,9 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st Only implemented for a few models. See https://docs.aws.amazon.com/bedrock/latest/userguide/latency-optimized-inference.html """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) + bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + chat_history, **kwargs + ) response = bedrock_client.converse(**model_kwargs) output_message = ( @@ -124,7 +129,9 @@ async def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) + bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + chat_history, **kwargs + ) if tools: model_kwargs["toolConfig"] = {"tools": tools} response = bedrock_client.converse(**model_kwargs) @@ -145,7 +152,8 @@ async def get_tools_response( for f in output_message if "text" not in f ], - "text": output_prefix + "".join([x["text"] for x in output_message if "text" in x]), + "text": output_prefix + + "".join([x["text"] for x in output_message if "text" in x]), } return output @@ -166,6 +174,7 @@ async def get_stream_response( """ raise NotImplementedError + class BedrockProvider(LLMProvider): def __init__(self, model: str, region: str = None): super().__init__(model) @@ -204,13 +213,16 @@ def _get_bedrock_input( response_prefix = "" for message in chat_history: if message["role"] != "system": - formatted_history.append({"role": message["role"], "content": [{"text": message["content"]}]}) + formatted_history.append( + {"role": message["role"], "content": [{"text": message["content"]}]} + ) else: system.append({"text": message["content"]}) - if "response_format" in kwargs and kwargs["response_format"].get("type") == "json": - formatted_history.append( - {"role": "assistant", "content": [{"text": "{"}]} - ) + if ( + "response_format" in kwargs + and kwargs["response_format"].get("type") == "json" + ): + formatted_history.append({"role": "assistant", "content": [{"text": "{"}]}) response_prefix = "{" # https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html model_kwargs = { @@ -243,7 +255,9 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: Only implemented for a few models. See https://docs.aws.amazon.com/bedrock/latest/userguide/latency-optimized-inference.html """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) + bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + chat_history, **kwargs + ) response = bedrock_client.converse(**model_kwargs) output_message = ( @@ -276,7 +290,9 @@ def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) + bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + chat_history, **kwargs + ) if tools: model_kwargs["toolConfig"] = {"tools": tools} response = bedrock_client.converse(**model_kwargs) @@ -297,7 +313,8 @@ def get_tools_response( for f in output_message if "text" not in f ], - "text": output_prefix + "".join([x["text"] for x in output_message if "text" in x]), + "text": output_prefix + + "".join([x["text"] for x in output_message if "text" in x]), } return output diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 15ef20b..134313f 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -7,7 +7,7 @@ from openai import APITimeoutError, RateLimitError from tqdm import tqdm -from bigdata_research_tools.llm.base import AsyncLLMEngine, LLMEngine +from bigdata_research_tools.llm.base import AsyncLLMEngine logger: Logger = getLogger(__name__) diff --git a/src/bigdata_research_tools/portfolio/__init__.py b/src/bigdata_research_tools/portfolio/__init__.py index 58209e8..d1ebf9a 100644 --- a/src/bigdata_research_tools/portfolio/__init__.py +++ b/src/bigdata_research_tools/portfolio/__init__.py @@ -1,12 +1,8 @@ -from bigdata_research_tools.portfolio.portfolio_constructor import ( - PortfolioConstructor, -) - from bigdata_research_tools.portfolio.motivation import ( Motivation, ) +from bigdata_research_tools.portfolio.portfolio_constructor import ( + PortfolioConstructor, +) -__all__ = [ - "PortfolioConstructor", - "Motivation" -] +__all__ = ["PortfolioConstructor", "Motivation"] diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index 6265165..f5aab4e 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -1,28 +1,29 @@ -import pandas as pd from collections import defaultdict -from typing import Tuple, Dict, Any -from tqdm import tqdm +from typing import Any, Dict, Tuple + +import pandas as pd +from tqdm import tqdm -from bigdata_research_tools.prompts.motivation import get_motivation_prompt from bigdata_research_tools.llm.base import LLMEngine +from bigdata_research_tools.prompts.motivation import get_motivation_prompt class Motivation: """ A class for generating motivation statements for companies based on thematic analysis. """ - + def __init__(self, model: str = None, model_config: Dict[str, Any] = None): """ Initialize the Motivation class. - + Parameters: - model: Model string in format "provider::model" (e.g., "openai::gpt-4o-mini") - model_config: Configuration for the LLM model """ self.model_config = model_config or self._get_default_model_config() self.llm_engine = LLMEngine(model=model) - + @staticmethod def _get_default_model_config() -> Dict[str, Any]: """Get default LLM model configuration.""" @@ -34,130 +35,138 @@ def _get_default_model_config() -> Dict[str, Any]: "max_tokens": 300, "seed": 42, } - + def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> Dict: """ Group quotes and labels by company. - + Parameters: - filtered_df: DataFrame filtered by theme - + Returns: - Dictionary with company data - + Raises: - ValueError: If required columns are missing from the DataFrame """ # Check for required columns - required_columns = ['Company', 'Quote', 'Theme'] - missing_columns = [col for col in required_columns if col not in filtered_df.columns] - + required_columns = ["Company", "Quote", "Theme"] + missing_columns = [ + col for col in required_columns if col not in filtered_df.columns + ] + if missing_columns: available_columns = list(filtered_df.columns) raise ValueError( f"Missing required columns: {missing_columns}. " f"Available columns are: {available_columns}" ) - + # Check if DataFrame is empty if filtered_df.empty: print("Warning: DataFrame is empty. Returning empty dictionary.") return {} - - company_data = defaultdict(lambda: {'quotes_and_labels': []}) - + + company_data = defaultdict(lambda: {"quotes_and_labels": []}) + # Use .get() with default values as additional safety for _, row in filtered_df.iterrows(): - company = row.get('Company', 'Unknown Company') - quote = row.get('Quote', '') - theme = row.get('Theme', 'Unknown Theme') - + company = row.get("Company", "Unknown Company") + quote = row.get("Quote", "") + theme = row.get("Theme", "Unknown Theme") + # Skip rows with missing essential data if not company or not quote: continue - - company_data[company]['quotes_and_labels'].append({ - 'quote': quote, - 'label': theme - }) - + + company_data[company]["quotes_and_labels"].append( + {"quote": quote, "label": theme} + ) + print(f"Found {len(company_data)} unique companies with quotes") - + # Count label occurrences for each company for company, data in company_data.items(): label_counts = {} - for item in data['quotes_and_labels']: - label = item['label'] + for item in data["quotes_and_labels"]: + label = item["label"] label_counts[label] = label_counts.get(label, 0) + 1 - + # Sort labels by frequency (highest first) - sorted_labels = sorted(label_counts.items(), key=lambda x: x[1], reverse=True) - company_data[company]['label_counts'] = sorted_labels - company_data[company]['total_quotes'] = len(data['quotes_and_labels']) - + sorted_labels = sorted( + label_counts.items(), key=lambda x: x[1], reverse=True + ) + company_data[company]["label_counts"] = sorted_labels + company_data[company]["total_quotes"] = len(data["quotes_and_labels"]) + return company_data - + def query_llm_for_motivation(self, prompt: str) -> str: """ Generate motivation statement using LLM Engine. - + Parameters: - prompt (str): Formatted prompt string - + Returns: - Generated motivation statement """ chat_history = [{"role": "user", "content": prompt}] - + motivation = self.llm_engine.get_response( - chat_history=chat_history, - **self.model_config + chat_history=chat_history, **self.model_config ) - + return motivation.strip() - - def generate_company_motivations(self, - df: pd.DataFrame, - theme_name: str, - word_range: Tuple[int, int]) -> pd.DataFrame: + + def generate_company_motivations( + self, df: pd.DataFrame, theme_name: str, word_range: Tuple[int, int] + ) -> pd.DataFrame: """ Generates motivation statement with specified verbosity for companies in a thematic watchlist. - + Parameters: - df (pd.DataFrame): DataFrame with columns: Company, Quote, Label, Theme - theme_name (str): Name of the theme to filter by - word_range (Tuple[int, int]): Tuple (min_words, max_words) defining motivation length - + Returns: - DataFrame with company motivations in multiple lengths """ company_data = self.group_quotes_by_company(df) - + # Generate motivations for each company results = [] - + # Use tqdm for progress tracking - for company, data in tqdm(company_data.items(), - desc=f"Generating motivations for {len(company_data)} companies", - unit="company"): - + for company, data in tqdm( + company_data.items(), + desc=f"Generating motivations for {len(company_data)} companies", + unit="company", + ): # Create prompt for this word range - prompt = get_motivation_prompt(company, data, theme_name, word_range[0], word_range[1]) - + prompt = get_motivation_prompt( + company, data, theme_name, word_range[0], word_range[1] + ) + # Generate motivation with this word range motivation = self.query_llm_for_motivation(prompt) - - results.append({ - 'Company': company, - 'Motivation': motivation, - 'Composite Score': data['total_quotes'] - }) - + + results.append( + { + "Company": company, + "Motivation": motivation, + "Composite Score": data["total_quotes"], + } + ) + # Create and return sorted DataFrame - return (pd.DataFrame(results) - .sort_values("Composite Score", ascending=False) - .reset_index(drop=True)) - + return ( + pd.DataFrame(results) + .sort_values("Composite Score", ascending=False) + .reset_index(drop=True) + ) + def update_model_config(self, config: Dict[str, Any]): """Update the model configuration.""" self.model_config.update(config) diff --git a/src/bigdata_research_tools/portfolio/portfolio_constructor.py b/src/bigdata_research_tools/portfolio/portfolio_constructor.py index 007923a..e660d46 100644 --- a/src/bigdata_research_tools/portfolio/portfolio_constructor.py +++ b/src/bigdata_research_tools/portfolio/portfolio_constructor.py @@ -1,13 +1,15 @@ -import pandas as pd -import numpy as np -from enum import Enum, auto import warnings +from enum import Enum, auto + +import numpy as np +import pandas as pd + class WeightMethod(Enum): - EQUAL = auto() # Equal weighting for all companies - COLUMN = auto() # Weight based on a specific column (e.g., market cap) - SCORE = auto() # Weight based on score values (softmax-normalized) - + EQUAL = auto() # Equal weighting for all companies + COLUMN = auto() # Weight based on a specific column (e.g., market cap) + SCORE = auto() # Weight based on score values (softmax-normalized) + def __str__(self): return self.name.lower() @@ -15,20 +17,16 @@ def __str__(self): class PortfolioConstructor: """ A class to construct balanced and weighted portfolios with constraints. - + This class provides methods to create portfolios with balanced category representation, flexible weighting methods, and customizable position and category weight constraints. Includes safeguards against infinite loops in constraint enforcement. """ - - def __init__( - self, - max_iterations: int = 1000, - tolerance: float = 1e-6 - ) -> None: + + def __init__(self, max_iterations: int = 1000, tolerance: float = 1e-6) -> None: """ Initialize the PortfolioConstructor with constraint enforcement parameters. - + Parameters: ----------- max_iterations : int, default=1000 @@ -38,7 +36,7 @@ def __init__( """ self.max_iterations = max_iterations self.tolerance = tolerance - + def construct_portfolio( self, df: pd.DataFrame, @@ -48,16 +46,16 @@ def construct_portfolio( size: int = None, max_position_weight: float = 0.05, max_category_weight: float = 0.15, - weight_method: WeightMethod = WeightMethod.EQUAL + weight_method: WeightMethod = WeightMethod.EQUAL, ) -> pd.DataFrame: """ Build a balanced and weighted portfolio with position and category constraints. - + This method performs three main steps: 1. Balances the portfolio by selecting top companies from each category 2. Calculates initial weights based on the selected weighting method 3. Enforces position and category weight constraints with convergence protection - + Parameters: ----------- df : pandas.DataFrame @@ -76,7 +74,7 @@ def construct_portfolio( Maximum weight allowed for any category (e.g., 0.15 = 15%) weight_method : WeightMethod, default=WeightMethod.EQUAL Weighting methodology to use - + Returns: -------- pandas.DataFrame @@ -95,16 +93,20 @@ def construct_portfolio( portfolio = self._calculate_weights(portfolio, weight_method, weight_col) # Step 3: Enforce position and category weight constraints with safety checks - portfolio = self._enforce_constraints(portfolio, max_position_weight, max_category_weight, balance_col) + portfolio = self._enforce_constraints( + portfolio, max_position_weight, max_category_weight, balance_col + ) # Clean up temporary columns and sort by weight portfolio = portfolio.drop(columns=["rank", "raw_weight"]) return portfolio.sort_values("weight", ascending=False) - - def _balance_by_category(self, df: pd.DataFrame, score_col: str, balance_col: str, size: int) -> pd.DataFrame: + + def _balance_by_category( + self, df: pd.DataFrame, score_col: str, balance_col: str, size: int + ) -> pd.DataFrame: """ Select top companies from each category to create a balanced portfolio. - + Parameters: ----------- df : pandas.DataFrame @@ -115,7 +117,7 @@ def _balance_by_category(self, df: pd.DataFrame, score_col: str, balance_col: st Column to use for category balancing size : int Target portfolio size - + Returns: -------- pandas.DataFrame @@ -123,12 +125,12 @@ def _balance_by_category(self, df: pd.DataFrame, score_col: str, balance_col: st """ categories = df[balance_col].unique() n_categories = len(categories) - + # Calculate allocations per category base_per_category = size // n_categories extras = size % n_categories allocations = pd.Series(base_per_category, index=categories) - + # Distribute extras to categories that have enough companies category_sizes = df.groupby(balance_col).size() eligible = category_sizes[category_sizes > base_per_category].index @@ -137,15 +139,21 @@ def _balance_by_category(self, df: pd.DataFrame, score_col: str, balance_col: st # Rank and select top companies per category df = df.copy() - df["rank"] = df.groupby(balance_col)[score_col].rank(ascending=False, method="first") - mask = df.apply(lambda x: x["rank"] <= allocations.get(x[balance_col], 0), axis=1) - + df["rank"] = df.groupby(balance_col)[score_col].rank( + ascending=False, method="first" + ) + mask = df.apply( + lambda x: x["rank"] <= allocations.get(x[balance_col], 0), axis=1 + ) + return df[mask].copy() - - def _calculate_weights(self, portfolio: pd.DataFrame, weight_method: WeightMethod, weight_col: str) -> pd.DataFrame: + + def _calculate_weights( + self, portfolio: pd.DataFrame, weight_method: WeightMethod, weight_col: str + ) -> pd.DataFrame: """ Calculate initial weights based on the specified weighting method. - + Parameters: ----------- portfolio : pandas.DataFrame @@ -154,14 +162,14 @@ def _calculate_weights(self, portfolio: pd.DataFrame, weight_method: WeightMetho Method to use for calculating weights weight_col : str or None Column to use for column-based or score-based weighting - + Returns: -------- pandas.DataFrame Portfolio with added 'weight' column """ portfolio = portfolio.copy() - + # Calculate raw weights based on selected method if weight_method == WeightMethod.EQUAL or weight_col is None: # Equal weighting - all positions get the same raw weight @@ -182,15 +190,17 @@ def _calculate_weights(self, portfolio: pd.DataFrame, weight_method: WeightMetho # Normalize raw weights to sum to 1 portfolio["weight"] = portfolio["raw_weight"] / portfolio["raw_weight"].sum() - + return portfolio - - def _enforce_constraints(self, df: pd.DataFrame, max_pos: float, max_cat: float, balance_col: str) -> pd.DataFrame: + + def _enforce_constraints( + self, df: pd.DataFrame, max_pos: float, max_cat: float, balance_col: str + ) -> pd.DataFrame: """ Enforce maximum position weight and category weight constraints. - + Includes convergence protection to prevent infinite loops. - + Parameters: ----------- df : pandas.DataFrame @@ -201,95 +211,107 @@ def _enforce_constraints(self, df: pd.DataFrame, max_pos: float, max_cat: float, Maximum allowed weight for any category (0-1) balance_col : str Column name for category grouping - + Returns: -------- pandas.DataFrame Portfolio with weights adjusted to meet all constraints """ df = df.copy() - + # Validation checks if max_pos <= 0 or max_pos > 1: - raise ValueError(f"max_position_weight must be between 0 and 1, got {max_pos}") + raise ValueError( + f"max_position_weight must be between 0 and 1, got {max_pos}" + ) if max_cat <= 0 or max_cat > 1: - raise ValueError(f"max_category_weight must be between 0 and 1, got {max_cat}") - + raise ValueError( + f"max_category_weight must be between 0 and 1, got {max_cat}" + ) + # Check if constraints are achievable n_positions = len(df) n_categories = df[balance_col].nunique() - + # Warn if constraints might be too tight if max_pos * n_positions < 1.0: - warnings.warn(f"Position constraint may be too tight: {max_pos:.1%} * {n_positions} positions = {max_pos * n_positions:.1%} total") - + warnings.warn( + f"Position constraint may be too tight: {max_pos:.1%} * {n_positions} positions = {max_pos * n_positions:.1%} total" + ) + if max_cat * n_categories < 1.0: - warnings.warn(f"Category constraint may be too tight: {max_cat:.1%} * {n_categories} categories = {max_cat * n_categories:.1%} total") - + warnings.warn( + f"Category constraint may be too tight: {max_cat:.1%} * {n_categories} categories = {max_cat * n_categories:.1%} total" + ) + iteration = 0 prev_weights = None - + while iteration < self.max_iterations: iteration += 1 - + # Track whether we made any changes position_violation = False category_violation = False - + # Check and fix position weight violations - overweight_positions = df['weight'] > max_pos + overweight_positions = df["weight"] > max_pos if overweight_positions.any(): position_violation = True - + # Calculate total excess weight from violating positions - excess = (df.loc[overweight_positions, 'weight'] - max_pos).sum() - + excess = (df.loc[overweight_positions, "weight"] - max_pos).sum() + # Cap violating positions - df.loc[overweight_positions, 'weight'] = max_pos - + df.loc[overweight_positions, "weight"] = max_pos + # Redistribute excess to compliant positions proportionally compliant = ~overweight_positions - if compliant.any() and df.loc[compliant, 'weight'].sum() > 0: - df.loc[compliant, 'weight'] += excess * ( - df.loc[compliant, 'weight'] / df.loc[compliant, 'weight'].sum() + if compliant.any() and df.loc[compliant, "weight"].sum() > 0: + df.loc[compliant, "weight"] += excess * ( + df.loc[compliant, "weight"] / df.loc[compliant, "weight"].sum() ) - + # Check and fix category weight violations - category_weights = df.groupby(balance_col)['weight'].sum() + category_weights = df.groupby(balance_col)["weight"].sum() overweight_categories = category_weights > max_cat - + if overweight_categories.any(): category_violation = True - + # Scale down weights in overweight categories for category in category_weights[overweight_categories].index: mask = df[balance_col] == category - current_weight = df.loc[mask, 'weight'].sum() - + current_weight = df.loc[mask, "weight"].sum() + if current_weight > 0: # Avoid division by zero # Scale all category holdings proportionally scale_factor = max_cat / current_weight - df.loc[mask, 'weight'] *= scale_factor - + df.loc[mask, "weight"] *= scale_factor + # Renormalize all weights to sum to 1 - total_weight = df['weight'].sum() + total_weight = df["weight"].sum() if total_weight > 0: - df['weight'] = df['weight'] / total_weight - + df["weight"] = df["weight"] / total_weight + # Check for convergence if not position_violation and not category_violation: break - + # Check if weights have converged (small changes) if prev_weights is not None: - weight_change = np.abs(df['weight'].values - prev_weights).max() + weight_change = np.abs(df["weight"].values - prev_weights).max() if weight_change < self.tolerance: - warnings.warn(f"Constraint enforcement converged with tolerance {self.tolerance:.2e} after {iteration} iterations") + warnings.warn( + f"Constraint enforcement converged with tolerance {self.tolerance:.2e} after {iteration} iterations" + ) break - - prev_weights = df['weight'].values.copy() - + + prev_weights = df["weight"].values.copy() + if iteration >= self.max_iterations: - warnings.warn(f"Constraint enforcement stopped after {self.max_iterations} iterations without full convergence") - - return df \ No newline at end of file + warnings.warn( + f"Constraint enforcement stopped after {self.max_iterations} iterations without full convergence" + ) + + return df diff --git a/src/bigdata_research_tools/prompts/labeler.py b/src/bigdata_research_tools/prompts/labeler.py index 96968cd..a15013d 100644 --- a/src/bigdata_research_tools/prompts/labeler.py +++ b/src/bigdata_research_tools/prompts/labeler.py @@ -144,6 +144,7 @@ def get_screener_system_prompt( unknown_label=unknown_label, ) + risk_system_prompt_template: str = """ Forget all previous prompts. @@ -308,9 +309,9 @@ def get_screener_system_prompt( """ + def get_risk_system_prompt(main_theme: str, label_summaries: List[str]) -> str: """Generate a system prompt for labeling sentences with thematic labels.""" return risk_system_prompt_template.format( - main_theme=main_theme, - label_summaries=label_summaries - ) \ No newline at end of file + main_theme=main_theme, label_summaries=label_summaries + ) diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index 6e7c063..9792c8c 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -1,4 +1,5 @@ -import pandas as pd +import pandas as pd + def generate_prompt_template() -> str: """ @@ -28,7 +29,10 @@ def generate_prompt_template() -> str: 8. Keeps the statement concise ({min_words}-{max_words} words) """ -def get_motivation_prompt(company: str, data: pd.DataFrame, theme_name: str, min_words: int, max_words: int) -> str: + +def get_motivation_prompt( + company: str, data: pd.DataFrame, theme_name: str, min_words: int, max_words: int +) -> str: """ Formats the motivation prompt using company data and the prompt template. @@ -42,21 +46,22 @@ def get_motivation_prompt(company: str, data: pd.DataFrame, theme_name: str, min Returns: - str: Fully formatted motivation prompt """ - label_summary = "\n".join([f"- {label}: {count} quotes" for label, count in data['label_counts']]) + label_summary = "\n".join( + [f"- {label}: {count} quotes" for label, count in data["label_counts"]] + ) quotes_text = "" - for i, item in enumerate(data['quotes_and_labels']): - quotes_text += f"{i+1}. \"{item['quote']}\" [Label: {item['label']}]\n" + for i, item in enumerate(data["quotes_and_labels"]): + quotes_text += f'{i + 1}. "{item["quote"]}" [Label: {item["label"]}]\n' prompt_template = generate_prompt_template() return prompt_template.format( theme=theme_name, company=company, - total_quotes=data['total_quotes'], + total_quotes=data["total_quotes"], label_summary=label_summary, quotes_and_labels=quotes_text, min_words=min_words, - max_words=max_words + max_words=max_words, ) - diff --git a/src/bigdata_research_tools/prompts/risk.py b/src/bigdata_research_tools/prompts/risk.py index 9ca9d98..2080b69 100644 --- a/src/bigdata_research_tools/prompts/risk.py +++ b/src/bigdata_research_tools/prompts/risk.py @@ -115,5 +115,5 @@ def compose_risk_system_prompt_focus(main_theme: str, analyst_focus: str) -> str ], "Keywords": ["Tariffs", "China"] }} - """ + """ return prompt.strip() diff --git a/src/bigdata_research_tools/prompts/themes.py b/src/bigdata_research_tools/prompts/themes.py index 7b999ed..f8faaae 100644 --- a/src/bigdata_research_tools/prompts/themes.py +++ b/src/bigdata_research_tools/prompts/themes.py @@ -1,6 +1,4 @@ -def compose_themes_system_prompt( - main_theme: str, analyst_focus: str = "" -) -> str: +def compose_themes_system_prompt(main_theme: str, analyst_focus: str = "") -> str: prompt = f""" Forget all previous prompts. You are assisting a professional analyst tasked with creating a screener to measure the impact of the theme {main_theme} on companies. diff --git a/src/bigdata_research_tools/search/__init__.py b/src/bigdata_research_tools/search/__init__.py index d40e49c..d68df55 100644 --- a/src/bigdata_research_tools/search/__init__.py +++ b/src/bigdata_research_tools/search/__init__.py @@ -1,7 +1,9 @@ from bigdata_research_tools.search.narrative_search import search_narratives +from bigdata_research_tools.search.query_builder import ( + build_batched_query, + create_date_ranges, +) from bigdata_research_tools.search.screener_search import search_by_companies -from bigdata_research_tools.search.query_builder import build_batched_query, create_date_ranges - from bigdata_research_tools.search.search import ( SEARCH_QUERY_RESULTS_TYPE, SearchManager, diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index d40f633..de0708d 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -14,7 +14,7 @@ ) from bigdata_research_tools.search.search import run_search from bigdata_research_tools.search.search_utils import ( - build_chunk_entities, + build_chunk_entities, filter_search_results, ) @@ -146,7 +146,7 @@ def _process_narrative_search( chunk_entities = build_chunk_entities(chunk, entities) if not chunk_entities: - continue + continue # Collect all necessary information in the row rows.append( @@ -156,11 +156,11 @@ def _process_narrative_search( "sentence_id": f"{result.id}-{chunk.chunk}", "headline": result.headline, "text": chunk.text, - "entity": [entity["name"] for entity in chunk_entities], + "entity": [entity["name"] for entity in chunk_entities], "country_code": [entity["country"] for entity in chunk_entities], - "entity_type": [entity["entity_type"] for entity in chunk_entities], + "entity_type": [entity["entity_type"] for entity in chunk_entities], } - ) + ) if not rows: raise ValueError("No rows to process") diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 53d92e2..f9791fc 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -1,18 +1,12 @@ from dataclasses import dataclass -from itertools import chain,zip_longest -from typing import List, Optional, Tuple, Type, Dict +from itertools import chain, zip_longest +from typing import Dict, List, Optional, Tuple, Type + import pandas as pd from bigdata_client.daterange import AbsoluteDateRange from bigdata_client.models.advanced_search_query import QueryComponent +from bigdata_client.models.entities import Concept, Organization, Person, Place, Product from bigdata_client.models.search import DocumentType -from bigdata_client.models.entities import ( - Person, - Place, - Organization, - Product, - Concept -) - from bigdata_client.query import ( Any, Entity, @@ -21,11 +15,12 @@ ReportingEntity, Similarity, Source, - Topic + Topic, ) from bigdata_research_tools.client import bigdata_connection + @dataclass class EntitiesToSearch: people: Optional[List[str]] = None @@ -39,13 +34,13 @@ class EntitiesToSearch: @staticmethod def get_entity_type_map() -> Dict[str, Type]: return { - 'people': Person, - 'product': Product, - 'org': Organization, - 'place': Place, - 'topic': Topic, - 'concepts': Concept, - 'companies': Entity + "people": Person, + "product": Product, + "org": Organization, + "place": Place, + "topic": Topic, + "concepts": Concept, + "companies": Entity, } @@ -76,7 +71,7 @@ def build_similarity_queries(sentences: List[str]) -> List[Similarity]: def build_batched_query( - sentences: List[str], + sentences: List[str], keywords: Optional[List[str]], entities: Optional[EntitiesToSearch], control_entities: Optional[EntitiesToSearch], @@ -110,35 +105,46 @@ def build_batched_query( Config of custom entity batches of different types (people, companies, organisations..) Returns: - List[QueryComponent]: List of expanded query components. + List[QueryComponent]: List of expanded query components. """ # Early validation: ensure only one of entities or custom_batches is used if entities and custom_batches: - raise ValueError("Only one of `entities` or `custom_batches` should be provided, not both.") + raise ValueError( + "Only one of `entities` or `custom_batches` should be provided, not both." + ) _validate_parameters(document_scope=scope, fiscal_year=fiscal_year) # Step 1: Build base queries (similarity, keyword, source) - base_queries, keyword_query, source_query = _build_base_queries(sentences, keywords, sources) - + base_queries, keyword_query, source_query = _build_base_queries( + sentences, keywords, sources + ) + # Step 2: Build control entity query - control_query = _build_control_entity_query(control_entities, scope=scope) if control_entities else None - + control_query = ( + _build_control_entity_query(control_entities, scope=scope) + if control_entities + else None + ) + # Step 3: Build entity batch queries - entity_batch_queries = _build_entity_batch_queries(entities, custom_batches, batch_size, scope) - + entity_batch_queries = _build_entity_batch_queries( + entities, custom_batches, batch_size, scope + ) + # Step 4: Combine everything into expanded queries queries_expanded = _expand_queries( - (base_queries, keyword_query, source_query), - entity_batch_queries, + (base_queries, keyword_query, source_query), + entity_batch_queries, control_query, source_query, - fiscal_year + fiscal_year, ) - + return queries_expanded + def _validate_parameters( document_scope: DocumentType = None, fiscal_year: int = None ) -> None: @@ -162,27 +168,29 @@ def _validate_parameters( raise ValueError( f"`fiscal_year` must be None when `document_scope` is `{document_scope.value}`" ) - + + def _build_base_queries( - sentences: Optional[List[str]], + sentences: Optional[List[str]], keywords: Optional[List[str]], - sources: Optional[List[str]] + sources: Optional[List[str]], ) -> Tuple[List[QueryComponent], Optional[QueryComponent], Optional[QueryComponent]]: """Build the base queries from sentences, keywords, and sources.""" # Create similarity queries from sentences queries = build_similarity_queries(sentences) if sentences else [] - + # Create keyword query keyword_query = Any([Keyword(word) for word in keywords]) if keywords else None - + # Create source query source_query = Any([Source(source) for source in sources]) if sources else None - + return queries, keyword_query, source_query + def _get_entity_ids( - entity_names: List[str], - entity_type: Type, + entity_names: List[str], + entity_type: Type, ) -> list[Type]: bigdata = bigdata_connection() entity_ids = [] @@ -193,9 +201,9 @@ def _get_entity_ids( Person: bigdata.knowledge_graph.find_people, Organization: bigdata.knowledge_graph.find_organizations, Topic: bigdata.knowledge_graph.find_topics, - Concept: bigdata.knowledge_graph.find_concepts, - Entity: bigdata.knowledge_graph.find_companies, - ReportingEntity: bigdata.knowledge_graph.find_companies, + Concept: bigdata.knowledge_graph.find_concepts, + Entity: bigdata.knowledge_graph.find_companies, + ReportingEntity: bigdata.knowledge_graph.find_companies, } lookup_func = lookup_map.get(entity_type) @@ -212,55 +220,57 @@ def _get_entity_ids( return entity_ids + def _build_control_entity_query( control_entities: EntitiesToSearch, scope: DocumentType = DocumentType.ALL, ) -> QueryComponent: """Build a query for control entities.""" - + entity_ids = [] comp_ids = [] if control_entities.people: people_ids = _get_entity_ids(control_entities.people, Person) - if people_ids: + if people_ids: entity_ids.extend(people_ids) - - if control_entities.product: + + if control_entities.product: prod_ids = _get_entity_ids(control_entities.product, Product) - if prod_ids: + if prod_ids: entity_ids.extend(prod_ids) if control_entities.companies: entity_type = _get_entity_type(scope) - comp_ids = _get_entity_ids(control_entities.companies,entity_type) - if comp_ids: + comp_ids = _get_entity_ids(control_entities.companies, entity_type) + if comp_ids: entity_ids.extend(comp_ids) if control_entities.place: place_ids = _get_entity_ids(control_entities.place, Place) - if place_ids: + if place_ids: entity_ids.extend(place_ids) if control_entities.org: orga_ids = _get_entity_ids(control_entities.org, Organization) - if orga_ids: + if orga_ids: entity_ids.extend(orga_ids) if control_entities.topic: topic_ids = _get_entity_ids(control_entities.topic, Topic) - if topic_ids: + if topic_ids: entity_ids.extend(topic_ids) - + if control_entities.concepts: concept_ids = _get_entity_ids(control_entities.concepts, Concept) - if concept_ids: + if concept_ids: entity_ids.extend(concept_ids) control_query = Any(entity_ids) return control_query + def _build_entity_batch_queries( - entities: EntitiesToSearch, + entities: EntitiesToSearch, custom_batches: List[EntitiesToSearch], batch_size: int, scope: DocumentType, @@ -270,14 +280,15 @@ def _build_entity_batch_queries( # If no entities specified, return a single None to ensure at least one iteration if not entities and not custom_batches: return [None] - + # If using custom batches, process them if custom_batches: return _build_custom_batch_queries(custom_batches, scope) - + # Otherwise, auto-batch the entities return _auto_batch_entities(entities, batch_size, scope) + def _get_entity_type(scope: DocumentType) -> type: """Determine the entity type based on document scope.""" return ( @@ -286,62 +297,74 @@ def _get_entity_type(scope: DocumentType) -> type: else Entity ) + def _build_custom_batch_queries( - custom_batches: List[EntitiesToSearch], - scope: DocumentType + custom_batches: List[EntitiesToSearch], scope: DocumentType ) -> List[QueryComponent]: """Build entity queries from a list of EntitiesToSearch objects.""" entity_type_map = EntitiesToSearch.get_entity_type_map() - - def get_entity_ids_for_attr(entity_config: EntitiesToSearch, attr_name: str, entity_class) -> List[int]: + + def get_entity_ids_for_attr( + entity_config: EntitiesToSearch, attr_name: str, entity_class + ) -> List[int]: """Get entity IDs for a specific attribute.""" entity_names = getattr(entity_config, attr_name, None) if not entity_names: return [] - - entity_type = _get_entity_type(scope) if entity_class == Entity else entity_class + + entity_type = ( + _get_entity_type(scope) if entity_class == Entity else entity_class + ) return _get_entity_ids(entity_names, entity_type) - + batch_queries = [] for entity_config in custom_batches: # Use chain to flatten all entity IDs from all attributes - all_entities = list(chain.from_iterable( - get_entity_ids_for_attr(entity_config, attr_name, entity_class) - for attr_name, entity_class in entity_type_map.items() - )) - + all_entities = list( + chain.from_iterable( + get_entity_ids_for_attr(entity_config, attr_name, entity_class) + for attr_name, entity_class in entity_type_map.items() + ) + ) + if all_entities: batch_queries.append(Any(all_entities)) - + return batch_queries if batch_queries else [None] + def _auto_batch_entities( entities: EntitiesToSearch, batch_size: int, scope: DocumentType = DocumentType.ALL, ) -> List[QueryComponent]: """Auto-batch entities by type using the specified batch size.""" - + # Create batches for each entity type all_entity_batches = [] - + for attr_name, entity_class in EntitiesToSearch.get_entity_type_map().items(): entity_names = getattr(entities, attr_name, None) if not entity_names: continue - + # Get valid entity IDs - entity_type = _get_entity_type(scope) if entity_class == Entity else entity_class + entity_type = ( + _get_entity_type(scope) if entity_class == Entity else entity_class + ) entity_ids = _get_entity_ids(entity_names, entity_type) - + # Split into batches and add to collection if entity_ids: - batches = [entity_ids[i:i + batch_size] for i in range(0, len(entity_ids), batch_size)] + batches = [ + entity_ids[i : i + batch_size] + for i in range(0, len(entity_ids), batch_size) + ] all_entity_batches.append(batches) - + if not all_entity_batches: return [] - + # Combine batches across entity types using zip_longest return [ Any([entity for batch in batch_group for entity in batch]) @@ -349,12 +372,15 @@ def _auto_batch_entities( if any(batch for batch in batch_group) # Skip empty batch groups ] + def _expand_queries( - base_queries_tuple: Tuple[List[QueryComponent], Optional[QueryComponent], Optional[QueryComponent]], - entity_batch_queries: Optional[List[Optional[QueryComponent]]] = None, + base_queries_tuple: Tuple[ + List[QueryComponent], Optional[QueryComponent], Optional[QueryComponent] + ], + entity_batch_queries: Optional[List[Optional[QueryComponent]]] = None, control_query: Optional[QueryComponent] = None, source_query: Optional[QueryComponent] = None, - fiscal_year: Optional[int] = None + fiscal_year: Optional[int] = None, ) -> List[QueryComponent]: """Expand all query components into the final list of queries.""" base_queries, keyword_query, source_query = base_queries_tuple @@ -395,7 +421,8 @@ def _expand_queries( queries_expanded.append(expanded_query) return queries_expanded - + + def create_date_intervals( start_date: str, end_date: str, frequency: str ) -> List[Tuple[pd.Timestamp, pd.Timestamp]]: @@ -447,7 +474,9 @@ def create_date_intervals( # Generate date range based on the adjusted frequency try: - date_range = pd.date_range(start=start_date, end=end_date, frequency=adjusted_freq) + date_range = pd.date_range( + start=start_date, end=end_date, frequency=adjusted_freq + ) except ValueError: raise ValueError("Invalid frequency. Use 'Y', 'M', 'W', or 'D'.") diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index 89d2426..6caf8db 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -1,5 +1,5 @@ from logging import Logger, getLogger -from typing import List, Optional, Dict +from typing import Dict, List, Optional from bigdata_client.document import Document from bigdata_client.models.advanced_search_query import ListQueryComponent @@ -14,14 +14,13 @@ get_target_entity_placeholder, ) from bigdata_research_tools.search.query_builder import ( - build_batched_query, EntitiesToSearch, + build_batched_query, create_date_ranges, ) from bigdata_research_tools.search.search import run_search -from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace from bigdata_research_tools.search.search_utils import filter_search_results - +from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace logger: Logger = getLogger(__name__) @@ -113,9 +112,9 @@ def search_by_companies( # Extract entities for search querying entity_keys = [entity.id for entity in companies] - # Create entity configs + # Create entity configs entities_config = EntitiesToSearch(companies=entity_keys) - + # If control_entities are provided, create a control EntityConfig # For this example, assuming control_entities are all company entities control_entities_config = None @@ -158,7 +157,7 @@ def search_by_companies( results, entities = filter_search_results(results) # Filter entities to only include COMPANY entities entities = filter_company_entities(entities) - + # Determine whether to filter by companies based on document type # For filings and transcripts, we don't need to filter as we use reporting entities # For news, we need to check against our original universe of companies as a news article @@ -188,6 +187,7 @@ def search_by_companies( return df_sentences + def filter_company_entities( entities: List[ListQueryComponent], ) -> List[ListQueryComponent]: @@ -205,6 +205,7 @@ def filter_company_entities( if hasattr(entity, "entity_type") and getattr(entity, "entity_type") == "COMP" ] + def process_screener_search_results( results: List[Document], entities: List[ListQueryComponent], @@ -315,7 +316,7 @@ def process_screener_search_results( if not entity_key: continue # Skip if entity is not found - + # # if entity isn't in our original watchlist, skip if companies and entity_key not in companies: continue @@ -362,9 +363,7 @@ def process_screener_search_results( return df.reset_index(drop=True) -def mask_sentences( - df: DataFrame -) -> DataFrame: +def mask_sentences(df: DataFrame) -> DataFrame: """ Mask the target entity and other entities in the text. diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index bc20112..e90c8c4 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -243,7 +243,10 @@ def normalize_date_range(date_ranges: DATE_RANGE_TYPE) -> DATE_RANGE_TYPE: # Convert mutable AbsoluteDateRange into hashable objects for i, dr in enumerate(date_ranges): if isinstance(dr, AbsoluteDateRange): - date_ranges[i] = (dr.start_dt.strftime("%Y-%m-%d %H:%M:%S"), dr.end_dt.strftime("%Y-%m-%d %H:%M:%S")) + date_ranges[i] = ( + dr.start_dt.strftime("%Y-%m-%d %H:%M:%S"), + dr.end_dt.strftime("%Y-%m-%d %H:%M:%S"), + ) return date_ranges @@ -284,7 +287,7 @@ def run_search( if not kwargs.get("current_trace"): start_date = date_ranges[0][0] if date_ranges else None - end_date = date_ranges[-1][1] if date_ranges else None + end_date = date_ranges[-1][1] if date_ranges else None current_trace = Trace( event_name=TraceEventNames.RUN_SEARCH, @@ -299,7 +302,7 @@ def run_search( kwargs["current_trace"] = current_trace - try: + try: manager = SearchManager(**kwargs) query_results = manager.concurrent_search( queries=queries, @@ -322,7 +325,6 @@ def run_search( current_trace.result = execution_result # noqa send_trace(bigdata_connection(), current_trace) - if only_results: return list(query_results.values()) return query_results diff --git a/src/bigdata_research_tools/search/search_utils.py b/src/bigdata_research_tools/search/search_utils.py index 84409e5..3854a4e 100644 --- a/src/bigdata_research_tools/search/search_utils.py +++ b/src/bigdata_research_tools/search/search_utils.py @@ -1,19 +1,22 @@ from itertools import chain from json import JSONDecodeError from logging import Logger, getLogger -from pydantic import ValidationError from re import findall from time import sleep -from typing import List, Tuple +from typing import List, Tuple + from bigdata_client.connection import RequestMaxLimitExceeds from bigdata_client.document import Document from bigdata_client.models.advanced_search_query import ListQueryComponent from bigdata_client.models.document import DocumentChunk from bigdata_client.query_type import QueryType +from pydantic import ValidationError + from bigdata_research_tools.client import bigdata_connection logger: Logger = getLogger(__name__) + def _collect_entity_keys(results: List[Document]) -> List[str]: """ Collect all entity keys from the search results. @@ -33,6 +36,7 @@ def _collect_entity_keys(results: List[Document]) -> List[str]: entity_keys = list(entity_keys) return entity_keys + def _look_up_entities_binary_search( entity_keys: List[str], max_batch_size: int = 50 ) -> List[ListQueryComponent]: @@ -63,7 +67,7 @@ def depth_first_search(batch: List[str]) -> None: """ non_entity_key_pattern = r"'key':\s*'([A-Z0-9]{6})'.+?'entityType':\s*'[A-Z]+'" - try: + try: batch_lookup = bigdata.knowledge_graph.get_entities(batch) entities.extend(batch_lookup) except ValidationError as e: @@ -99,6 +103,7 @@ def depth_first_search(batch: List[str]) -> None: return entities + def filter_search_results( results: List[List[Document]], ) -> Tuple[List[Document], List[ListQueryComponent]]: @@ -122,10 +127,10 @@ def filter_search_results( return results, entities -def build_chunk_entities(chunk: DocumentChunk, - entities: List[ListQueryComponent] -) -> List[dict]: +def build_chunk_entities( + chunk: DocumentChunk, entities: List[ListQueryComponent] +) -> List[dict]: entity_key_map = {entity.id: entity for entity in entities} chunk_entities = [ diff --git a/src/bigdata_research_tools/tracing.py b/src/bigdata_research_tools/tracing.py index 16f8db2..146b767 100644 --- a/src/bigdata_research_tools/tracing.py +++ b/src/bigdata_research_tools/tracing.py @@ -19,6 +19,7 @@ class TraceEventNames(Enum): COMPANY_SEARCH = "CompanySearchRun" RUN_SEARCH = "SearchRun" + @dataclasses.dataclass class Trace: event_name: TraceEventNames = None diff --git a/src/bigdata_research_tools/utils/distance.py b/src/bigdata_research_tools/utils/distance.py index 10a9837..a847c1d 100644 --- a/src/bigdata_research_tools/utils/distance.py +++ b/src/bigdata_research_tools/utils/distance.py @@ -1,16 +1,21 @@ from functools import lru_cache + def levenshtein_distance(a: str, b: str) -> int: @lru_cache(maxsize=None) def dist(i: int, j: int) -> int: - if i == 0: return j - if j == 0: return i + if i == 0: + return j + if j == 0: + return i - if a[i-1] == b[j-1]: - return dist(i-1, j-1) + if a[i - 1] == b[j - 1]: + return dist(i - 1, j - 1) return min( - dist(i-1, j) + 1, # delete a char from string 1 - dist(i, j-1) + 1, # insert a char into string 1 - dist(i-1, j-1) + 1 # substitute a char in string 1 from a char in string 2 + dist(i - 1, j) + 1, # delete a char from string 1 + dist(i, j - 1) + 1, # insert a char into string 1 + dist(i - 1, j - 1) + + 1, # substitute a char in string 1 from a char in string 2 ) - return dist(len(a), len(b)) \ No newline at end of file + + return dist(len(a), len(b)) diff --git a/src/bigdata_research_tools/utils/observer.py b/src/bigdata_research_tools/utils/observer.py index 16316a9..ed83942 100644 --- a/src/bigdata_research_tools/utils/observer.py +++ b/src/bigdata_research_tools/utils/observer.py @@ -1,14 +1,15 @@ -from typing import Any -from datetime import datetime - from abc import ABC, abstractmethod +from datetime import datetime +from typing import Any from pydantic import BaseModel + class OberserverNotification(BaseModel): timestamp: str message: Any + class Observer(ABC): @abstractmethod def update(self, message: OberserverNotification): @@ -18,13 +19,13 @@ def update(self, message: OberserverNotification): class Observable(ABC): def __init__(self): self.observers = [] - + def register_observer(self, observer: Observer): self.observers.append(observer) def unregister_observer(self, observer: Observer): self.observers.remove(observer) - + def notify_observers(self, message: Any): ts = datetime.now().isoformat() notification = OberserverNotification(timestamp=ts, message=message) diff --git a/src/bigdata_research_tools/visuals/risk_visuals.py b/src/bigdata_research_tools/visuals/risk_visuals.py index 4bb7e6c..7e0e8bf 100644 --- a/src/bigdata_research_tools/visuals/risk_visuals.py +++ b/src/bigdata_research_tools/visuals/risk_visuals.py @@ -1,9 +1,7 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple -import numpy as np import pandas as pd import plotly.graph_objects as go -from plotly.subplots import make_subplots from bigdata_research_tools.settings import check_libraries_installed from bigdata_research_tools.visuals.visuals import ExposureDashboard diff --git a/src/bigdata_research_tools/visuals/thematic_visuals.py b/src/bigdata_research_tools/visuals/thematic_visuals.py index 0178103..ee9b39c 100644 --- a/src/bigdata_research_tools/visuals/thematic_visuals.py +++ b/src/bigdata_research_tools/visuals/thematic_visuals.py @@ -1,9 +1,7 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple -import numpy as np import pandas as pd import plotly.graph_objects as go -from plotly.subplots import make_subplots from bigdata_research_tools.settings import check_libraries_installed from bigdata_research_tools.visuals.visuals import ExposureDashboard @@ -33,7 +31,7 @@ class ThematicExposureDashboard(ExposureDashboard): "subplot_titles": [ "Thematic Exposure Heatmap (Raw Scores)", "Total Thematic Exposure Score", - f"Top Thematic Exposures by Company", + "Top Thematic Exposures by Company", "Thematic Scores across Sub-Themes", ], "axis_titles": { diff --git a/src/bigdata_research_tools/watchlists.py b/src/bigdata_research_tools/watchlists.py index 214ccd6..845853d 100644 --- a/src/bigdata_research_tools/watchlists.py +++ b/src/bigdata_research_tools/watchlists.py @@ -1,11 +1,14 @@ from collections import namedtuple -from bigdata_client.models.watchlists import Watchlist from bigdata_client import Bigdata +from bigdata_client.models.watchlists import Watchlist from bigdata_research_tools.utils.distance import levenshtein_distance -def fuzzy_find_watchlist_by_name(name: str, bigdata: Bigdata, only_private: bool = False, max_distance: int = 2) -> Watchlist | None: + +def fuzzy_find_watchlist_by_name( + name: str, bigdata: Bigdata, only_private: bool = False, max_distance: int = 2 +) -> Watchlist | None: watchlists = bigdata.watchlists.list(owned=only_private) WlScore = namedtuple("WlScore", ["watchlist", "score"]) scored_list: list[WlScore] = [] @@ -21,20 +24,27 @@ def fuzzy_find_watchlist_by_name(name: str, bigdata: Bigdata, only_private: bool scored_list.sort(key=lambda x: x.score) return scored_list[0].watchlist -def find_watchlist_by_name(name: str, bigdata: Bigdata, only_private: bool = False) -> Watchlist | None: + +def find_watchlist_by_name( + name: str, bigdata: Bigdata, only_private: bool = False +) -> Watchlist | None: watchlists = bigdata.watchlists.list(owned=only_private) for wl in watchlists: if wl.name.lower() == name.lower(): return wl return None -def create_watchlist(name: str, company_names: list[str], bigdata: Bigdata) -> Watchlist: - """Create a watchlist with the given name from a list of company names. - """ + +def create_watchlist( + name: str, company_names: list[str], bigdata: Bigdata +) -> Watchlist: + """Create a watchlist with the given name from a list of company names.""" if find_watchlist_by_name(name, bigdata, only_private=True): raise ValueError(f"You already have access to a Watchlist with name '{name}'.") entity_list = [] for company_name in company_names: bigdata.knowledge_graph.find_companies(company_name, limit=1) - entity_list.append(bigdata.knowledge_graph.find_companies(company_name, limit=1)[0]) + entity_list.append( + bigdata.knowledge_graph.find_companies(company_name, limit=1)[0] + ) return bigdata.watchlists.create(name=name, items=[e.id for e in entity_list]) diff --git a/src/bigdata_research_tools/workflows/base.py b/src/bigdata_research_tools/workflows/base.py index 6383d4b..524ae7b 100644 --- a/src/bigdata_research_tools/workflows/base.py +++ b/src/bigdata_research_tools/workflows/base.py @@ -1,6 +1,6 @@ from bigdata_research_tools.utils.observer import Observable + class Workflow(Observable): - def __init__(self): - super().__init__() \ No newline at end of file + super().__init__() diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index acf80dd..e7f0e46 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -5,11 +5,11 @@ from pandas import merge from bigdata_research_tools.client import init_bigdata_client -from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.narrative_labeler import NarrativeLabeler from bigdata_research_tools.search import search_narratives from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace +from bigdata_research_tools.workflows.base import Workflow logger: Logger = getLogger(__name__) @@ -92,7 +92,7 @@ def mine_narratives( ) try: # Run a search via BigData API with our mining parameters - self.notify_observers(f"Searching documents for relevant content") + self.notify_observers("Searching documents for relevant content") df_sentences = search_narratives( sentences=self.narrative_sentences, sources=self.sources, @@ -106,9 +106,10 @@ def mine_narratives( current_trace=current_trace, fiscal_year=self.fiscal_year, bigdata_client=bigdata_client, - fiscal_year=self.fiscal_year, ) - self.notify_observers(f"Search completed. {len(df_sentences)} chunks found.") + self.notify_observers( + f"Search completed. {len(df_sentences)} chunks found." + ) self.notify_observers("Labelling search results") # Label the search results with our narrative sentences labeler = NarrativeLabeler(llm_model=self.llm_model) @@ -116,7 +117,9 @@ def mine_narratives( self.narrative_sentences, texts=df_sentences["text"].tolist(), ) - self.notify_observers(f"Labelling completed. {len(df_labels)} labels generated.") + self.notify_observers( + f"Labelling completed. {len(df_labels)} labels generated." + ) self.notify_observers("Post-processing results") # Merge and process results df_labeled = merge( @@ -131,11 +134,11 @@ def mine_narratives( return {} # Export to Excel if path provided if export_path: - self.notify_observers(f"Exporting results to excel") + self.notify_observers("Exporting results to excel") save_to_excel( export_path, tables={"Semantic Labels": (df_labeled, (0, 0))} ) - self.notify_observers(f"Results exported") + self.notify_observers("Results exported") except Exception: execution_result = "error" diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 9a48705..aa5e642 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -12,15 +12,13 @@ from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace from bigdata_research_tools.tree import SemanticTree, generate_risk_tree -from bigdata_research_tools.workflows.utils import get_scored_df from bigdata_research_tools.workflows.base import Workflow - +from bigdata_research_tools.workflows.utils import get_scored_df logger: Logger = getLogger(__name__) class RiskAnalyzer(Workflow): - def __init__( self, llm_model: str, @@ -256,7 +254,7 @@ def save_results( df_company: DataFrame, df_industry: DataFrame, motivation_df: DataFrame, - risk_tree: ThemeTree, + risk_tree: SemanticTree, export_path: str, ): """ @@ -337,22 +335,41 @@ def screen_companies( ) try: - self.notify_observers(f"Generating risk taxonomy") + self.notify_observers("Generating risk taxonomy") risk_tree, risk_summaries, terminal_labels = self.create_taxonomy() - self.notify_observers(f"Risk taxonomy generated with {len(terminal_labels)} leafs") + self.notify_observers( + f"Risk taxonomy generated with {len(terminal_labels)} leafs" + ) self.notify_observers(risk_tree.as_string()) - self.notify_observers(f"Searching companies for risk exposure") + self.notify_observers("Searching companies for risk exposure") df_sentences = self.retrieve_results( sentences=risk_summaries, frequency=frequency, document_limit=document_limit, batch_size=batch_size, ) - self.notify_observers(f"Search completed. {len(df_sentences)} chunks found for {len(self.companies)} companies.") - self.notify_observers(df_sentences[["timestamp_utc", "sentence_id", "headline", "entity_name", "text", "other_entities"]].head(10).to_markdown(index=False)) + self.notify_observers( + f"Search completed. {len(df_sentences)} chunks found for {len(self.companies)} companies." + ) + self.notify_observers( + df_sentences[ + [ + "timestamp_utc", + "sentence_id", + "headline", + "entity_name", + "text", + "other_entities", + ] + ] + .head(10) + .to_markdown(index=False) + ) - self.notify_observers(f"Labelling {len(df_sentences)} chunks with {len(terminal_labels)} risks") + self.notify_observers( + f"Labelling {len(df_sentences)} chunks with {len(terminal_labels)} risks" + ) df, df_labeled = self.label_search_results( df_sentences=df_sentences, terminal_labels=terminal_labels, @@ -363,7 +380,9 @@ def screen_companies( "headline", ], ) - self.notify_observers(f"Labeling completed. {len(df_labeled)} chunks labeled with risk factors.") + self.notify_observers( + f"Labeling completed. {len(df_labeled)} chunks labeled with risk factors." + ) self.notify_observers("Post-processing results") df_company, df_industry, df_motivation = self.generate_results( df_labeled, word_range @@ -371,7 +390,7 @@ def screen_companies( self.notify_observers("Results post-processed") # Export to Excel if path provided if export_path: - self.notify_observers(f"Exporting results to disk") + self.notify_observers("Exporting results to disk") self.save_results( df_labeled, df_company, @@ -380,7 +399,7 @@ def screen_companies( risk_tree, export_path=export_path, ) - self.notify_observers(f"Results exported") + self.notify_observers("Results exported") except Exception as e: execution_result = "error" raise e diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index b390fbe..cb8c3e8 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -5,22 +5,20 @@ from bigdata_client.models.search import DocumentType from pandas import DataFrame, merge -from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel -from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.client import init_bigdata_client +from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace from bigdata_research_tools.tree import generate_theme_tree +from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df - logger: Logger = getLogger(__name__) class ThematicScreener(Workflow): - def __init__( self, llm_model: str, @@ -120,7 +118,7 @@ def screen_companies( try: self.provider, self.model = self.llm_model.split("::") - self.notify_observers(f"Generating thematic tree") + self.notify_observers("Generating thematic tree") theme_tree = generate_theme_tree( main_theme=self.main_theme, focus=self.focus, @@ -129,9 +127,11 @@ def screen_companies( theme_summaries = theme_tree.get_terminal_summaries() terminal_labels = theme_tree.get_terminal_labels() - self.notify_observers(f"Thematic tree generated with {len(terminal_labels)} leafs") + self.notify_observers( + f"Thematic tree generated with {len(terminal_labels)} leafs" + ) self.notify_observers(theme_tree.as_string()) - self.notify_observers(f"Searching companies for thematic exposure") + self.notify_observers("Searching companies for thematic exposure") df_sentences = search_by_companies( companies=self.companies, sentences=theme_summaries, @@ -147,19 +147,36 @@ def screen_companies( current_trace=current_trace, bigdata_client=bigdata_client, ) - self.notify_observers(f"Search completed. {len(df_sentences)} chunks found for {len(self.companies)} companies.") - self.notify_observers(df_sentences[["timestamp_utc", "sentence_id", "headline", "entity_name", "text", "other_entities"]].head(10).to_markdown(index=False)) + self.notify_observers( + f"Search completed. {len(df_sentences)} chunks found for {len(self.companies)} companies." + ) + self.notify_observers( + df_sentences[ + [ + "timestamp_utc", + "sentence_id", + "headline", + "entity_name", + "text", + "other_entities", + ] + ] + .head(10) + .to_markdown(index=False) + ) # Label the search results with our theme labels labeler = ScreenerLabeler(llm_model=self.llm_model) - self.notify_observers(f"Labelling {len(df_sentences)} chunks with {len(terminal_labels)} themes") + self.notify_observers( + f"Labelling {len(df_sentences)} chunks with {len(terminal_labels)} themes" + ) df_labels = labeler.get_labels( main_theme=self.main_theme, labels=terminal_labels, texts=df_sentences["masked_text"].tolist(), ) - self.notify_observers(f"Labelling completed") + self.notify_observers("Labelling completed") # Merge and process results - self.notify_observers(f"Post-processing results") + self.notify_observers("Post-processing results") df = merge(df_sentences, df_labels, left_index=True, right_index=True) df = labeler.post_process_dataframe(df) @@ -172,8 +189,10 @@ def screen_companies( "df_motivation": DataFrame(), "theme_tree": theme_tree, } - self.notify_observers(f"Results post-processed") - self.notify_observers(f"Scoring thematic exposure for {len(df['Company'])} companies") + self.notify_observers("Results post-processed") + self.notify_observers( + f"Scoring thematic exposure for {len(df['Company'])} companies" + ) df_company = get_scored_df( df, index_columns=["Company", "Ticker", "Industry"], @@ -182,17 +201,19 @@ def screen_companies( df_industry = get_scored_df( df, index_columns=["Industry"], pivot_column="Theme" ) - self.notify_observers(f"Thematic exposure scored") - self.notify_observers(f"Generating motivations for {len(df_company)} companies") + self.notify_observers("Thematic exposure scored") + self.notify_observers( + f"Generating motivations for {len(df_company)} companies" + ) motivation_generator = Motivation(model=self.llm_model) motivation_df = motivation_generator.generate_company_motivations( df=df, theme_name=self.main_theme, word_range=word_range ) - self.notify_observers(f"Motivations generated") + self.notify_observers("Motivations generated") # Export to Excel if path provided if export_path: - self.notify_observers(f"Exporting results to excel") + self.notify_observers("Exporting results to excel") save_to_excel( file_path=export_path, tables={ @@ -202,7 +223,7 @@ def screen_companies( "Motivations": (motivation_df, (0, 0)), }, ) - self.notify_observers(f"Results exported.") + self.notify_observers("Results exported.") except Exception: execution_result = "error" raise diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index ebb71d2..bff3829 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -4,14 +4,14 @@ from typing import List +from IPython.display import HTML, display from pandas import DataFrame -from IPython.display import display, HTML def display_output_chunks_dataframe(final_df): """ Display selected document chunks in a formatted HTML view for better readability. - + Args: final_df: DataFrame containing semantic labels with document chunks """ @@ -24,13 +24,16 @@ def display_output_chunks_dataframe(final_df): output_lines.append(f"Industry: {element.Industry}
") output_lines.append(f"Date: {element.Date}
") output_lines.append(f"Headline: {element.Headline}
") - output_lines.append(f"Sentence Identifier: {element['Document ID']}
") + output_lines.append( + f"Sentence Identifier: {element['Document ID']}
" + ) output_lines.append(f"Quote: {element.Quote}
") output_lines.append(f"Sub-Theme Label: {element.Theme}
") output_lines.append("--------------------
") # Join all lines into a single string and display it - display(HTML(''.join(output_lines))) + display(HTML("".join(output_lines))) + def get_scored_df( df: DataFrame, index_columns: List[str], pivot_column: str diff --git a/tests/test_llm/test_base.py b/tests/test_llm/test_base.py index 5f7a9ca..baddc32 100644 --- a/tests/test_llm/test_base.py +++ b/tests/test_llm/test_base.py @@ -1,16 +1,20 @@ import pytest -from unittest.mock import AsyncMock + from bigdata_research_tools.llm.base import AsyncLLMProvider + class DummyAsyncLLMProvider(AsyncLLMProvider): async def get_response(self, chat_history, **kwargs): return "dummy response" + async def get_tools_response(self, chat_history, tools, temperature=0, **kwargs): return {"func_names": ["dummy_func"], "arguments": [{}], "text": "dummy text"} + async def get_stream_response(self, chat_history, **kwargs): for chunk in ["chunk1", "chunk2"]: yield chunk + @pytest.mark.asyncio async def test_get_response(): provider = DummyAsyncLLMProvider(model="dummy-model") @@ -18,6 +22,7 @@ async def test_get_response(): response = await provider.get_response(chat_history) assert response == "dummy response" + @pytest.mark.asyncio async def test_get_tools_response(): provider = DummyAsyncLLMProvider(model="dummy-model") @@ -27,6 +32,7 @@ async def test_get_tools_response(): assert response["func_names"] == ["dummy_func"] assert response["text"] == "dummy text" + @pytest.mark.asyncio async def test_get_stream_response(): provider = DummyAsyncLLMProvider(model="dummy-model") diff --git a/tests/test_llm/test_bedrock.py b/tests/test_llm/test_bedrock.py index 3bfc7d1..fba1b91 100644 --- a/tests/test_llm/test_bedrock.py +++ b/tests/test_llm/test_bedrock.py @@ -1,32 +1,43 @@ +from unittest.mock import MagicMock, patch import pytest -from unittest.mock import patch, MagicMock, AsyncMock + from bigdata_research_tools.llm.bedrock import AsyncBedrockProvider + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.bedrock.Session') +@patch("bigdata_research_tools.llm.bedrock.Session") async def test_get_response(mock_session): mock_bedrock_client = MagicMock() mock_bedrock_client.converse.return_value = { "output": {"message": {"content": [{"text": "mocked bedrock response"}]}} } - mock_session.return_value = MagicMock(client=MagicMock(return_value=mock_bedrock_client)) + mock_session.return_value = MagicMock( + client=MagicMock(return_value=mock_bedrock_client) + ) provider = AsyncBedrockProvider(model="bedrock-model", region="us-east-1") chat_history = [{"role": "user", "content": "Hello"}] response = await provider.get_response(chat_history) assert response == "mocked bedrock response" + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.bedrock.Session') +@patch("bigdata_research_tools.llm.bedrock.Session") async def test_get_tools_response(mock_session): mock_bedrock_client = MagicMock() mock_bedrock_client.converse.return_value = { - "output": {"message": {"content": [ - {"toolUse": {"name": "tool1", "input": {"arg": "val"}}}, - {"text": "tool response text"} - ]}} + "output": { + "message": { + "content": [ + {"toolUse": {"name": "tool1", "input": {"arg": "val"}}}, + {"text": "tool response text"}, + ] + } + } } - mock_session.return_value = MagicMock(client=MagicMock(return_value=mock_bedrock_client)) + mock_session.return_value = MagicMock( + client=MagicMock(return_value=mock_bedrock_client) + ) provider = AsyncBedrockProvider(model="bedrock-model", region="us-east-1") chat_history = [{"role": "user", "content": "Use tool"}] tools = [{"name": "tool1"}] @@ -35,10 +46,11 @@ async def test_get_tools_response(mock_session): assert response["func_names"] == ["tool1"] assert response["arguments"] == [{"arg": "val"}] + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.bedrock.Session') +@patch("bigdata_research_tools.llm.bedrock.Session") async def test_get_stream_response_not_implemented(mock_session): provider = AsyncBedrockProvider(model="bedrock-model", region="us-east-1") chat_history = [{"role": "user", "content": "Stream"}] with pytest.raises(NotImplementedError): - await provider.get_stream_response(chat_history) \ No newline at end of file + await provider.get_stream_response(chat_history) diff --git a/tests/test_llm/test_init.py b/tests/test_llm/test_init.py index 0305ce3..9fe0303 100644 --- a/tests/test_llm/test_init.py +++ b/tests/test_llm/test_init.py @@ -1,5 +1,6 @@ from bigdata_research_tools.llm import AsyncLLMEngine, LLMEngine + def test_imports(): assert AsyncLLMEngine is not None assert LLMEngine is not None diff --git a/tests/test_llm/test_openai.py b/tests/test_llm/test_openai.py index 2e5421b..b9a5183 100644 --- a/tests/test_llm/test_openai.py +++ b/tests/test_llm/test_openai.py @@ -1,9 +1,12 @@ +from unittest.mock import AsyncMock, MagicMock, patch + import pytest -from unittest.mock import patch, MagicMock, AsyncMock + from bigdata_research_tools.llm.openai import AsyncOpenAIProvider + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.openai.AsyncOpenAI') +@patch("bigdata_research_tools.llm.openai.AsyncOpenAI") async def test_get_response(mock_async_openai): mock_client = AsyncMock() mock_client.chat.completions.create.return_value = MagicMock( @@ -15,8 +18,9 @@ async def test_get_response(mock_async_openai): response = await provider.get_response(chat_history) assert response == "mocked response" + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.openai.AsyncOpenAI') +@patch("bigdata_research_tools.llm.openai.AsyncOpenAI") async def test_get_tools_response(mock_async_openai): mock_client = AsyncMock() mock_client.chat.completions.create.return_value = MagicMock( @@ -29,12 +33,15 @@ async def test_get_tools_response(mock_async_openai): response = await provider.get_tools_response(chat_history, tools) assert isinstance(response, dict) + @pytest.mark.asyncio -@patch('bigdata_research_tools.llm.openai.AsyncOpenAI') +@patch("bigdata_research_tools.llm.openai.AsyncOpenAI") async def test_get_stream_response(mock_async_openai): mock_client = AsyncMock() mock_stream = AsyncMock() - mock_stream.__aiter__.return_value = [MagicMock(choices=[MagicMock(delta=MagicMock(content="streamed"))])] + mock_stream.__aiter__.return_value = [ + MagicMock(choices=[MagicMock(delta=MagicMock(content="streamed"))]) + ] mock_client.chat.completions.create.return_value = mock_stream mock_async_openai.return_value = mock_client provider = AsyncOpenAIProvider(model="gpt-3.5-turbo") diff --git a/tests/test_llm/test_utils.py b/tests/test_llm/test_utils.py index 0c472d2..ff4c10c 100644 --- a/tests/test_llm/test_utils.py +++ b/tests/test_llm/test_utils.py @@ -1,11 +1,11 @@ -import pytest -from unittest.mock import MagicMock, AsyncMock from bigdata_research_tools.llm.utils import run_concurrent_prompts + class DummyAsyncLLMEngine: async def get_response(self, chat_history, **kwargs): return "dummy response" + def test_run_concurrent_prompts(monkeypatch): engine = DummyAsyncLLMEngine() prompts = ["prompt1", "prompt2"] diff --git a/tests/test_utils/test_distance.py b/tests/test_utils/test_distance.py index 0c9d538..70d3ff3 100644 --- a/tests/test_utils/test_distance.py +++ b/tests/test_utils/test_distance.py @@ -1,27 +1,29 @@ import pytest + from src.bigdata_research_tools.utils.distance import levenshtein_distance + @pytest.mark.parametrize( - "a,b,expected", - [ - # Basic - ("kitten", "sitting", 3), - ("flaw", "lawn", 2), - ("gumbo", "gambol", 2), - ("book", "back", 2), - # Empty strings - ("", "", 0), - ("abc", "", 3), - ("", "abc", 3), - # Identical strings - ("test", "test", 0), - ("a", "a", 0), - # Case sensitivity - ("Test", "test", 1), - # Unicode characters - ("café", "cafe", 1), - ("mañana", "manana", 1), - ] + "a,b,expected", + [ + # Basic + ("kitten", "sitting", 3), + ("flaw", "lawn", 2), + ("gumbo", "gambol", 2), + ("book", "back", 2), + # Empty strings + ("", "", 0), + ("abc", "", 3), + ("", "abc", 3), + # Identical strings + ("test", "test", 0), + ("a", "a", 0), + # Case sensitivity + ("Test", "test", 1), + # Unicode characters + ("café", "cafe", 1), + ("mañana", "manana", 1), + ], ) def test_levenshtein_distance(a, b, expected): - assert levenshtein_distance(a, b) == expected + assert levenshtein_distance(a, b) == expected diff --git a/tests/test_utils/test_observer.py b/tests/test_utils/test_observer.py index e835e13..18d660d 100644 --- a/tests/test_utils/test_observer.py +++ b/tests/test_utils/test_observer.py @@ -1,27 +1,35 @@ -import pytest from datetime import datetime -from bigdata_research_tools.utils.observer import Observer, Observable, OberserverNotification + +from bigdata_research_tools.utils.observer import ( + OberserverNotification, + Observable, + Observer, +) + class TestObserver(Observer): def __init__(self): self.notifications = [] + def update(self, message: OberserverNotification): self.notifications.append(message) + def test_observer_notification_model(): ts = datetime.now().isoformat() - msg = {'foo': 'bar'} + msg = {"foo": "bar"} notification = OberserverNotification(timestamp=ts, message=msg) assert notification.timestamp == ts assert notification.message == msg + def test_observable_register_and_notify(): observable = Observable() observer1 = TestObserver() observer2 = TestObserver() observable.register_observer(observer1) observable.register_observer(observer2) - message = 'test message' + message = "test message" observable.notify_observers(message) assert len(observer1.notifications) == 1 assert len(observer2.notifications) == 1 @@ -29,12 +37,13 @@ def test_observable_register_and_notify(): assert observer2.notifications[0].message == message assert isinstance(observer1.notifications[0].timestamp, str) + def test_observable_unregister(): observable = Observable() observer = TestObserver() observable.register_observer(observer) - observable.notify_observers('first message') + observable.notify_observers("first message") assert len(observer.notifications) == 1 observable.unregister_observer(observer) - observable.notify_observers('should not be received') - assert len(observer.notifications) == 1 # No new notifications after unregistering + observable.notify_observers("should not be received") + assert len(observer.notifications) == 1 # No new notifications after unregistering From 134377e2dbba4977f1c71acbeee353d90ee93e3f Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 12:10:28 +0200 Subject: [PATCH 15/82] Add CI pipeline for formatting, linting and running unit test --- .github/workflows/test.yml | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..21c4c08 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,48 @@ +name: Lint, format and run unit tests +on: + push: + branches: + - master + pull_request: + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Install uv + uses: astral-sh/setup-uv@v6 + - name: Install dependencies + run: uv sync --locked --dev +# Disable type checking for now, the project is not mature enough to pass all checks +# - name: Type check the code +# run: make type-check + - name: Lint the code + run: make lint-check + - name: Format the code + run: make format + unit-tests: + runs-on: ubuntu-latest + needs: lint + strategy: + matrix: + python-version: ["3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: astral-sh/setup-uv@v6 + with: + python-version: ${{ matrix.python-version }} + enable-cache: true + - name: Install dependencies + run: uv sync --locked --dev + - name: Test with pytest + run: make tests \ No newline at end of file From 178e8bd2674b9b8185f14fc9172ccebbdec52fb1 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 12:46:07 +0200 Subject: [PATCH 16/82] Ensure all non-optional dependencies are accuratly classified --- CHANGELOG.md | 1 + Makefile | 2 +- pyproject.toml | 22 +- src/bigdata_research_tools/tree.py | 11 +- uv.lock | 933 +++++------------------------ 5 files changed, 141 insertions(+), 828 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 601cbbe..d0e1714 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Preparation for a first stable release. ### Fixed - Changed build system to use `uv_build` instead of `setuptools` to avoid issues with package data inclusion. +- Fix duplicate dependencies in main vs optional dependencies. `openai` is now only optional while `graphviz`, `openpyxl` and `Pillow` is now only in main dependencies. ## [0.20.1] - 2025-09-16 diff --git a/Makefile b/Makefile index 9601be8..db0688e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: tests lint format tests: - @uv run -m pytest --cov --cov-config=.coveragerc --cov-report term --cov-report xml:./coverage-reports/coverage.xml -s tests/* + @uv run -m pytest --cov --cov-report term --cov-report xml:./coverage-reports/coverage.xml -s tests/* lint: @uvx ruff check --extend-select I --fix src/bigdata_research_tools/ tests/ diff --git a/pyproject.toml b/pyproject.toml index 53936c6..1382174 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,35 +10,20 @@ dependencies = [ "pandas>=2.2.3,<3.0.0", "openpyxl>=3.1.5,<4.0.0", "pillow>=11.1.0,<12.0.0", - "openai>=1.61.1,<2.0.0", "graphviz>=0.20.3,<0.21.0", "tqdm>=4.67.1", "ipython>=8.0.0,<9.0.0", "json-repair>=0.44.1", "tabulate>=0.9.0,<1.0.0", + "plotly>=6.0.0,<7.0.0", ] [project.urls] homepage = "https://bigdata.com/api" [project.optional-dependencies] -excel = ["openpyxl>=3.1.5,<4.0.0", "pillow>=11.1.0,<12.0.0"] openai = ["openai>=1.61.1,<2.0.0"] bedrock = ["boto3>=1.24.0,<2.0.0"] -plotly = ["plotly>=6.0.0,<7.0.0"] -graphviz = ["graphviz>=0.20.3,<0.21.0"] - -docs = [ - "Sphinx>=7.2.6", - "autodoc-pydantic>=2.0.1", - "myst-parser>=2.0.0", - "furo>=2024.1.29", - "sphinxcontrib-spelling>=8.0.0", - "sphinx-new-tab-link>=0.6.0", - "sphinx-copybutton>=0.5.2", - "sphinx-reredirects>=0.1.5", - "sphinx-togglebutton>=0.3.2", -] azure = [ "azure-identity>=1.24.0", "openai>=1.61.1,<2.0.0", @@ -47,12 +32,11 @@ azure = [ [dependency-groups] dev = [ "pytest>=8.4", + "pytest-cov>=7.0.0", "pytest-asyncio>=0.20.0", - "black>=25.1.0", - "isort>=6.0.0", "pre-commit>=4.1.0", ] [build-system] requires = ["uv_build>=0.8.22,<0.9.0"] -build-backend = "uv_build" \ No newline at end of file +build-backend = "uv_build" diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index 413d04b..4e92ade 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional +import graphviz from json_repair import repair_json from pandas import DataFrame @@ -196,21 +197,13 @@ def visualize(self, engine: str = "graphviz") -> None: f"Supported engines are 'graphviz' and 'plotly'." ) - def _visualize_graphviz(self) -> "graphviz.Digraph": + def _visualize_graphviz(self) -> graphviz.Digraph: """ Auxiliary function to visualize the tree using Graphviz. Returns: A Graphviz Digraph object for rendering the mindmap. """ - try: - import graphviz - except ImportError: - raise ImportError( - "Missing optional dependency for tree visualization, " - "please install `bigdata_research_tools[graphviz]` to enable them." - ) - mindmap = graphviz.Digraph() # Set direction to left-right diff --git a/uv.lock b/uv.lock index 7e81d07..e4b3ba6 100644 --- a/uv.lock +++ b/uv.lock @@ -8,18 +8,6 @@ resolution-markers = [ "python_full_version < '3.10'", ] -[[package]] -name = "accessible-pygments" -version = "0.0.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bc/c1/bbac6a50d02774f91572938964c582fff4270eee73ab822a4aeea4d8b11b/accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872", size = 1377899, upload-time = "2024-05-10T11:23:10.216Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, -] - [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -145,32 +133,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] -[[package]] -name = "alabaster" -version = "0.7.16" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -sdist = { url = "https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776, upload-time = "2024-01-10T00:56:10.189Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/34/d4e1c02d3bee589efb5dfa17f88ea08bdb3e3eac12bc475462aec52ed223/alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92", size = 13511, upload-time = "2024-01-10T00:56:08.388Z" }, -] - -[[package]] -name = "alabaster" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -222,21 +184,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] -[[package]] -name = "autodoc-pydantic" -version = "2.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/df/87120e2195f08d760bc5cf8a31cfa2381a6887517aa89453b23f1ae3354f/autodoc_pydantic-2.2.0-py3-none-any.whl", hash = "sha256:8c6a36fbf6ed2700ea9c6d21ea76ad541b621fbdf16b5a80ee04673548af4d95", size = 34001, upload-time = "2024-04-27T10:57:00.542Z" }, -] - [[package]] name = "azure-core" version = "1.35.0" @@ -267,15 +214,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/74/17428cb429e8d52f6d0d69ed685f4760a545cb0156594963a9337b53b6c9/azure_identity-1.24.0-py3-none-any.whl", hash = "sha256:9e04997cde0ab02ed66422c74748548e620b7b29361c72ce622acab0267ff7c4", size = 187890, upload-time = "2025-08-07T22:27:38.033Z" }, ] -[[package]] -name = "babel" -version = "2.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, -] - [[package]] name = "backports-asyncio-runner" version = "1.2.0" @@ -285,19 +223,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, ] -[[package]] -name = "beautifulsoup4" -version = "4.13.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "soupsieve" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/2e/3e5079847e653b1f6dc647aa24549d68c6addb4c595cc0d902d1b19308ad/beautifulsoup4-4.13.5.tar.gz", hash = "sha256:5e70131382930e7c3de33450a2f54a63d5e4b19386eab43a5b34d594268f3695", size = 622954, upload-time = "2025-08-24T14:06:13.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/eb/f4151e0c7377a6e08a38108609ba5cede57986802757848688aeedd1b9e8/beautifulsoup4-4.13.5-py3-none-any.whl", hash = "sha256:642085eaa22233aceadff9c69651bc51e8bf3f874fb6d7104ece2beb24b47c4a", size = 105113, upload-time = "2025-08-24T14:06:14.884Z" }, -] - [[package]] name = "bigdata-client" version = "2.18.2" @@ -327,10 +252,10 @@ dependencies = [ { name = "ipython", version = "8.37.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "json-repair", version = "0.44.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "json-repair", version = "0.50.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "openai" }, { name = "openpyxl" }, { name = "pandas" }, { name = "pillow" }, + { name = "plotly" }, { name = "tabulate" }, { name = "tqdm" }, ] @@ -343,123 +268,43 @@ azure = [ bedrock = [ { name = "boto3" }, ] -docs = [ - { name = "autodoc-pydantic" }, - { name = "furo" }, - { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "myst-parser", version = "4.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "sphinx-copybutton" }, - { name = "sphinx-new-tab-link" }, - { name = "sphinx-reredirects", version = "0.1.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "sphinx-reredirects", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "sphinx-togglebutton" }, - { name = "sphinxcontrib-spelling", version = "8.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-spelling", version = "8.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, -] -excel = [ - { name = "openpyxl" }, - { name = "pillow" }, -] -graphviz = [ - { name = "graphviz" }, -] openai = [ { name = "openai" }, ] -plotly = [ - { name = "plotly" }, -] [package.dev-dependencies] dev = [ - { name = "black" }, - { name = "isort" }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "pytest-cov" }, ] [package.metadata] requires-dist = [ - { name = "autodoc-pydantic", marker = "extra == 'docs'", specifier = ">=2.0.1" }, { name = "azure-identity", marker = "extra == 'azure'", specifier = ">=1.24.0" }, { name = "bigdata-client", specifier = ">=2.15.0" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.24.0,<2.0.0" }, - { name = "furo", marker = "extra == 'docs'", specifier = ">=2024.1.29" }, { name = "graphviz", specifier = ">=0.20.3,<0.21.0" }, - { name = "graphviz", marker = "extra == 'graphviz'", specifier = ">=0.20.3,<0.21.0" }, { name = "ipython", specifier = ">=8.0.0,<9.0.0" }, { name = "json-repair", specifier = ">=0.44.1" }, - { name = "myst-parser", marker = "extra == 'docs'", specifier = ">=2.0.0" }, - { name = "openai", specifier = ">=1.61.1,<2.0.0" }, { name = "openai", marker = "extra == 'azure'", specifier = ">=1.61.1,<2.0.0" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.61.1,<2.0.0" }, { name = "openpyxl", specifier = ">=3.1.5,<4.0.0" }, - { name = "openpyxl", marker = "extra == 'excel'", specifier = ">=3.1.5,<4.0.0" }, { name = "pandas", specifier = ">=2.2.3,<3.0.0" }, { name = "pillow", specifier = ">=11.1.0,<12.0.0" }, - { name = "pillow", marker = "extra == 'excel'", specifier = ">=11.1.0,<12.0.0" }, - { name = "plotly", marker = "extra == 'plotly'", specifier = ">=6.0.0,<7.0.0" }, - { name = "sphinx", marker = "extra == 'docs'", specifier = ">=7.2.6" }, - { name = "sphinx-copybutton", marker = "extra == 'docs'", specifier = ">=0.5.2" }, - { name = "sphinx-new-tab-link", marker = "extra == 'docs'", specifier = ">=0.6.0" }, - { name = "sphinx-reredirects", marker = "extra == 'docs'", specifier = ">=0.1.5" }, - { name = "sphinx-togglebutton", marker = "extra == 'docs'", specifier = ">=0.3.2" }, - { name = "sphinxcontrib-spelling", marker = "extra == 'docs'", specifier = ">=8.0.0" }, + { name = "plotly", specifier = ">=6.0.0,<7.0.0" }, { name = "tabulate", specifier = ">=0.9.0,<1.0.0" }, { name = "tqdm", specifier = ">=4.67.1" }, ] -provides-extras = ["excel", "openai", "bedrock", "plotly", "graphviz", "docs", "azure"] +provides-extras = ["openai", "bedrock", "azure"] [package.metadata.requires-dev] dev = [ - { name = "black", specifier = ">=25.1.0" }, - { name = "isort", specifier = ">=6.0.0" }, { name = "pre-commit", specifier = ">=4.1.0" }, { name = "pytest", specifier = ">=8.4" }, { name = "pytest-asyncio", specifier = ">=0.20.0" }, -] - -[[package]] -name = "black" -version = "25.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "click", version = "8.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/3b/4ba3f93ac8d90410423fdd31d7541ada9bcee1df32fb90d26de41ed40e1d/black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32", size = 1629419, upload-time = "2025-01-29T05:37:06.642Z" }, - { url = "https://files.pythonhosted.org/packages/b4/02/0bde0485146a8a5e694daed47561785e8b77a0466ccc1f3e485d5ef2925e/black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da", size = 1461080, upload-time = "2025-01-29T05:37:09.321Z" }, - { url = "https://files.pythonhosted.org/packages/52/0e/abdf75183c830eaca7589144ff96d49bce73d7ec6ad12ef62185cc0f79a2/black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7", size = 1766886, upload-time = "2025-01-29T04:18:24.432Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/97d8bb65b1d8a41f8a6736222ba0a334db7b7b77b8023ab4568288f23973/black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9", size = 1419404, upload-time = "2025-01-29T04:19:04.296Z" }, - { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, - { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" }, - { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, - { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, - { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, - { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, - { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, - { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, - { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, - { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, - { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, - { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, - { url = "https://files.pythonhosted.org/packages/d3/b6/ae7507470a4830dbbfe875c701e84a4a5fb9183d1497834871a715716a92/black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0", size = 1628593, upload-time = "2025-01-29T05:37:23.672Z" }, - { url = "https://files.pythonhosted.org/packages/24/c1/ae36fa59a59f9363017ed397750a0cd79a470490860bc7713967d89cdd31/black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f", size = 1460000, upload-time = "2025-01-29T05:37:25.829Z" }, - { url = "https://files.pythonhosted.org/packages/ac/b6/98f832e7a6c49aa3a464760c67c7856363aa644f2f3c74cf7d624168607e/black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e", size = 1765963, upload-time = "2025-01-29T04:18:38.116Z" }, - { url = "https://files.pythonhosted.org/packages/ce/e9/2cb0a017eb7024f70e0d2e9bdb8c5a5b078c5740c7f8816065d06f04c557/black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355", size = 1419419, upload-time = "2025-01-29T04:18:30.191Z" }, - { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, + { name = "pytest-cov", specifier = ">=7.0.0" }, ] [[package]] @@ -654,44 +499,128 @@ wheels = [ ] [[package]] -name = "click" -version = "8.1.8" +name = "colorama" +version = "0.4.6" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] [[package]] -name = "click" -version = "8.2.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, +name = "coverage" +version = "7.10.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/26/d22c300112504f5f9a9fd2297ce33c35f3d353e4aeb987c8419453b2a7c2/coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239", size = 827704, upload-time = "2025-09-21T20:03:56.815Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/6c/3a3f7a46888e69d18abe3ccc6fe4cb16cccb1e6a2f99698931dafca489e6/coverage-7.10.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc04cc7a3db33664e0c2d10eb8990ff6b3536f6842c9590ae8da4c614b9ed05a", size = 217987, upload-time = "2025-09-21T20:00:57.218Z" }, + { url = "https://files.pythonhosted.org/packages/03/94/952d30f180b1a916c11a56f5c22d3535e943aa22430e9e3322447e520e1c/coverage-7.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e201e015644e207139f7e2351980feb7040e6f4b2c2978892f3e3789d1c125e5", size = 218388, upload-time = "2025-09-21T20:01:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/50/2b/9e0cf8ded1e114bcd8b2fd42792b57f1c4e9e4ea1824cde2af93a67305be/coverage-7.10.7-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:240af60539987ced2c399809bd34f7c78e8abe0736af91c3d7d0e795df633d17", size = 245148, upload-time = "2025-09-21T20:01:01.768Z" }, + { url = "https://files.pythonhosted.org/packages/19/20/d0384ac06a6f908783d9b6aa6135e41b093971499ec488e47279f5b846e6/coverage-7.10.7-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8421e088bc051361b01c4b3a50fd39a4b9133079a2229978d9d30511fd05231b", size = 246958, upload-time = "2025-09-21T20:01:03.355Z" }, + { url = "https://files.pythonhosted.org/packages/60/83/5c283cff3d41285f8eab897651585db908a909c572bdc014bcfaf8a8b6ae/coverage-7.10.7-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6be8ed3039ae7f7ac5ce058c308484787c86e8437e72b30bf5e88b8ea10f3c87", size = 248819, upload-time = "2025-09-21T20:01:04.968Z" }, + { url = "https://files.pythonhosted.org/packages/60/22/02eb98fdc5ff79f423e990d877693e5310ae1eab6cb20ae0b0b9ac45b23b/coverage-7.10.7-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e28299d9f2e889e6d51b1f043f58d5f997c373cc12e6403b90df95b8b047c13e", size = 245754, upload-time = "2025-09-21T20:01:06.321Z" }, + { url = "https://files.pythonhosted.org/packages/b4/bc/25c83bcf3ad141b32cd7dc45485ef3c01a776ca3aa8ef0a93e77e8b5bc43/coverage-7.10.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e16bd7761c5e454f4efd36f345286d6f7c5fa111623c355691e2755cae3b9e", size = 246860, upload-time = "2025-09-21T20:01:07.605Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b7/95574702888b58c0928a6e982038c596f9c34d52c5e5107f1eef729399b5/coverage-7.10.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b1c81d0e5e160651879755c9c675b974276f135558cf4ba79fee7b8413a515df", size = 244877, upload-time = "2025-09-21T20:01:08.829Z" }, + { url = "https://files.pythonhosted.org/packages/47/b6/40095c185f235e085df0e0b158f6bd68cc6e1d80ba6c7721dc81d97ec318/coverage-7.10.7-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:606cc265adc9aaedcc84f1f064f0e8736bc45814f15a357e30fca7ecc01504e0", size = 245108, upload-time = "2025-09-21T20:01:10.527Z" }, + { url = "https://files.pythonhosted.org/packages/c8/50/4aea0556da7a4b93ec9168420d170b55e2eb50ae21b25062513d020c6861/coverage-7.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:10b24412692df990dbc34f8fb1b6b13d236ace9dfdd68df5b28c2e39cafbba13", size = 245752, upload-time = "2025-09-21T20:01:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/6a/28/ea1a84a60828177ae3b100cb6723838523369a44ec5742313ed7db3da160/coverage-7.10.7-cp310-cp310-win32.whl", hash = "sha256:b51dcd060f18c19290d9b8a9dd1e0181538df2ce0717f562fff6cf74d9fc0b5b", size = 220497, upload-time = "2025-09-21T20:01:13.459Z" }, + { url = "https://files.pythonhosted.org/packages/fc/1a/a81d46bbeb3c3fd97b9602ebaa411e076219a150489bcc2c025f151bd52d/coverage-7.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:3a622ac801b17198020f09af3eaf45666b344a0d69fc2a6ffe2ea83aeef1d807", size = 221392, upload-time = "2025-09-21T20:01:14.722Z" }, + { url = "https://files.pythonhosted.org/packages/d2/5d/c1a17867b0456f2e9ce2d8d4708a4c3a089947d0bec9c66cdf60c9e7739f/coverage-7.10.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a609f9c93113be646f44c2a0256d6ea375ad047005d7f57a5c15f614dc1b2f59", size = 218102, upload-time = "2025-09-21T20:01:16.089Z" }, + { url = "https://files.pythonhosted.org/packages/54/f0/514dcf4b4e3698b9a9077f084429681bf3aad2b4a72578f89d7f643eb506/coverage-7.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:65646bb0359386e07639c367a22cf9b5bf6304e8630b565d0626e2bdf329227a", size = 218505, upload-time = "2025-09-21T20:01:17.788Z" }, + { url = "https://files.pythonhosted.org/packages/20/f6/9626b81d17e2a4b25c63ac1b425ff307ecdeef03d67c9a147673ae40dc36/coverage-7.10.7-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5f33166f0dfcce728191f520bd2692914ec70fac2713f6bf3ce59c3deacb4699", size = 248898, upload-time = "2025-09-21T20:01:19.488Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ef/bd8e719c2f7417ba03239052e099b76ea1130ac0cbb183ee1fcaa58aaff3/coverage-7.10.7-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:35f5e3f9e455bb17831876048355dca0f758b6df22f49258cb5a91da23ef437d", size = 250831, upload-time = "2025-09-21T20:01:20.817Z" }, + { url = "https://files.pythonhosted.org/packages/a5/b6/bf054de41ec948b151ae2b79a55c107f5760979538f5fb80c195f2517718/coverage-7.10.7-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4da86b6d62a496e908ac2898243920c7992499c1712ff7c2b6d837cc69d9467e", size = 252937, upload-time = "2025-09-21T20:01:22.171Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e5/3860756aa6f9318227443c6ce4ed7bf9e70bb7f1447a0353f45ac5c7974b/coverage-7.10.7-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:6b8b09c1fad947c84bbbc95eca841350fad9cbfa5a2d7ca88ac9f8d836c92e23", size = 249021, upload-time = "2025-09-21T20:01:23.907Z" }, + { url = "https://files.pythonhosted.org/packages/26/0f/bd08bd042854f7fd07b45808927ebcce99a7ed0f2f412d11629883517ac2/coverage-7.10.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4376538f36b533b46f8971d3a3e63464f2c7905c9800db97361c43a2b14792ab", size = 250626, upload-time = "2025-09-21T20:01:25.721Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a7/4777b14de4abcc2e80c6b1d430f5d51eb18ed1d75fca56cbce5f2db9b36e/coverage-7.10.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:121da30abb574f6ce6ae09840dae322bef734480ceafe410117627aa54f76d82", size = 248682, upload-time = "2025-09-21T20:01:27.105Z" }, + { url = "https://files.pythonhosted.org/packages/34/72/17d082b00b53cd45679bad682fac058b87f011fd8b9fe31d77f5f8d3a4e4/coverage-7.10.7-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:88127d40df529336a9836870436fc2751c339fbaed3a836d42c93f3e4bd1d0a2", size = 248402, upload-time = "2025-09-21T20:01:28.629Z" }, + { url = "https://files.pythonhosted.org/packages/81/7a/92367572eb5bdd6a84bfa278cc7e97db192f9f45b28c94a9ca1a921c3577/coverage-7.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ba58bbcd1b72f136080c0bccc2400d66cc6115f3f906c499013d065ac33a4b61", size = 249320, upload-time = "2025-09-21T20:01:30.004Z" }, + { url = "https://files.pythonhosted.org/packages/2f/88/a23cc185f6a805dfc4fdf14a94016835eeb85e22ac3a0e66d5e89acd6462/coverage-7.10.7-cp311-cp311-win32.whl", hash = "sha256:972b9e3a4094b053a4e46832b4bc829fc8a8d347160eb39d03f1690316a99c14", size = 220536, upload-time = "2025-09-21T20:01:32.184Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ef/0b510a399dfca17cec7bc2f05ad8bd78cf55f15c8bc9a73ab20c5c913c2e/coverage-7.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:a7b55a944a7f43892e28ad4bc0561dfd5f0d73e605d1aa5c3c976b52aea121d2", size = 221425, upload-time = "2025-09-21T20:01:33.557Z" }, + { url = "https://files.pythonhosted.org/packages/51/7f/023657f301a276e4ba1850f82749bc136f5a7e8768060c2e5d9744a22951/coverage-7.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:736f227fb490f03c6488f9b6d45855f8e0fd749c007f9303ad30efab0e73c05a", size = 220103, upload-time = "2025-09-21T20:01:34.929Z" }, + { url = "https://files.pythonhosted.org/packages/13/e4/eb12450f71b542a53972d19117ea5a5cea1cab3ac9e31b0b5d498df1bd5a/coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417", size = 218290, upload-time = "2025-09-21T20:01:36.455Z" }, + { url = "https://files.pythonhosted.org/packages/37/66/593f9be12fc19fb36711f19a5371af79a718537204d16ea1d36f16bd78d2/coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973", size = 218515, upload-time = "2025-09-21T20:01:37.982Z" }, + { url = "https://files.pythonhosted.org/packages/66/80/4c49f7ae09cafdacc73fbc30949ffe77359635c168f4e9ff33c9ebb07838/coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c", size = 250020, upload-time = "2025-09-21T20:01:39.617Z" }, + { url = "https://files.pythonhosted.org/packages/a6/90/a64aaacab3b37a17aaedd83e8000142561a29eb262cede42d94a67f7556b/coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7", size = 252769, upload-time = "2025-09-21T20:01:41.341Z" }, + { url = "https://files.pythonhosted.org/packages/98/2e/2dda59afd6103b342e096f246ebc5f87a3363b5412609946c120f4e7750d/coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6", size = 253901, upload-time = "2025-09-21T20:01:43.042Z" }, + { url = "https://files.pythonhosted.org/packages/53/dc/8d8119c9051d50f3119bb4a75f29f1e4a6ab9415cd1fa8bf22fcc3fb3b5f/coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59", size = 250413, upload-time = "2025-09-21T20:01:44.469Z" }, + { url = "https://files.pythonhosted.org/packages/98/b3/edaff9c5d79ee4d4b6d3fe046f2b1d799850425695b789d491a64225d493/coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b", size = 251820, upload-time = "2025-09-21T20:01:45.915Z" }, + { url = "https://files.pythonhosted.org/packages/11/25/9a0728564bb05863f7e513e5a594fe5ffef091b325437f5430e8cfb0d530/coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a", size = 249941, upload-time = "2025-09-21T20:01:47.296Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fd/ca2650443bfbef5b0e74373aac4df67b08180d2f184b482c41499668e258/coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb", size = 249519, upload-time = "2025-09-21T20:01:48.73Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/f692f125fb4299b6f963b0745124998ebb8e73ecdfce4ceceb06a8c6bec5/coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1", size = 251375, upload-time = "2025-09-21T20:01:50.529Z" }, + { url = "https://files.pythonhosted.org/packages/5e/75/61b9bbd6c7d24d896bfeec57acba78e0f8deac68e6baf2d4804f7aae1f88/coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256", size = 220699, upload-time = "2025-09-21T20:01:51.941Z" }, + { url = "https://files.pythonhosted.org/packages/ca/f3/3bf7905288b45b075918d372498f1cf845b5b579b723c8fd17168018d5f5/coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba", size = 221512, upload-time = "2025-09-21T20:01:53.481Z" }, + { url = "https://files.pythonhosted.org/packages/5c/44/3e32dbe933979d05cf2dac5e697c8599cfe038aaf51223ab901e208d5a62/coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf", size = 220147, upload-time = "2025-09-21T20:01:55.2Z" }, + { url = "https://files.pythonhosted.org/packages/9a/94/b765c1abcb613d103b64fcf10395f54d69b0ef8be6a0dd9c524384892cc7/coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d", size = 218320, upload-time = "2025-09-21T20:01:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/72/4f/732fff31c119bb73b35236dd333030f32c4bfe909f445b423e6c7594f9a2/coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b", size = 218575, upload-time = "2025-09-21T20:01:58.203Z" }, + { url = "https://files.pythonhosted.org/packages/87/02/ae7e0af4b674be47566707777db1aa375474f02a1d64b9323e5813a6cdd5/coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e", size = 249568, upload-time = "2025-09-21T20:01:59.748Z" }, + { url = "https://files.pythonhosted.org/packages/a2/77/8c6d22bf61921a59bce5471c2f1f7ac30cd4ac50aadde72b8c48d5727902/coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b", size = 252174, upload-time = "2025-09-21T20:02:01.192Z" }, + { url = "https://files.pythonhosted.org/packages/b1/20/b6ea4f69bbb52dac0aebd62157ba6a9dddbfe664f5af8122dac296c3ee15/coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49", size = 253447, upload-time = "2025-09-21T20:02:02.701Z" }, + { url = "https://files.pythonhosted.org/packages/f9/28/4831523ba483a7f90f7b259d2018fef02cb4d5b90bc7c1505d6e5a84883c/coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911", size = 249779, upload-time = "2025-09-21T20:02:04.185Z" }, + { url = "https://files.pythonhosted.org/packages/a7/9f/4331142bc98c10ca6436d2d620c3e165f31e6c58d43479985afce6f3191c/coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0", size = 251604, upload-time = "2025-09-21T20:02:06.034Z" }, + { url = "https://files.pythonhosted.org/packages/ce/60/bda83b96602036b77ecf34e6393a3836365481b69f7ed7079ab85048202b/coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f", size = 249497, upload-time = "2025-09-21T20:02:07.619Z" }, + { url = "https://files.pythonhosted.org/packages/5f/af/152633ff35b2af63977edd835d8e6430f0caef27d171edf2fc76c270ef31/coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c", size = 249350, upload-time = "2025-09-21T20:02:10.34Z" }, + { url = "https://files.pythonhosted.org/packages/9d/71/d92105d122bd21cebba877228990e1646d862e34a98bb3374d3fece5a794/coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f", size = 251111, upload-time = "2025-09-21T20:02:12.122Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9e/9fdb08f4bf476c912f0c3ca292e019aab6712c93c9344a1653986c3fd305/coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698", size = 220746, upload-time = "2025-09-21T20:02:13.919Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b1/a75fd25df44eab52d1931e89980d1ada46824c7a3210be0d3c88a44aaa99/coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843", size = 221541, upload-time = "2025-09-21T20:02:15.57Z" }, + { url = "https://files.pythonhosted.org/packages/14/3a/d720d7c989562a6e9a14b2c9f5f2876bdb38e9367126d118495b89c99c37/coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546", size = 220170, upload-time = "2025-09-21T20:02:17.395Z" }, + { url = "https://files.pythonhosted.org/packages/bb/22/e04514bf2a735d8b0add31d2b4ab636fc02370730787c576bb995390d2d5/coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c", size = 219029, upload-time = "2025-09-21T20:02:18.936Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/91128e099035ece15da3445d9015e4b4153a6059403452d324cbb0a575fa/coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15", size = 219259, upload-time = "2025-09-21T20:02:20.44Z" }, + { url = "https://files.pythonhosted.org/packages/8b/51/66420081e72801536a091a0c8f8c1f88a5c4bf7b9b1bdc6222c7afe6dc9b/coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4", size = 260592, upload-time = "2025-09-21T20:02:22.313Z" }, + { url = "https://files.pythonhosted.org/packages/5d/22/9b8d458c2881b22df3db5bb3e7369e63d527d986decb6c11a591ba2364f7/coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0", size = 262768, upload-time = "2025-09-21T20:02:24.287Z" }, + { url = "https://files.pythonhosted.org/packages/f7/08/16bee2c433e60913c610ea200b276e8eeef084b0d200bdcff69920bd5828/coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0", size = 264995, upload-time = "2025-09-21T20:02:26.133Z" }, + { url = "https://files.pythonhosted.org/packages/20/9d/e53eb9771d154859b084b90201e5221bca7674ba449a17c101a5031d4054/coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65", size = 259546, upload-time = "2025-09-21T20:02:27.716Z" }, + { url = "https://files.pythonhosted.org/packages/ad/b0/69bc7050f8d4e56a89fb550a1577d5d0d1db2278106f6f626464067b3817/coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541", size = 262544, upload-time = "2025-09-21T20:02:29.216Z" }, + { url = "https://files.pythonhosted.org/packages/ef/4b/2514b060dbd1bc0aaf23b852c14bb5818f244c664cb16517feff6bb3a5ab/coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6", size = 260308, upload-time = "2025-09-21T20:02:31.226Z" }, + { url = "https://files.pythonhosted.org/packages/54/78/7ba2175007c246d75e496f64c06e94122bdb914790a1285d627a918bd271/coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999", size = 258920, upload-time = "2025-09-21T20:02:32.823Z" }, + { url = "https://files.pythonhosted.org/packages/c0/b3/fac9f7abbc841409b9a410309d73bfa6cfb2e51c3fada738cb607ce174f8/coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2", size = 261434, upload-time = "2025-09-21T20:02:34.86Z" }, + { url = "https://files.pythonhosted.org/packages/ee/51/a03bec00d37faaa891b3ff7387192cef20f01604e5283a5fabc95346befa/coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a", size = 221403, upload-time = "2025-09-21T20:02:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/53/22/3cf25d614e64bf6d8e59c7c669b20d6d940bb337bdee5900b9ca41c820bb/coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb", size = 222469, upload-time = "2025-09-21T20:02:39.011Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/00164f6d30d8a01c3c9c48418a7a5be394de5349b421b9ee019f380df2a0/coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb", size = 220731, upload-time = "2025-09-21T20:02:40.939Z" }, + { url = "https://files.pythonhosted.org/packages/23/9c/5844ab4ca6a4dd97a1850e030a15ec7d292b5c5cb93082979225126e35dd/coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520", size = 218302, upload-time = "2025-09-21T20:02:42.527Z" }, + { url = "https://files.pythonhosted.org/packages/f0/89/673f6514b0961d1f0e20ddc242e9342f6da21eaba3489901b565c0689f34/coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32", size = 218578, upload-time = "2025-09-21T20:02:44.468Z" }, + { url = "https://files.pythonhosted.org/packages/05/e8/261cae479e85232828fb17ad536765c88dd818c8470aca690b0ac6feeaa3/coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f", size = 249629, upload-time = "2025-09-21T20:02:46.503Z" }, + { url = "https://files.pythonhosted.org/packages/82/62/14ed6546d0207e6eda876434e3e8475a3e9adbe32110ce896c9e0c06bb9a/coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a", size = 252162, upload-time = "2025-09-21T20:02:48.689Z" }, + { url = "https://files.pythonhosted.org/packages/ff/49/07f00db9ac6478e4358165a08fb41b469a1b053212e8a00cb02f0d27a05f/coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360", size = 253517, upload-time = "2025-09-21T20:02:50.31Z" }, + { url = "https://files.pythonhosted.org/packages/a2/59/c5201c62dbf165dfbc91460f6dbbaa85a8b82cfa6131ac45d6c1bfb52deb/coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69", size = 249632, upload-time = "2025-09-21T20:02:51.971Z" }, + { url = "https://files.pythonhosted.org/packages/07/ae/5920097195291a51fb00b3a70b9bbd2edbfe3c84876a1762bd1ef1565ebc/coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14", size = 251520, upload-time = "2025-09-21T20:02:53.858Z" }, + { url = "https://files.pythonhosted.org/packages/b9/3c/a815dde77a2981f5743a60b63df31cb322c944843e57dbd579326625a413/coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe", size = 249455, upload-time = "2025-09-21T20:02:55.807Z" }, + { url = "https://files.pythonhosted.org/packages/aa/99/f5cdd8421ea656abefb6c0ce92556709db2265c41e8f9fc6c8ae0f7824c9/coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e", size = 249287, upload-time = "2025-09-21T20:02:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/c3/7a/e9a2da6a1fc5d007dd51fca083a663ab930a8c4d149c087732a5dbaa0029/coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd", size = 250946, upload-time = "2025-09-21T20:02:59.431Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5b/0b5799aa30380a949005a353715095d6d1da81927d6dbed5def2200a4e25/coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2", size = 221009, upload-time = "2025-09-21T20:03:01.324Z" }, + { url = "https://files.pythonhosted.org/packages/da/b0/e802fbb6eb746de006490abc9bb554b708918b6774b722bb3a0e6aa1b7de/coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681", size = 221804, upload-time = "2025-09-21T20:03:03.4Z" }, + { url = "https://files.pythonhosted.org/packages/9e/e8/71d0c8e374e31f39e3389bb0bd19e527d46f00ea8571ec7ec8fd261d8b44/coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880", size = 220384, upload-time = "2025-09-21T20:03:05.111Z" }, + { url = "https://files.pythonhosted.org/packages/62/09/9a5608d319fa3eba7a2019addeacb8c746fb50872b57a724c9f79f146969/coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63", size = 219047, upload-time = "2025-09-21T20:03:06.795Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6f/f58d46f33db9f2e3647b2d0764704548c184e6f5e014bef528b7f979ef84/coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2", size = 219266, upload-time = "2025-09-21T20:03:08.495Z" }, + { url = "https://files.pythonhosted.org/packages/74/5c/183ffc817ba68e0b443b8c934c8795553eb0c14573813415bd59941ee165/coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d", size = 260767, upload-time = "2025-09-21T20:03:10.172Z" }, + { url = "https://files.pythonhosted.org/packages/0f/48/71a8abe9c1ad7e97548835e3cc1adbf361e743e9d60310c5f75c9e7bf847/coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0", size = 262931, upload-time = "2025-09-21T20:03:11.861Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/193a8fb132acfc0a901f72020e54be5e48021e1575bb327d8ee1097a28fd/coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699", size = 265186, upload-time = "2025-09-21T20:03:13.539Z" }, + { url = "https://files.pythonhosted.org/packages/b1/8f/74ecc30607dd95ad50e3034221113ccb1c6d4e8085cc761134782995daae/coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9", size = 259470, upload-time = "2025-09-21T20:03:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/0f/55/79ff53a769f20d71b07023ea115c9167c0bb56f281320520cf64c5298a96/coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f", size = 262626, upload-time = "2025-09-21T20:03:17.673Z" }, + { url = "https://files.pythonhosted.org/packages/88/e2/dac66c140009b61ac3fc13af673a574b00c16efdf04f9b5c740703e953c0/coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1", size = 260386, upload-time = "2025-09-21T20:03:19.36Z" }, + { url = "https://files.pythonhosted.org/packages/a2/f1/f48f645e3f33bb9ca8a496bc4a9671b52f2f353146233ebd7c1df6160440/coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0", size = 258852, upload-time = "2025-09-21T20:03:21.007Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3b/8442618972c51a7affeead957995cfa8323c0c9bcf8fa5a027421f720ff4/coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399", size = 261534, upload-time = "2025-09-21T20:03:23.12Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, + { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, + { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, + { url = "https://files.pythonhosted.org/packages/a3/ad/d1c25053764b4c42eb294aae92ab617d2e4f803397f9c7c8295caa77a260/coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3", size = 217978, upload-time = "2025-09-21T20:03:30.362Z" }, + { url = "https://files.pythonhosted.org/packages/52/2f/b9f9daa39b80ece0b9548bbb723381e29bc664822d9a12c2135f8922c22b/coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c", size = 218370, upload-time = "2025-09-21T20:03:32.147Z" }, + { url = "https://files.pythonhosted.org/packages/dd/6e/30d006c3b469e58449650642383dddf1c8fb63d44fdf92994bfd46570695/coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396", size = 244802, upload-time = "2025-09-21T20:03:33.919Z" }, + { url = "https://files.pythonhosted.org/packages/b0/49/8a070782ce7e6b94ff6a0b6d7c65ba6bc3091d92a92cef4cd4eb0767965c/coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40", size = 246625, upload-time = "2025-09-21T20:03:36.09Z" }, + { url = "https://files.pythonhosted.org/packages/6a/92/1c1c5a9e8677ce56d42b97bdaca337b2d4d9ebe703d8c174ede52dbabd5f/coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594", size = 248399, upload-time = "2025-09-21T20:03:38.342Z" }, + { url = "https://files.pythonhosted.org/packages/c0/54/b140edee7257e815de7426d5d9846b58505dffc29795fff2dfb7f8a1c5a0/coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a", size = 245142, upload-time = "2025-09-21T20:03:40.591Z" }, + { url = "https://files.pythonhosted.org/packages/e4/9e/6d6b8295940b118e8b7083b29226c71f6154f7ff41e9ca431f03de2eac0d/coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b", size = 246284, upload-time = "2025-09-21T20:03:42.355Z" }, + { url = "https://files.pythonhosted.org/packages/db/e5/5e957ca747d43dbe4d9714358375c7546cb3cb533007b6813fc20fce37ad/coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3", size = 244353, upload-time = "2025-09-21T20:03:44.218Z" }, + { url = "https://files.pythonhosted.org/packages/9a/45/540fc5cc92536a1b783b7ef99450bd55a4b3af234aae35a18a339973ce30/coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0", size = 244430, upload-time = "2025-09-21T20:03:46.065Z" }, + { url = "https://files.pythonhosted.org/packages/75/0b/8287b2e5b38c8fe15d7e3398849bb58d382aedc0864ea0fa1820e8630491/coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f", size = 245311, upload-time = "2025-09-21T20:03:48.19Z" }, + { url = "https://files.pythonhosted.org/packages/0c/1d/29724999984740f0c86d03e6420b942439bf5bd7f54d4382cae386a9d1e9/coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431", size = 220500, upload-time = "2025-09-21T20:03:50.024Z" }, + { url = "https://files.pythonhosted.org/packages/43/11/4b1e6b129943f905ca54c339f343877b55b365ae2558806c1be4f7476ed5/coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07", size = 221408, upload-time = "2025-09-21T20:03:51.803Z" }, + { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, ] -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +[package.optional-dependencies] +toml = [ + { name = "tomli", marker = "python_full_version <= '3.11'" }, ] [[package]] @@ -768,15 +697,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, ] -[[package]] -name = "docutils" -version = "0.21.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, -] - [[package]] name = "et-xmlfile" version = "2.0.0" @@ -927,24 +847,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106, upload-time = "2025-06-09T23:02:34.204Z" }, ] -[[package]] -name = "furo" -version = "2025.7.19" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "accessible-pygments" }, - { name = "beautifulsoup4" }, - { name = "pygments" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "sphinx-basic-ng" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d0/69/312cd100fa45ddaea5a588334d2defa331ff427bcb61f5fe2ae61bdc3762/furo-2025.7.19.tar.gz", hash = "sha256:4164b2cafcf4023a59bb3c594e935e2516f6b9d35e9a5ea83d8f6b43808fe91f", size = 1662054, upload-time = "2025-07-19T10:52:09.754Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/34/2b07b72bee02a63241d654f5d8af87a2de977c59638eec41ca356ab915cd/furo-2025.7.19-py3-none-any.whl", hash = "sha256:bdea869822dfd2b494ea84c0973937e35d1575af088b6721a29c7f7878adc9e3", size = 342175, upload-time = "2025-07-19T10:52:02.399Z" }, -] - [[package]] name = "graphviz" version = "0.20.3" @@ -1009,27 +911,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, ] -[[package]] -name = "imagesize" -version = "1.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026, upload-time = "2022-07-01T12:21:05.687Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, -] - -[[package]] -name = "importlib-metadata" -version = "8.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "zipp", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641, upload-time = "2025-04-27T15:29:01.736Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" }, -] - [[package]] name = "iniconfig" version = "2.1.0" @@ -1091,15 +972,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/d0/274fbf7b0b12643cbbc001ce13e6a5b1607ac4929d1b11c72460152c9fc3/ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2", size = 831864, upload-time = "2025-05-31T16:39:06.38Z" }, ] -[[package]] -name = "isort" -version = "6.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/21/1e2a441f74a653a144224d7d21afe8f4169e6c7c20bb13aec3a2dc3815e0/isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450", size = 821955, upload-time = "2025-02-26T21:13:16.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/11/114d0a5f4dabbdcedc1125dee0888514c3c3b16d3e9facad87ed96fad97c/isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615", size = 94186, upload-time = "2025-02-26T21:13:14.911Z" }, -] - [[package]] name = "jedi" version = "0.19.2" @@ -1112,18 +984,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, ] -[[package]] -name = "jinja2" -version = "3.1.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, -] - [[package]] name = "jiter" version = "0.10.0" @@ -1243,86 +1103,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/c2/93368d4c9355e8ad1f6d62b804de241939d0796b2a3a73737f665b802808/json_repair-0.50.0-py3-none-any.whl", hash = "sha256:b15da2c42deb43419b182d97dcfde6cd86d0b18ccd18ed1a887104ce85e7a364", size = 25985, upload-time = "2025-08-20T15:01:56.567Z" }, ] -[[package]] -name = "markdown-it-py" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, -] - -[[package]] -name = "markupsafe" -version = "3.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357, upload-time = "2024-10-18T15:20:51.44Z" }, - { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393, upload-time = "2024-10-18T15:20:52.426Z" }, - { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732, upload-time = "2024-10-18T15:20:53.578Z" }, - { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866, upload-time = "2024-10-18T15:20:55.06Z" }, - { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964, upload-time = "2024-10-18T15:20:55.906Z" }, - { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977, upload-time = "2024-10-18T15:20:57.189Z" }, - { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366, upload-time = "2024-10-18T15:20:58.235Z" }, - { url = "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091, upload-time = "2024-10-18T15:20:59.235Z" }, - { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065, upload-time = "2024-10-18T15:21:00.307Z" }, - { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514, upload-time = "2024-10-18T15:21:01.122Z" }, - { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353, upload-time = "2024-10-18T15:21:02.187Z" }, - { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392, upload-time = "2024-10-18T15:21:02.941Z" }, - { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984, upload-time = "2024-10-18T15:21:03.953Z" }, - { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120, upload-time = "2024-10-18T15:21:06.495Z" }, - { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032, upload-time = "2024-10-18T15:21:07.295Z" }, - { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057, upload-time = "2024-10-18T15:21:08.073Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359, upload-time = "2024-10-18T15:21:09.318Z" }, - { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306, upload-time = "2024-10-18T15:21:10.185Z" }, - { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094, upload-time = "2024-10-18T15:21:11.005Z" }, - { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521, upload-time = "2024-10-18T15:21:12.911Z" }, - { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274, upload-time = "2024-10-18T15:21:13.777Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348, upload-time = "2024-10-18T15:21:14.822Z" }, - { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149, upload-time = "2024-10-18T15:21:15.642Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118, upload-time = "2024-10-18T15:21:17.133Z" }, - { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993, upload-time = "2024-10-18T15:21:18.064Z" }, - { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178, upload-time = "2024-10-18T15:21:18.859Z" }, - { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319, upload-time = "2024-10-18T15:21:19.671Z" }, - { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352, upload-time = "2024-10-18T15:21:20.971Z" }, - { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097, upload-time = "2024-10-18T15:21:22.646Z" }, - { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, - { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274, upload-time = "2024-10-18T15:21:24.577Z" }, - { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352, upload-time = "2024-10-18T15:21:25.382Z" }, - { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122, upload-time = "2024-10-18T15:21:26.199Z" }, - { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085, upload-time = "2024-10-18T15:21:27.029Z" }, - { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978, upload-time = "2024-10-18T15:21:27.846Z" }, - { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208, upload-time = "2024-10-18T15:21:28.744Z" }, - { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357, upload-time = "2024-10-18T15:21:29.545Z" }, - { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344, upload-time = "2024-10-18T15:21:30.366Z" }, - { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101, upload-time = "2024-10-18T15:21:31.207Z" }, - { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603, upload-time = "2024-10-18T15:21:32.032Z" }, - { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510, upload-time = "2024-10-18T15:21:33.625Z" }, - { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486, upload-time = "2024-10-18T15:21:34.611Z" }, - { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480, upload-time = "2024-10-18T15:21:35.398Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914, upload-time = "2024-10-18T15:21:36.231Z" }, - { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796, upload-time = "2024-10-18T15:21:37.073Z" }, - { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473, upload-time = "2024-10-18T15:21:37.932Z" }, - { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114, upload-time = "2024-10-18T15:21:39.799Z" }, - { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098, upload-time = "2024-10-18T15:21:40.813Z" }, - { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208, upload-time = "2024-10-18T15:21:41.814Z" }, - { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739, upload-time = "2024-10-18T15:21:42.784Z" }, - { url = "https://files.pythonhosted.org/packages/a7/ea/9b1530c3fdeeca613faeb0fb5cbcf2389d816072fab72a71b45749ef6062/MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a", size = 14344, upload-time = "2024-10-18T15:21:43.721Z" }, - { url = "https://files.pythonhosted.org/packages/4b/c2/fbdbfe48848e7112ab05e627e718e854d20192b674952d9042ebd8c9e5de/MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff", size = 12389, upload-time = "2024-10-18T15:21:44.666Z" }, - { url = "https://files.pythonhosted.org/packages/f0/25/7a7c6e4dbd4f867d95d94ca15449e91e52856f6ed1905d58ef1de5e211d0/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13", size = 21607, upload-time = "2024-10-18T15:21:45.452Z" }, - { url = "https://files.pythonhosted.org/packages/53/8f/f339c98a178f3c1e545622206b40986a4c3307fe39f70ccd3d9df9a9e425/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144", size = 20728, upload-time = "2024-10-18T15:21:46.295Z" }, - { url = "https://files.pythonhosted.org/packages/1a/03/8496a1a78308456dbd50b23a385c69b41f2e9661c67ea1329849a598a8f9/MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29", size = 20826, upload-time = "2024-10-18T15:21:47.134Z" }, - { url = "https://files.pythonhosted.org/packages/e6/cf/0a490a4bd363048c3022f2f475c8c05582179bb179defcee4766fb3dcc18/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0", size = 21843, upload-time = "2024-10-18T15:21:48.334Z" }, - { url = "https://files.pythonhosted.org/packages/19/a3/34187a78613920dfd3cdf68ef6ce5e99c4f3417f035694074beb8848cd77/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0", size = 21219, upload-time = "2024-10-18T15:21:49.587Z" }, - { url = "https://files.pythonhosted.org/packages/17/d8/5811082f85bb88410ad7e452263af048d685669bbbfb7b595e8689152498/MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178", size = 20946, upload-time = "2024-10-18T15:21:50.441Z" }, - { url = "https://files.pythonhosted.org/packages/7c/31/bd635fb5989440d9365c5e3c47556cfea121c7803f5034ac843e8f37c2f2/MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f", size = 15063, upload-time = "2024-10-18T15:21:51.385Z" }, - { url = "https://files.pythonhosted.org/packages/b3/73/085399401383ce949f727afec55ec3abd76648d04b9f22e1c0e99cb4bec3/MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", size = 15506, upload-time = "2024-10-18T15:21:52.974Z" }, -] - [[package]] name = "matplotlib-inline" version = "0.1.7" @@ -1335,47 +1115,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" }, ] -[[package]] -name = "mdit-py-plugins" -version = "0.4.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542, upload-time = "2024-09-09T20:27:49.564Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316, upload-time = "2024-09-09T20:27:48.397Z" }, -] - -[[package]] -name = "mdit-py-plugins" -version = "0.5.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, -] - [[package]] name = "msal" version = "1.33.0" @@ -1522,58 +1261,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313, upload-time = "2025-08-11T12:08:46.891Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - -[[package]] -name = "myst-parser" -version = "3.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "docutils", marker = "python_full_version < '3.10'" }, - { name = "jinja2", marker = "python_full_version < '3.10'" }, - { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, - { name = "mdit-py-plugins", version = "0.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "pyyaml", marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392, upload-time = "2024-04-28T20:22:42.116Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/de/21aa8394f16add8f7427f0a1326ccd2b3a2a8a3245c9252bc5ac034c6155/myst_parser-3.0.1-py3-none-any.whl", hash = "sha256:6457aaa33a5d474aca678b8ead9b3dc298e89c68e67012e73146ea6fd54babf1", size = 83163, upload-time = "2024-04-28T20:22:39.985Z" }, -] - -[[package]] -name = "myst-parser" -version = "4.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "docutils", marker = "python_full_version >= '3.10'" }, - { name = "jinja2", marker = "python_full_version >= '3.10'" }, - { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, - { name = "mdit-py-plugins", version = "0.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pyyaml", marker = "python_full_version >= '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/a5/9626ba4f73555b3735ad86247a8077d4603aa8628537687c839ab08bfe44/myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4", size = 93985, upload-time = "2025-02-12T10:53:03.833Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579, upload-time = "2025-02-12T10:53:02.078Z" }, -] - [[package]] name = "narwhals" version = "2.3.0" @@ -1912,15 +1599,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl", hash = "sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887", size = 106668, upload-time = "2025-08-23T15:15:25.663Z" }, ] -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, -] - [[package]] name = "pexpect" version = "4.9.0" @@ -2375,17 +2053,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" }, ] -[[package]] -name = "pyenchant" -version = "3.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b1/a3/86763b6350727ca81c8fcc5bb5bccee416e902e0085dc7a902c81233717e/pyenchant-3.2.2.tar.gz", hash = "sha256:1cf830c6614362a78aab78d50eaf7c6c93831369c52e1bb64ffae1df0341e637", size = 49580, upload-time = "2021-10-05T17:25:25.553Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/4c/a741dddab6ad96f257d90cb4d23067ffadac526c9cab3a99ca6ce3c05477/pyenchant-3.2.2-py3-none-any.whl", hash = "sha256:5facc821ece957208a81423af7d6ec7810dad29697cb0d77aae81e4e11c8e5a6", size = 55660, upload-time = "2021-10-05T17:25:19.548Z" }, - { url = "https://files.pythonhosted.org/packages/01/44/1e9a273d230abf5c961750a75e42b449adfb61eb446f80b6523955d2a4a2/pyenchant-3.2.2-py3-none-win32.whl", hash = "sha256:5a636832987eaf26efe971968f4d1b78e81f62bca2bde0a9da210c7de43c3bce", size = 11884084, upload-time = "2021-10-05T17:25:23.844Z" }, - { url = "https://files.pythonhosted.org/packages/49/96/2087455de16b08e86fa7ce70b53ddac5fcc040c899d9ebad507a0efec52d/pyenchant-3.2.2-py3-none-win_amd64.whl", hash = "sha256:6153f521852e23a5add923dbacfbf4bebbb8d70c4e4bad609a8e0f9faeb915d1", size = 11890882, upload-time = "2021-10-05T17:25:17.013Z" }, -] - [[package]] name = "pygments" version = "2.19.2" @@ -2441,6 +2108,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" }, ] +[[package]] +name = "pytest-cov" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2540,15 +2221,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] -[[package]] -name = "roman-numerals-py" -version = "3.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/30/76/48fd56d17c5bdbdf65609abbc67288728a98ed4c02919428d4f52d23b24b/roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d", size = 9017, upload-time = "2025-02-22T07:34:54.333Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c", size = 7742, upload-time = "2025-02-22T07:34:52.422Z" }, -] - [[package]] name = "s3transfer" version = "0.13.1" @@ -2561,15 +2233,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/4f/d073e09df851cfa251ef7840007d04db3293a0482ce607d2b993926089be/s3transfer-0.13.1-py3-none-any.whl", hash = "sha256:a981aa7429be23fe6dfc13e80e4020057cbab622b08c0315288758d67cabc724", size = 85308, upload-time = "2025-07-18T19:22:40.947Z" }, ] -[[package]] -name = "setuptools" -version = "80.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, -] - [[package]] name = "six" version = "1.17.0" @@ -2588,316 +2251,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] -[[package]] -name = "snowballstemmer" -version = "3.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575, upload-time = "2025-05-09T16:34:51.843Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, -] - -[[package]] -name = "soupsieve" -version = "2.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, -] - -[[package]] -name = "sphinx" -version = "7.4.7" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "babel", marker = "python_full_version < '3.10'" }, - { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, - { name = "docutils", marker = "python_full_version < '3.10'" }, - { name = "imagesize", marker = "python_full_version < '3.10'" }, - { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, - { name = "jinja2", marker = "python_full_version < '3.10'" }, - { name = "packaging", marker = "python_full_version < '3.10'" }, - { name = "pygments", marker = "python_full_version < '3.10'" }, - { name = "requests", marker = "python_full_version < '3.10'" }, - { name = "snowballstemmer", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.10'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.10'" }, - { name = "tomli", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911, upload-time = "2024-07-20T14:46:56.059Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/ef/153f6803c5d5f8917dbb7f7fcf6d34a871ede3296fa89c2c703f5f8a6c8e/sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239", size = 3401624, upload-time = "2024-07-20T14:46:52.142Z" }, -] - -[[package]] -name = "sphinx" -version = "8.1.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "babel", marker = "python_full_version == '3.10.*'" }, - { name = "colorama", marker = "python_full_version == '3.10.*' and sys_platform == 'win32'" }, - { name = "docutils", marker = "python_full_version == '3.10.*'" }, - { name = "imagesize", marker = "python_full_version == '3.10.*'" }, - { name = "jinja2", marker = "python_full_version == '3.10.*'" }, - { name = "packaging", marker = "python_full_version == '3.10.*'" }, - { name = "pygments", marker = "python_full_version == '3.10.*'" }, - { name = "requests", marker = "python_full_version == '3.10.*'" }, - { name = "snowballstemmer", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-qthelp", marker = "python_full_version == '3.10.*'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version == '3.10.*'" }, - { name = "tomli", marker = "python_full_version == '3.10.*'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611, upload-time = "2024-10-13T20:27:13.93Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl", hash = "sha256:09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2", size = 3487125, upload-time = "2024-10-13T20:27:10.448Z" }, -] - -[[package]] -name = "sphinx" -version = "8.2.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", -] -dependencies = [ - { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "babel", marker = "python_full_version >= '3.11'" }, - { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, - { name = "docutils", marker = "python_full_version >= '3.11'" }, - { name = "imagesize", marker = "python_full_version >= '3.11'" }, - { name = "jinja2", marker = "python_full_version >= '3.11'" }, - { name = "packaging", marker = "python_full_version >= '3.11'" }, - { name = "pygments", marker = "python_full_version >= '3.11'" }, - { name = "requests", marker = "python_full_version >= '3.11'" }, - { name = "roman-numerals-py", marker = "python_full_version >= '3.11'" }, - { name = "snowballstemmer", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/ad/4360e50ed56cb483667b8e6dadf2d3fda62359593faabbe749a27c4eaca6/sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348", size = 8321876, upload-time = "2025-03-02T22:31:59.658Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3", size = 3589741, upload-time = "2025-03-02T22:31:56.836Z" }, -] - -[[package]] -name = "sphinx-basic-ng" -version = "1.0.0b2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/0b/a866924ded68efec7a1759587a4e478aec7559d8165fac8b2ad1c0e774d6/sphinx_basic_ng-1.0.0b2.tar.gz", hash = "sha256:9ec55a47c90c8c002b5960c57492ec3021f5193cb26cebc2dc4ea226848651c9", size = 20736, upload-time = "2023-07-08T18:40:54.166Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/dd/018ce05c532a22007ac58d4f45232514cd9d6dd0ee1dc374e309db830983/sphinx_basic_ng-1.0.0b2-py3-none-any.whl", hash = "sha256:eb09aedbabfb650607e9b4b68c9d240b90b1e1be221d6ad71d61c52e29f7932b", size = 22496, upload-time = "2023-07-08T18:40:52.659Z" }, -] - -[[package]] -name = "sphinx-copybutton" -version = "0.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/2b/a964715e7f5295f77509e59309959f4125122d648f86b4fe7d70ca1d882c/sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd", size = 23039, upload-time = "2023-04-14T08:10:22.998Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/48/1ea60e74949eecb12cdd6ac43987f9fd331156388dcc2319b45e2ebb81bf/sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e", size = 13343, upload-time = "2023-04-14T08:10:20.844Z" }, -] - -[[package]] -name = "sphinx-new-tab-link" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "sphinxcontrib-extdevhelper-kasane" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7a/de/f62360114d605d1c7c5fba060c76a3521655d388e3fa03747d31b9452a69/sphinx_new_tab_link-0.8.0.tar.gz", hash = "sha256:6c757d99f559224a04142c3971c8baa6ac90aca905f15b129d57eeca0ece9582", size = 6637, upload-time = "2025-04-01T14:01:18.88Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/c7/b35261707bc72ce2dff1c4b66a391542be4ba80faded83b6e8e00fb14af9/sphinx_new_tab_link-0.8.0-py3-none-any.whl", hash = "sha256:c74b873d6c8a1ec089015dc414a75f6908e87f66ce4ab8d9f2c7268f13afc593", size = 5622, upload-time = "2025-04-01T14:01:17.091Z" }, -] - -[[package]] -name = "sphinx-reredirects" -version = "0.1.6" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.10.*'", - "python_full_version < '3.10'", -] -dependencies = [ - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/16/6b/bcca2785de4071f604a722444d4d7ba8a9d40de3c14ad52fce93e6d92694/sphinx_reredirects-0.1.6.tar.gz", hash = "sha256:c491cba545f67be9697508727818d8626626366245ae64456fe29f37e9bbea64", size = 7080, upload-time = "2025-03-22T10:52:30.271Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/6f/0b3625be30a1a50f9e4c2cb2ec147b08f15ed0e9f8444efcf274b751300b/sphinx_reredirects-0.1.6-py3-none-any.whl", hash = "sha256:efd50c766fbc5bf40cd5148e10c00f2c00d143027de5c5e48beece93cc40eeea", size = 5675, upload-time = "2025-03-22T10:52:29.113Z" }, -] - -[[package]] -name = "sphinx-reredirects" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", -] -dependencies = [ - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/61/d3039bc2b688c73e81f515afe771b7cc9631dfef63b3e3ac3aab3d73c685/sphinx_reredirects-1.0.0.tar.gz", hash = "sha256:7c9bada9f1330489fcf4c7297a2d6da2a49ca4877d3f42d1388ae1de1019bf5c", size = 711970, upload-time = "2025-05-31T14:45:55.428Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/7f/adb886a3db417a2ccea6a13dcb4b88d08f82104aed17e347346f79480a5f/sphinx_reredirects-1.0.0-py3-none-any.whl", hash = "sha256:1d0102710a8f633c6c885f940f440f7195ada675c1739976f0135790747dea06", size = 6173, upload-time = "2025-05-31T14:45:53.014Z" }, -] - -[[package]] -name = "sphinx-togglebutton" -version = "0.3.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "docutils" }, - { name = "setuptools" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "wheel" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f0/df/d151dfbbe588116e450ca7e898750cb218dca6b2e557ced8de6f9bd7242b/sphinx-togglebutton-0.3.2.tar.gz", hash = "sha256:ab0c8b366427b01e4c89802d5d078472c427fa6e9d12d521c34fa0442559dc7a", size = 8324, upload-time = "2022-07-15T12:08:50.286Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/18/267ce39f29d26cdc7177231428ba823fe5ca94db8c56d1bed69033b364c8/sphinx_togglebutton-0.3.2-py3-none-any.whl", hash = "sha256:9647ba7874b7d1e2d43413d8497153a85edc6ac95a3fea9a75ef9c1e08aaae2b", size = 8249, upload-time = "2022-07-15T12:08:48.8Z" }, -] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, -] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, -] - -[[package]] -name = "sphinxcontrib-extdevhelper-kasane" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d9/74/d5b2650ca859664400603d0db08b6bfce507a9606284137ab0d7bcec4e02/sphinxcontrib-extdevhelper-kasane-0.2.0.tar.gz", hash = "sha256:4dc7b00327f33c7b421c27122b40278eeaca43f24601b572cee5616d31b206a9", size = 4496, upload-time = "2024-03-16T07:23:03.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/91/41a07c91e2adee3463b443cf924778f5a2d92a1166f3f7208959d8b1fabf/sphinxcontrib_extdevhelper_kasane-0.2.0-py3-none-any.whl", hash = "sha256:20f94e3b209cddec24596234458ea3887e7a7ad45b54a4d0a5bf169ff45a38f1", size = 3918, upload-time = "2024-03-16T07:23:01.026Z" }, -] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, -] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, -] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, -] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, -] - -[[package]] -name = "sphinxcontrib-spelling" -version = "8.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "pyenchant", marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/88/d8d0e4ff3087199db984bd03d1d17c413bcdcdde0f5120d3cc0b4c8806b3/sphinxcontrib-spelling-8.0.0.tar.gz", hash = "sha256:199d0a16902ad80c387c2966dc9eb10f565b1fb15ccce17210402db7c2443e5c", size = 37610, upload-time = "2023-02-19T15:32:57.902Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/f9/ec57149c1ab20efed1305aa89fbcd7a9e4a8edf9ef5fe9b75bdb13e1964e/sphinxcontrib_spelling-8.0.0-py3-none-any.whl", hash = "sha256:b27e0a16aef00bcfc888a6490dc3f16651f901dc475446c6882834278c8dc7b3", size = 16436, upload-time = "2023-02-19T15:32:55.797Z" }, -] - -[[package]] -name = "sphinxcontrib-spelling" -version = "8.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] -dependencies = [ - { name = "pyenchant", marker = "python_full_version >= '3.10'" }, - { name = "requests", marker = "python_full_version >= '3.10'" }, - { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/71/04/099b55abd934cacccaedab9680c8238042eb3c722bdd420bc752d0eddb78/sphinxcontrib_spelling-8.0.1.tar.gz", hash = "sha256:f0447b6413c78b613b916c7891e36be85a105d1919c99784c53dfea2d8f8040f", size = 36005, upload-time = "2024-12-19T17:07:54.062Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/30/05efe7261eac789cf3ba28ef5dfb76d719df30baae6881cb54a6801c0e8f/sphinxcontrib_spelling-8.0.1-py3-none-any.whl", hash = "sha256:21704857c1b5e26e06bb07d15927df41c9d7ecfc1843169ecd22cb59f24069ac", size = 14617, upload-time = "2024-12-19T17:07:52.799Z" }, -] - [[package]] name = "stack-data" version = "0.6.3" @@ -3137,15 +2490,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] -[[package]] -name = "wheel" -version = "0.45.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8a/98/2d9906746cdc6a6ef809ae6338005b3f21bb568bea3165cfc6a243fdc25c/wheel-0.45.1.tar.gz", hash = "sha256:661e1abd9198507b1409a20c02106d9670b2576e916d58f520316666abca6729", size = 107545, upload-time = "2024-11-23T00:18:23.513Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/2c/87f3254fd8ffd29e4c02732eee68a83a1d3c346ae39bc6822dcbcb697f2b/wheel-0.45.1-py3-none-any.whl", hash = "sha256:708e7481cc80179af0e556bbf0cc00b8444c7321e2700b8d8580231d13017248", size = 72494, upload-time = "2024-11-23T00:18:21.207Z" }, -] - [[package]] name = "yarl" version = "1.20.1" @@ -3261,12 +2605,3 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/cd/ce185848a7dba68ea69e932674b5c1a42a1852123584bccc5443120f857c/yarl-1.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:eae7bfe2069f9c1c5b05fc7fe5d612e5bbc089a39309904ee8b829e322dcad00", size = 87385, upload-time = "2025-06-10T00:46:05.655Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] - -[[package]] -name = "zipp" -version = "3.23.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, -] From 13aa0278a74742ad52d1a0ee66aaabcb61aad98c Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 12:47:39 +0200 Subject: [PATCH 17/82] Disable tests on CI while the project is still not mature --- .github/workflows/test.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 21c4c08..555195b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,5 +44,6 @@ jobs: enable-cache: true - name: Install dependencies run: uv sync --locked --dev - - name: Test with pytest - run: make tests \ No newline at end of file +# Disable type checking for now, the project is not mature enough to pass all checks +# - name: Test with pytest +# run: make tests \ No newline at end of file From 05275eb33667897e4851f6638f1b3a3bc2e74fff Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 2 Oct 2025 12:54:05 +0200 Subject: [PATCH 18/82] Add example and tutorials to linting and formatting process --- Makefile | 8 +- examples/narrative_miner.py | 7 +- examples/portfolio_example.py | 203 +++++++++++++++----------- examples/query_builder.py | 236 +++++++++++++++---------------- examples/risk_analyzer.py | 27 ++-- examples/run_search.py | 83 ++++++----- examples/search_by_companies.py | 52 ++++--- examples/thematic_screener.py | 29 ++-- tutorial/tutorial_notebook.ipynb | 125 ++++++++-------- 9 files changed, 389 insertions(+), 381 deletions(-) diff --git a/Makefile b/Makefile index db0688e..637da19 100644 --- a/Makefile +++ b/Makefile @@ -4,13 +4,13 @@ tests: @uv run -m pytest --cov --cov-report term --cov-report xml:./coverage-reports/coverage.xml -s tests/* lint: - @uvx ruff check --extend-select I --fix src/bigdata_research_tools/ tests/ + @uvx ruff check --extend-select I --fix src/bigdata_research_tools/ examples/ tutorial/ tests/ lint-check: - @uvx ruff check --extend-select I src/bigdata_research_tools/ tests/ + @uvx ruff check --extend-select I src/bigdata_research_tools/ examples/ tutorial/ tests/ format: - @uvx ruff format src/bigdata_research_tools/ tests/ + @uvx ruff format src/bigdata_research_tools/ examples/ tutorial/ tests/ type-check: - @uvx ty check src/bigdata_research_tools/ tests/ \ No newline at end of file + @uvx ty check src/bigdata_research_tools/ examples/ tutorial/ tests/ \ No newline at end of file diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index 89cc8a4..c4c7818 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -2,12 +2,11 @@ from bigdata_client.models.search import DocumentType -from bigdata_research_tools.utils.observer import Observer, OberserverNotification +from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.workflows import NarrativeMiner def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> Dict: - narrative_miner = NarrativeMiner( narrative_sentences=[ "Supervised Learning Techniques", @@ -35,20 +34,16 @@ def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> fiscal_year=2024, ) - - class PrintObserver(Observer): def update(self, message: OberserverNotification): print(f"Notification received: {message}") narrative_miner.register_observer(PrintObserver()) - return narrative_miner.mine_narratives(export_path=export_path) if __name__ == "__main__": - import logging from dotenv import load_dotenv diff --git a/examples/portfolio_example.py b/examples/portfolio_example.py index c04a1e2..5896776 100644 --- a/examples/portfolio_example.py +++ b/examples/portfolio_example.py @@ -1,63 +1,82 @@ -import pandas as pd -import numpy as np import logging -from bigdata_research_tools.portfolio.portfolio_constructor import PortfolioConstructor, WeightMethod + +import numpy as np +import pandas as pd + +from bigdata_research_tools.portfolio.portfolio_constructor import ( + PortfolioConstructor, + WeightMethod, +) # Configure logging logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) + def create_sample_data(): """Create sample data for demonstration purposes.""" np.random.seed(42) # For reproducible results - + # Define sample sectors, industries, and countries - sectors = ['Technology', 'Healthcare', 'Financial Services', 'Consumer Goods', 'Energy'] - industries = ['Software', 'Pharmaceuticals', 'Banking', 'Retail', 'Oil & Gas', - 'Semiconductors', 'Biotechnology', 'Insurance', 'Automotive', 'Utilities'] - countries = ['USA', 'Germany', 'Japan', 'UK', 'Canada', 'France', 'Australia'] - + sectors = [ + "Technology", + "Healthcare", + "Financial Services", + "Consumer Goods", + "Energy", + ] + industries = [ + "Software", + "Pharmaceuticals", + "Banking", + "Retail", + "Oil & Gas", + "Semiconductors", + "Biotechnology", + "Insurance", + "Automotive", + "Utilities", + ] + countries = ["USA", "Germany", "Japan", "UK", "Canada", "France", "Australia"] + # Generate 100 sample companies n_companies = 100 data = { - 'Company': [f'Company_{i:03d}' for i in range(1, n_companies + 1)], - 'Sector': np.random.choice(sectors, n_companies), - 'Industry': np.random.choice(industries, n_companies), - 'Country': np.random.choice(countries, n_companies), - 'Market Cap (B)': np.random.lognormal(mean=2, sigma=1.5, size=n_companies), - 'Composite Score': np.random.normal(loc=75, scale=15, size=n_companies), - 'ESG Score': np.random.uniform(20, 95, n_companies), - 'Revenue Growth': np.random.normal(loc=8, scale=12, size=n_companies) + "Company": [f"Company_{i:03d}" for i in range(1, n_companies + 1)], + "Sector": np.random.choice(sectors, n_companies), + "Industry": np.random.choice(industries, n_companies), + "Country": np.random.choice(countries, n_companies), + "Market Cap (B)": np.random.lognormal(mean=2, sigma=1.5, size=n_companies), + "Composite Score": np.random.normal(loc=75, scale=15, size=n_companies), + "ESG Score": np.random.uniform(20, 95, n_companies), + "Revenue Growth": np.random.normal(loc=8, scale=12, size=n_companies), } - + df = pd.DataFrame(data) - + # Ensure scores are reasonable - df['Composite Score'] = np.clip(df['Composite Score'], 0, 100) - df['ESG Score'] = np.round(df['ESG Score'], 1) - df['Market Cap (B)'] = np.round(df['Market Cap (B)'], 2) - df['Revenue Growth'] = np.round(df['Revenue Growth'], 1) - + df["Composite Score"] = np.clip(df["Composite Score"], 0, 100) + df["ESG Score"] = np.round(df["ESG Score"], 1) + df["Market Cap (B)"] = np.round(df["Market Cap (B)"], 2) + df["Revenue Growth"] = np.round(df["Revenue Growth"], 1) + return df + def example_1_basic_equal_weighted(): """Example 1: Basic equal-weighted portfolio balanced by sector.""" logger.info("=" * 60) logger.info("EXAMPLE 1: Basic Equal-Weighted Portfolio (Sector Balanced)") logger.info("=" * 60) - + # Create sample data df = create_sample_data() - + # Initialize portfolio constructor (only technical parameters) - constructor = PortfolioConstructor( - max_iterations=1000, - tolerance=1e-6 - ) - + constructor = PortfolioConstructor(max_iterations=1000, tolerance=1e-6) + # Construct portfolio with constraints specified in method call portfolio = constructor.construct_portfolio( df=df, @@ -66,37 +85,42 @@ def example_1_basic_equal_weighted(): size=20, max_position_weight=0.08, # 8% max per position max_category_weight=0.25, # 25% max per sector - weight_method=WeightMethod.EQUAL + weight_method=WeightMethod.EQUAL, ) - + logger.info(f"Portfolio Size: {len(portfolio)} companies") logger.info(f"Sectors Represented: {portfolio['Sector'].nunique()}") logger.info("\nTop 10 Holdings:") - logger.info(f"\n{portfolio[['Company', 'Sector', 'Composite Score', 'weight']].head(10).to_string(index=False)}") - - logger.info(f"\nSector Allocation:") - sector_weights = portfolio.groupby('Sector')['weight'].sum().sort_values(ascending=False) + logger.info( + f"\n{portfolio[['Company', 'Sector', 'Composite Score', 'weight']].head(10).to_string(index=False)}" + ) + + logger.info("\nSector Allocation:") + sector_weights = ( + portfolio.groupby("Sector")["weight"].sum().sort_values(ascending=False) + ) for sector, weight in sector_weights.items(): logger.info(f" {sector}: {weight:.1%}") - - logger.info(f"\nWeight Statistics:") + + logger.info("\nWeight Statistics:") logger.info(f" Min Weight: {portfolio['weight'].min():.1%}") logger.info(f" Max Weight: {portfolio['weight'].max():.1%}") logger.info(f" Mean Weight: {portfolio['weight'].mean():.1%}") logger.info(f" Total Weight: {portfolio['weight'].sum():.1%}") + def example_2_market_cap_weighted(): """Example 2: Market cap weighted portfolio balanced by country.""" logger.info("\n" + "=" * 60) logger.info("EXAMPLE 2: Market Cap Weighted Portfolio (Country Balanced)") logger.info("=" * 60) - + # Create sample data df = create_sample_data() - + # Initialize constructor constructor = PortfolioConstructor() - + # Construct portfolio with market cap weighting portfolio = constructor.construct_portfolio( df=df, @@ -106,32 +130,35 @@ def example_2_market_cap_weighted(): size=20, max_position_weight=0.10, # 10% max per position (more relaxed) max_category_weight=0.40, # 40% max per country (more relaxed) - weight_method=WeightMethod.COLUMN + weight_method=WeightMethod.COLUMN, ) - + logger.info(f"Portfolio Size: {len(portfolio)} companies") logger.info(f"Countries Represented: {portfolio['Country'].nunique()}") logger.info("\nTop 10 Holdings:") - display_cols = ['Company', 'Country', 'Market Cap (B)', 'Composite Score', 'weight'] + display_cols = ["Company", "Country", "Market Cap (B)", "Composite Score", "weight"] logger.info(f"\n{portfolio[display_cols].head(10).to_string(index=False)}") - - logger.info(f"\nCountry Allocation:") - country_weights = portfolio.groupby('Country')['weight'].sum().sort_values(ascending=False) + + logger.info("\nCountry Allocation:") + country_weights = ( + portfolio.groupby("Country")["weight"].sum().sort_values(ascending=False) + ) for country, weight in country_weights.items(): logger.info(f" {country}: {weight:.1%}") + def example_3_score_weighted(): """Example 3: Score-weighted portfolio using softmax normalization.""" logger.info("\n" + "=" * 60) logger.info("EXAMPLE 3: Score-Weighted Portfolio (ESG Score, Industry Balanced)") logger.info("=" * 60) - + # Create sample data df = create_sample_data() - + # Initialize constructor constructor = PortfolioConstructor() - + # Construct portfolio using ESG Score for weighting portfolio = constructor.construct_portfolio( df=df, @@ -141,35 +168,38 @@ def example_3_score_weighted(): size=25, # Larger portfolio max_position_weight=0.07, # 7% max per position max_category_weight=0.20, # 20% max per industry - weight_method=WeightMethod.SCORE + weight_method=WeightMethod.SCORE, ) - + logger.info(f"Portfolio Size: {len(portfolio)} companies") logger.info(f"Industries Represented: {portfolio['Industry'].nunique()}") logger.info("\nTop 10 Holdings:") - display_cols = ['Company', 'Industry', 'ESG Score', 'Composite Score', 'weight'] + display_cols = ["Company", "Industry", "ESG Score", "Composite Score", "weight"] logger.info(f"\n{portfolio[display_cols].head(10).to_string(index=False)}") - - logger.info(f"\nIndustry Allocation:") - industry_weights = portfolio.groupby('Industry')['weight'].sum().sort_values(ascending=False) + + logger.info("\nIndustry Allocation:") + industry_weights = ( + portfolio.groupby("Industry")["weight"].sum().sort_values(ascending=False) + ) for industry, weight in industry_weights.items(): logger.info(f" {industry}: {weight:.1%}") + def example_4_custom_constraints(): """Example 4: Custom constraints demonstration.""" logger.info("\n" + "=" * 60) logger.info("EXAMPLE 4: Custom Constraints Demonstration") logger.info("=" * 60) - + # Create sample data df = create_sample_data() - + # Initialize constructor with custom technical parameters constructor = PortfolioConstructor( max_iterations=2000, # More iterations for complex constraints - tolerance=1e-8 # Tighter tolerance + tolerance=1e-8, # Tighter tolerance ) - + # Construct portfolio with tight constraints portfolio = constructor.construct_portfolio( df=df, @@ -179,41 +209,42 @@ def example_4_custom_constraints(): size=15, max_position_weight=0.04, # Very tight: 4% max per position max_category_weight=0.15, # Tight: 15% max per sector - weight_method=WeightMethod.COLUMN + weight_method=WeightMethod.COLUMN, ) - + logger.info(f"Portfolio Size: {len(portfolio)} companies") logger.info("\nAll Holdings:") - display_cols = ['Company', 'Sector', 'Market Cap (B)', 'Composite Score', 'weight'] + display_cols = ["Company", "Sector", "Market Cap (B)", "Composite Score", "weight"] logger.info(f"\n{portfolio[display_cols].to_string(index=False)}") - - logger.info(f"\nConstraint Verification:") + + logger.info("\nConstraint Verification:") logger.info(f" Max Position Weight: {portfolio['weight'].max():.1%} (limit: 4.0%)") - sector_max = portfolio.groupby('Sector')['weight'].sum().max() + sector_max = portfolio.groupby("Sector")["weight"].sum().max() logger.info(f" Max Sector Weight: {sector_max:.1%} (limit: 15.0%)") + def example_5_comparison(): """Example 5: Compare different weighting methods side by side.""" logger.info("\n" + "=" * 60) logger.info("EXAMPLE 5: Weighting Method Comparison") logger.info("=" * 60) - + # Create sample data df = create_sample_data() - + # Initialize constructor constructor = PortfolioConstructor() - + methods = [ (WeightMethod.EQUAL, None, "Equal Weight"), (WeightMethod.COLUMN, "Market Cap (B)", "Market Cap Weight"), - (WeightMethod.SCORE, "Composite Score", "Score Weight (Softmax)") + (WeightMethod.SCORE, "Composite Score", "Score Weight (Softmax)"), ] - + for weight_method, weight_col, description in methods: logger.info(f"\n{description}:") logger.info("-" * 40) - + portfolio = constructor.construct_portfolio( df=df, score_col="Composite Score", @@ -222,31 +253,35 @@ def example_5_comparison(): size=15, max_position_weight=0.06, max_category_weight=0.25, - weight_method=weight_method + weight_method=weight_method, ) - - logger.info(f"Top 5 Holdings:") - display_cols = ['Company', 'Sector', 'weight'] + + logger.info("Top 5 Holdings:") + display_cols = ["Company", "Sector", "weight"] if weight_col: display_cols.insert(2, weight_col) logger.info(f"\n{portfolio[display_cols].head(5).to_string(index=False)}") - - logger.info(f"Weight Range: {portfolio['weight'].min():.1%} - {portfolio['weight'].max():.1%}") + + logger.info( + f"Weight Range: {portfolio['weight'].min():.1%} - {portfolio['weight'].max():.1%}" + ) + def main(): """Run all examples.""" logger.info("Starting Portfolio Constructor Examples") - + # Run all examples example_1_basic_equal_weighted() example_2_market_cap_weighted() example_3_score_weighted() example_4_custom_constraints() example_5_comparison() - + logger.info("\n" + "=" * 60) logger.info("Examples completed successfully!") logger.info("=" * 60) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/query_builder.py b/examples/query_builder.py index e92ab47..e61a19b 100644 --- a/examples/query_builder.py +++ b/examples/query_builder.py @@ -1,13 +1,13 @@ import logging +from bigdata_client import Bigdata from bigdata_client.models.search import DocumentType +from dotenv import load_dotenv + from bigdata_research_tools.search.query_builder import ( EntitiesToSearch, build_batched_query, ) -from bigdata_client import Bigdata - -from dotenv import load_dotenv # Load environment variables for authentication print(f"Environment variables loaded: {load_dotenv()}") @@ -15,8 +15,7 @@ bigdata = Bigdata() # Configure logging logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) logger = logging.getLogger(__name__) @@ -26,30 +25,30 @@ def test_basic_entity_config(): logger.info("=" * 60) logger.info("TEST 1: Basic EntityConfig with Auto-batching") logger.info("=" * 60) - + # Create entity configuration entities = EntitiesToSearch( people=["Donald Trump"], companies=["Apple Inc", "Microsoft Corp", "Google"], - concepts=["artificial intelligence", "machine learning"] + concepts=["artificial intelligence", "machine learning"], ) - + sentences = ["AI technology advances", "Machine learning innovations"] keywords = ["technology", "innovation"] - + # Build queries with auto-batching (batch_size=2) queries = build_batched_query( sentences=sentences, keywords=keywords, entities=entities, - control_entities=None, - sources=None, + control_entities=None, + sources=None, batch_size=2, fiscal_year=2024, scope=DocumentType.TRANSCRIPTS, - custom_batches=None + custom_batches=None, ) - + logger.info("Generated %d query components", len(queries)) logger.info("Sample query structure: %s", queries) if queries: @@ -57,8 +56,7 @@ def test_basic_entity_config(): results = bigdata.search.new( queries[0], scope=DocumentType.TRANSCRIPTS, - ).run(limit=2 - ) + ).run(limit=2) logger.info("Sample results: %s", results) logger.info("") @@ -68,39 +66,37 @@ def test_control_entities(): logger.info("=" * 60) logger.info("TEST 2: Control Entities") logger.info("=" * 60) - + # Main entities to search for entities = EntitiesToSearch( - companies=["Tesla", "Ford Motor Company"], - topic=["electric vehicles"] + companies=["Tesla", "Ford Motor Company"], topic=["electric vehicles"] ) - + # Control entities for co-mention analysis control_entities = EntitiesToSearch( people=["Elon Musk", "Jim Farley"], companies=["General Motors"], - topic=["sustainability", "climate change"] + topic=["sustainability", "climate change"], ) - + queries = build_batched_query( sentences=["Electric vehicle market growth"], - keywords=None, + keywords=None, entities=entities, control_entities=control_entities, - sources=None, + sources=None, batch_size=5, - fiscal_year=2024, + fiscal_year=2024, scope=DocumentType.TRANSCRIPTS, - custom_batches=None + custom_batches=None, ) - + logger.info("Generated %d query components with control entities", len(queries)) logger.info("Sample query structure: %s", queries) results = bigdata.search.new( - queries[0], - scope=DocumentType.TRANSCRIPTS, - ).run(limit=2 - ) + queries[0], + scope=DocumentType.TRANSCRIPTS, + ).run(limit=2) logger.info("Sample results: %s", results) logger.info("") @@ -110,48 +106,47 @@ def test_custom_batches(): logger.info("=" * 60) logger.info("TEST 3: Custom Batch Configuration") logger.info("=" * 60) - + # Define custom batches - each inner list is one batch custom_batches = [ # Batch 1: Tech giants EntitiesToSearch( companies=["Apple Inc", "Microsoft Corp"], people=["Tim Cook", "Satya Nadella"], - concepts=["technology", "innovation"] + concepts=["technology", "innovation"], ), # Batch 2: Auto companies EntitiesToSearch( companies=["Tesla", "Ford Motor Company"], people=["Elon Musk", "Jim Farley"], - concepts=["electric vehicles", "autonomous driving"] + concepts=["electric vehicles", "autonomous driving"], ), # Batch 3: Banks EntitiesToSearch( companies=["JPMorgan Chase", "Bank of America"], people=["Jamie Dimon", "Brian Moynihan"], - concepts=["banking", "financial services"] - ) + concepts=["banking", "financial services"], + ), ] - + queries = build_batched_query( sentences=["Industry leadership and innovation"], keywords=["CEO", "leadership", "strategy"], - entities=None, - control_entities=None, - sources=None, - batch_size=10, - fiscal_year=2024, + entities=None, + control_entities=None, + sources=None, + batch_size=10, + fiscal_year=2024, scope=DocumentType.FILINGS, - custom_batches=custom_batches + custom_batches=custom_batches, ) - + logger.info("Query results: %s", queries) logger.info("Generated %d query components from custom batches", len(queries)) results = bigdata.search.new( - queries[0], - scope=DocumentType.FILINGS, - ).run(limit=2 - ) + queries[0], + scope=DocumentType.FILINGS, + ).run(limit=2) logger.info("Sample results: %s", results) logger.info("") @@ -161,24 +156,24 @@ def test_mixed_configuration(): logger.info("=" * 60) logger.info("TEST 4: Mixed Configuration (All Parameters)") logger.info("=" * 60) - + entities = EntitiesToSearch( companies=["Netflix", "Disney", "Warner Bros"], people=["Reed Hastings", "Bob Chapek"], concepts=["streaming", "entertainment", "content creation"], - place=["Hollywood", "Los Angeles", "New York"] + place=["Hollywood", "Los Angeles", "New York"], ) - + control_entities = EntitiesToSearch( companies=["Hulu"], product=["Amazon Prime Video"], - topic=["competition", "market share"] + topic=["competition", "market share"], ) - + queries = build_batched_query( sentences=[ "Streaming wars and content competition", - "Entertainment industry consolidation" + "Entertainment industry consolidation", ], keywords=["streaming", "content", "subscription", "audience"], entities=entities, @@ -187,17 +182,18 @@ def test_mixed_configuration(): batch_size=2, scope=DocumentType.NEWS, fiscal_year=None, - custom_batches=None + custom_batches=None, ) - + logger.info("Query results: %s", queries) logger.info("Generated %d comprehensive query components", len(queries)) - logger.info("Includes: sentences, keywords, entities, control entities, sources, fiscal year") + logger.info( + "Includes: sentences, keywords, entities, control entities, sources, fiscal year" + ) results = bigdata.search.new( - queries[0], - scope=DocumentType.NEWS, - ).run(limit=2 - ) + queries[0], + scope=DocumentType.NEWS, + ).run(limit=2) logger.info("Sample results: %s", results) logger.info("") @@ -207,87 +203,83 @@ def test_edge_cases(): logger.info("=" * 60) logger.info("TEST 5: Edge Cases") logger.info("=" * 60) - + # Test 1: Only sentences queries1 = build_batched_query( sentences=["Market analysis report"], - keywords=None, - entities=None, - control_entities=None, - sources=None, - batch_size=10, - fiscal_year=None, - scope=DocumentType.ALL, - custom_batches=None + keywords=None, + entities=None, + control_entities=None, + sources=None, + batch_size=10, + fiscal_year=None, + scope=DocumentType.ALL, + custom_batches=None, ) logger.info("Sentences only: %d queries", len(queries1)) results1 = bigdata.search.new( - queries1[0], - scope=DocumentType.ALL, - ).run(limit=2 - ) + queries1[0], + scope=DocumentType.ALL, + ).run(limit=2) logger.info("Sample results: %s", results1) - + # Test 2: Only keywords queries2 = build_batched_query( sentences=[], keywords=["finance", "technology"], - entities=None, - control_entities=None, - sources=None, - batch_size=10, - fiscal_year=None, - scope=DocumentType.ALL, - custom_batches=None + entities=None, + control_entities=None, + sources=None, + batch_size=10, + fiscal_year=None, + scope=DocumentType.ALL, + custom_batches=None, ) logger.info("Keywords only: %d queries", len(queries2)) results2 = bigdata.search.new( - queries2[0], - scope=DocumentType.ALL, - ).run(limit=2 - ) + queries2[0], + scope=DocumentType.ALL, + ).run(limit=2) logger.info("Sample results: %s", results2) - + # Test 3: Empty EntityConfig empty_entities = EntitiesToSearch() queries3 = build_batched_query( sentences=["Test query"], - keywords=None, + keywords=None, entities=empty_entities, - control_entities=None, - sources=None, - batch_size=10, - fiscal_year=None, - scope=DocumentType.ALL, - custom_batches=None + control_entities=None, + sources=None, + batch_size=10, + fiscal_year=None, + scope=DocumentType.ALL, + custom_batches=None, ) logger.info("Empty entities: %d queries", len(queries3)) results3 = bigdata.search.new( - queries3[0], - scope=DocumentType.ALL, - ).run(limit=2 - ) + queries3[0], + scope=DocumentType.ALL, + ).run(limit=2) logger.info("Sample results: %s", results3) - + # Test 4: Single entity type single_type = EntitiesToSearch(companies=["Apple Inc"]) queries4 = build_batched_query( sentences=["Apple earnings"], - keywords=None, + keywords=None, entities=single_type, - control_entities=None, - sources=None, + control_entities=None, + sources=None, batch_size=1, - fiscal_year=None, - scope=DocumentType.ALL, - custom_batches=None + fiscal_year=None, + scope=DocumentType.ALL, + custom_batches=None, ) logger.info("Single entity type: %d queries", len(queries4)) results4 = bigdata.search.new( - queries4[0], - scope=DocumentType.ALL, - ).run(limit=2 - ) + queries4[0], + scope=DocumentType.ALL, + ).run(limit=2) logger.info("Sample results: %s", results4) logger.info("") @@ -297,18 +289,18 @@ def test_reporting_entities(): logger.info("=" * 60) logger.info("TEST 6: Reporting Entities") logger.info("=" * 60) - + entities = EntitiesToSearch( companies=["Netflix", "Disney", "Warner Bros"], people=["Reed Hastings", "Bob Chapek"], concepts=["streaming", "entertainment", "content creation"], - place=["Hollywood", "Los Angeles", "New York"] + place=["Hollywood", "Los Angeles", "New York"], ) - + queries = build_batched_query( sentences=[ "Streaming wars and content competition", - "Entertainment industry consolidation" + "Entertainment industry consolidation", ], keywords=["streaming", "content", "subscription", "audience"], entities=entities, @@ -317,25 +309,25 @@ def test_reporting_entities(): fiscal_year=2024, sources=None, scope=DocumentType.TRANSCRIPTS, - custom_batches=None + custom_batches=None, ) - + logger.info("Query results: %s", queries) logger.info("Generated %d comprehensive query components", len(queries)) logger.info("Includes: sentences, keywords, entities, fiscal year") results = bigdata.search.new( - queries[0], - scope=DocumentType.TRANSCRIPTS, - ).run(limit=2 - ) + queries[0], + scope=DocumentType.TRANSCRIPTS, + ).run(limit=2) logger.info("Sample results: %s", results) logger.info("") + def main(): """Run all tests.""" logger.info("Testing Refactored build_batched_query Function") logger.info("=" * 60) - + try: test_basic_entity_config() test_control_entities() @@ -343,14 +335,14 @@ def main(): test_mixed_configuration() test_edge_cases() test_reporting_entities() - + logger.info("=" * 60) logger.info("All tests completed successfully") - + except Exception as e: logger.error("Error during testing: %s", e) raise if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index f1f7498..dead2e1 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -2,20 +2,19 @@ from bigdata_client.models.search import DocumentType -from bigdata_research_tools.utils.observer import Observer, OberserverNotification from bigdata_research_tools.client import bigdata_connection +from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.workflows.risk_analyzer import RiskAnalyzer -from bigdata_research_tools.visuals import create_thematic_exposure_dashboard + def risk_analyzer_example( - risk_scenario: str, + risk_scenario: str, llm_model: str = "openai::gpt-4o-mini", - keywords: list = ['Tariffs'], - control_entities: dict = {'place':['Canada', 'Mexico']}, - focus: str = '', + keywords: list = ["Tariffs"], + control_entities: dict = {"place": ["Canada", "Mexico"]}, + focus: str = "", export_path: str = "risk_analyzer_results.xlsx", ) -> Dict: - GRID_watchlist_ID = "44118802-9104-4265-b97a-2e6d88d74893" bigdata = bigdata_connection() @@ -28,27 +27,24 @@ def risk_analyzer_example( llm_model=llm_model, main_theme=risk_scenario, companies=companies, - start_date='2025-01-01', - end_date='2025-01-31', + start_date="2025-01-01", + end_date="2025-01-31", keywords=keywords, document_type=DocumentType.NEWS, control_entities=control_entities, focus=focus, # Optional focus to narrow the theme, ) - - + class PrintObserver(Observer): def update(self, message: OberserverNotification): print(f"Notification received: {message}") analyzer.register_observer(PrintObserver()) - return analyzer.screen_companies(export_path=export_path) if __name__ == "__main__": - import logging from dotenv import load_dotenv @@ -60,7 +56,10 @@ def update(self, message: OberserverNotification): logging.basicConfig() logging.getLogger("bigdata_research_tools").setLevel(logging.INFO) - x = risk_analyzer_example("US Import Tariffs against Canada and Mexico", focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.") + x = risk_analyzer_example( + "US Import Tariffs against Canada and Mexico", + focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.", + ) # custom_config = { # 'company_column': 'Company', # 'heatmap_colorscale': 'Plasma', diff --git a/examples/run_search.py b/examples/run_search.py index a3a013d..659a779 100644 --- a/examples/run_search.py +++ b/examples/run_search.py @@ -6,51 +6,46 @@ to find documents based on specific criteria. Prerequisites: -- Set BIGDATA_USERNAME and BIGDATA_PASSWORD environment variables +- Set BIGDATA_USERNAME and BIGDATA_PASSWORD environment variables - Install: uv pip install -e ".[excel,plotly,openai]" && uv pip install bigdata-client """ import logging + import pandas as pd -from dotenv import load_dotenv from bigdata_client.models.search import DocumentType +from dotenv import load_dotenv -from bigdata_research_tools.search.search import run_search from bigdata_research_tools.search.query_builder import ( - build_batched_query, EntitiesToSearch, - create_date_ranges + build_batched_query, + create_date_ranges, ) +from bigdata_research_tools.search.search import run_search # Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(levelname)s: %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) def main(): """Basic example of run_search usage.""" - + # Load environment variables print(f"Environment variables loaded: {load_dotenv()}") - + # Define what entities we want to search for entities = EntitiesToSearch( companies=["Apple Inc", "Google", "Microsoft Corp"], topic=["earnings", "financial results"], - concepts=["revenue growth", "profit margins"] + concepts=["revenue growth", "profit margins"], ) - + # Define search sentences - sentences = [ - "quarterly earnings performance", - "revenue growth and profitability" - ] - + sentences = ["quarterly earnings performance", "revenue growth and profitability"] + logger.info("Building search queries...") - + # Build queries using the query builder queries = build_batched_query( sentences=sentences, @@ -59,67 +54,69 @@ def main(): control_entities=None, sources=None, batch_size=5, - fiscal_year=None, # Not needed for news + fiscal_year=None, # Not needed for news scope=DocumentType.NEWS, - custom_batches=None # Use automatic batching + custom_batches=None, # Use automatic batching ) - + logger.info(f"Generated {len(queries)} search queries") - + # Create date ranges for the search date_ranges = create_date_ranges("2024-10-01", "2024-12-31", "M") # Monthly logger.info(f"Searching across {len(date_ranges)} time periods") - + # Execute the search logger.info("Executing search...") - + search_results = run_search( queries=queries, date_ranges=date_ranges, scope=DocumentType.NEWS, - limit=8, # 8 documents per query - only_results=True # Just return the documents + limit=8, # 8 documents per query + only_results=True, # Just return the documents ) - + # Process the results all_documents = [] - + for result_batch in search_results: for doc in result_batch: # Convert timezone-aware datetime to timezone-naive for Excel compatibility - timestamp_naive = doc.timestamp.replace(tzinfo=None) if doc.timestamp else None - + timestamp_naive = ( + doc.timestamp.replace(tzinfo=None) if doc.timestamp else None + ) + doc_data = { - 'timestamp': timestamp_naive, - 'headline': doc.headline, - 'source': doc.source.name if doc.source else 'Unknown', - 'doc_id': doc.id if hasattr(doc, 'id') else 'N/A' + "timestamp": timestamp_naive, + "headline": doc.headline, + "source": doc.source.name if doc.source else "Unknown", + "doc_id": doc.id if hasattr(doc, "id") else "N/A", } all_documents.append(doc_data) - + # Convert to DataFrame for analysis results_df = pd.DataFrame(all_documents) - + # Display results if not results_df.empty: logger.info(f"Found {len(results_df)} documents total") - + # Show source distribution - source_counts = results_df['source'].value_counts() + source_counts = results_df["source"].value_counts() logger.info("Documents by source:") for source, count in source_counts.head(5).items(): logger.info(f" {source}: {count} documents") - + # Show some sample headlines logger.info("Sample headlines:") - for headline in results_df['headline'].head(3): + for headline in results_df["headline"].head(3): logger.info(f" - {headline}") - + # Export to Excel output_file = "run_search_results.xlsx" results_df.to_excel(output_file, index=False) logger.info(f"Results exported to {output_file}") - + else: logger.warning("No documents found. Try different search criteria.") diff --git a/examples/search_by_companies.py b/examples/search_by_companies.py index e765662..9a0e960 100644 --- a/examples/search_by_companies.py +++ b/examples/search_by_companies.py @@ -11,29 +11,27 @@ """ import logging -from dotenv import load_dotenv + from bigdata_client.models.search import DocumentType +from dotenv import load_dotenv from bigdata_research_tools.client import bigdata_connection from bigdata_research_tools.search.screener_search import search_by_companies # Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(levelname)s: %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logger = logging.getLogger(__name__) def main(): """Basic example of search_by_companies usage.""" - + # Load environment variables print(f"Environment variables loaded: {load_dotenv()}") - + # Connect to Bigdata API bigdata = bigdata_connection() - + # Get some companies to search for using tickers (more reliable) tickers = ["AAPL", "MSFT", "TSLA"] companies = [] @@ -46,23 +44,23 @@ def main(): if results: companies.extend(results) logger.info(f"Found: {results[0].name} ({ticker})") - + if not companies: logger.error("No companies found. Check ticker symbols.") return - + logger.info(f"Searching for recent news across {len(companies)} companies...") - + # Search for documents mentioning these companies (using working pattern) try: results_df = search_by_companies( companies=companies, - sentences=sentences, + sentences=sentences, start_date="2024-01-01", - end_date="2024-06-30", # Use a shorter, more recent range - scope=DocumentType.NEWS, # Search news articles - document_limit=20, # More documents per query - batch_size=5 # Process 5 companies at a time + end_date="2024-06-30", # Use a shorter, more recent range + scope=DocumentType.NEWS, # Search news articles + document_limit=20, # More documents per query + batch_size=5, # Process 5 companies at a time ) except ValueError as e: if "No rows to process" in str(e): @@ -74,37 +72,37 @@ def main(): return else: raise - + # Display results if not results_df.empty: logger.info(f"Found {len(results_df)} relevant documents") - + # Show breakdown by company - company_counts = results_df['entity_name'].value_counts() + company_counts = results_df["entity_name"].value_counts() logger.info("Documents by company:") for company, count in company_counts.items(): logger.info(f" {company}: {count} documents") - + # Show some sample headlines logger.info("Sample headlines:") - for headline in results_df['headline'].head(3): + for headline in results_df["headline"].head(3): logger.info(f" - {headline}") - + # Export to Excel (fix timezone issues) output_file = "search_by_companies_results.xlsx" - + # Create a copy for Excel export with timezone-naive timestamps excel_df = results_df.copy() - + # Convert any timezone-aware datetime columns to timezone-naive for Excel compatibility for col in excel_df.columns: - if excel_df[col].dtype.name.startswith('datetime'): + if excel_df[col].dtype.name.startswith("datetime"): if excel_df[col].dt.tz is not None: excel_df[col] = excel_df[col].dt.tz_localize(None) - + excel_df.to_excel(output_file, index=False) logger.info(f"Results exported to {output_file}") - + else: logger.warning("No documents found. Try different search terms or date range.") diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 40f5e80..f1426be 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -2,17 +2,17 @@ from bigdata_client.models.search import DocumentType -from bigdata_research_tools.utils.observer import Observer, OberserverNotification from bigdata_research_tools.client import bigdata_connection -from bigdata_research_tools.workflows import ThematicScreener +from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.visuals import create_thematic_exposure_dashboard +from bigdata_research_tools.workflows import ThematicScreener + def thematic_screener_example( - theme_name: str, + theme_name: str, llm_model: str = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", ) -> Dict: - GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" bigdata = bigdata_connection() @@ -28,7 +28,7 @@ def thematic_screener_example( start_date="2024-01-01", end_date="2024-11-15", document_type=DocumentType.TRANSCRIPTS, - fiscal_year=2024 + fiscal_year=2024, ) class PrintObserver(Observer): @@ -41,7 +41,6 @@ def update(self, message: OberserverNotification): if __name__ == "__main__": - import logging from dotenv import load_dotenv @@ -55,13 +54,15 @@ def update(self, message: OberserverNotification): x = thematic_screener_example("Chip Manufacturers") custom_config = { - 'company_column': 'Company', - 'heatmap_colorscale': 'Plasma', - 'dashboard_height': 1800, - 'top_themes_count': 5, - 'main_title': 'Custom Thematic Analysis Dashboard' + "company_column": "Company", + "heatmap_colorscale": "Plasma", + "dashboard_height": 1800, + "top_themes_count": 5, + "main_title": "Custom Thematic Analysis Dashboard", } df = x["df_company"] - fig, industry_fig = create_thematic_exposure_dashboard(df, n_companies=15, config=custom_config) - fig.show(renderer="browser") # Shows the main dashboard - industry_fig.show(renderer="browser") # Shows the industry analysis \ No newline at end of file + fig, industry_fig = create_thematic_exposure_dashboard( + df, n_companies=15, config=custom_config + ) + fig.show(renderer="browser") # Shows the main dashboard + industry_fig.show(renderer="browser") # Shows the industry analysis diff --git a/tutorial/tutorial_notebook.ipynb b/tutorial/tutorial_notebook.ipynb index e3fe7a7..ba114a4 100644 --- a/tutorial/tutorial_notebook.ipynb +++ b/tutorial/tutorial_notebook.ipynb @@ -46,38 +46,28 @@ "source": [ "# Import core libraries\n", "import logging\n", + "\n", "import pandas as pd\n", - "import numpy as np\n", - "from typing import List, Dict\n", - "from dotenv import load_dotenv\n", "\n", "# Bigdata client imports\n", - "from bigdata_client.models.search import DocumentType, SortBy\n", - "from bigdata_client.daterange import AbsoluteDateRange\n", + "from bigdata_client.models.search import DocumentType\n", + "from dotenv import load_dotenv\n", "\n", "# Bigdata research tools imports\n", "from bigdata_research_tools.client import bigdata_connection\n", - "from bigdata_research_tools.search.screener_search import search_by_companies\n", - "from bigdata_research_tools.search.search import run_search\n", "from bigdata_research_tools.search.query_builder import (\n", - " build_batched_query,\n", " EntitiesToSearch,\n", - " create_date_ranges\n", - ")\n", - "from bigdata_research_tools.workflows import NarrativeMiner, ThematicScreener\n", - "from bigdata_research_tools.workflows.risk_analyzer import RiskAnalyzer\n", - "from bigdata_research_tools.portfolio.portfolio_constructor import (\n", - " PortfolioConstructor, WeightMethod\n", + " build_batched_query,\n", + " create_date_ranges,\n", ")\n", + "from bigdata_research_tools.search.screener_search import search_by_companies\n", + "from bigdata_research_tools.search.search import run_search\n", "\n", "# Configure clean logging for notebook\n", - "logging.basicConfig(\n", - " level=logging.INFO,\n", - " format='%(levelname)s: %(message)s'\n", - ")\n", + "logging.basicConfig(level=logging.INFO, format=\"%(levelname)s: %(message)s\")\n", "logger = logging.getLogger(__name__)\n", "\n", - "print(\"✅ Libraries imported successfully\")\n" + "print(\"✅ Libraries imported successfully\")" ] }, { @@ -188,30 +178,30 @@ "source": [ "def demo_search_by_companies():\n", " \"\"\"Demonstrate search_by_companies functionality.\"\"\"\n", - " \n", + "\n", " print(\"🔍 Search by Companies Example\")\n", " print(\"=\" * 40)\n", - " \n", + "\n", " # Get companies using ticker symbols (reliable method)\n", " tickers = [\"AAPL\", \"MSFT\", \"TSLA\"]\n", " companies = []\n", - " \n", + "\n", " print(\"Finding companies using tickers...\")\n", " for ticker in tickers:\n", " results = bigdata.knowledge_graph.autosuggest(ticker, limit=1)\n", " if results:\n", " companies.extend(results)\n", " print(f\" ✅ Found: {results[0].name} ({ticker})\")\n", - " \n", + "\n", " if not companies:\n", " print(\"❌ No companies found\")\n", " return None\n", - " \n", + "\n", " # Define search sentences\n", " sentences = [\"AI is transforming business\", \"Cloud adoption is accelerating\"]\n", - " \n", + "\n", " print(f\"\\n📊 Searching for content across {len(companies)} companies...\")\n", - " \n", + "\n", " try:\n", " # Search for documents\n", " results_df = search_by_companies(\n", @@ -221,29 +211,29 @@ " end_date=\"2024-06-30\",\n", " scope=DocumentType.NEWS,\n", " document_limit=20,\n", - " batch_size=5\n", + " batch_size=5,\n", " )\n", - " \n", + "\n", " # Display results\n", " if not results_df.empty:\n", " print(f\"\\n✅ Found {len(results_df)} relevant documents\")\n", - " \n", + "\n", " # Company breakdown\n", - " company_counts = results_df['entity_name'].value_counts()\n", + " company_counts = results_df[\"entity_name\"].value_counts()\n", " print(\"\\n📈 Documents by company:\")\n", " for company, count in company_counts.items():\n", " print(f\" {company}: {count} documents\")\n", - " \n", + "\n", " # Sample headlines\n", " print(\"\\n📰 Sample headlines:\")\n", - " for headline in results_df['headline'].head(3):\n", + " for headline in results_df[\"headline\"].head(3):\n", " print(f\" • {headline}\")\n", - " \n", + "\n", " return results_df\n", " else:\n", " print(\"⚠️ No documents found\")\n", " return None\n", - " \n", + "\n", " except ValueError as e:\n", " if \"No rows to process\" in str(e):\n", " print(\"⚠️ No documents found matching the search criteria\")\n", @@ -252,8 +242,9 @@ " else:\n", " raise\n", "\n", + "\n", "# Run the example\n", - "search_results = demo_search_by_companies()\n" + "search_results = demo_search_by_companies()" ] }, { @@ -645,7 +636,7 @@ " print(\"📊 Results DataFrame:\")\n", " display(search_results.head(10))\n", "else:\n", - " print(\"No results to display\")\n" + " print(\"No results to display\")" ] }, { @@ -707,25 +698,22 @@ "source": [ "def demo_run_search():\n", " \"\"\"Demonstrate run_search with custom query building.\"\"\"\n", - " \n", + "\n", " print(\"🔧 Custom Query Search Example\")\n", " print(\"=\" * 40)\n", - " \n", + "\n", " # Define entities to search for\n", " entities = EntitiesToSearch(\n", " companies=[\"Apple Inc\", \"Google\", \"Microsoft Corp\"],\n", " topic=[\"earnings\", \"financial results\"],\n", - " concepts=[\"revenue growth\", \"profit margins\"]\n", + " concepts=[\"revenue growth\", \"profit margins\"],\n", " )\n", - " \n", + "\n", " # Define search sentences\n", - " sentences = [\n", - " \"quarterly earnings performance\",\n", - " \"revenue growth and profitability\"\n", - " ]\n", - " \n", + " sentences = [\"quarterly earnings performance\", \"revenue growth and profitability\"]\n", + "\n", " print(\"🔨 Building search queries...\")\n", - " \n", + "\n", " # Build queries\n", " queries = build_batched_query(\n", " sentences=sentences,\n", @@ -736,64 +724,67 @@ " batch_size=5,\n", " fiscal_year=None,\n", " scope=DocumentType.NEWS,\n", - " custom_batches=None\n", + " custom_batches=None,\n", " )\n", - " \n", + "\n", " print(f\"✅ Generated {len(queries)} search queries\")\n", - " \n", + "\n", " # Create date ranges\n", " date_ranges = create_date_ranges(\"2024-10-01\", \"2024-12-31\", \"M\")\n", " print(f\"📅 Searching across {len(date_ranges)} time periods\")\n", - " \n", + "\n", " # Execute search\n", " print(\"🔍 Executing search...\")\n", - " \n", + "\n", " search_results = run_search(\n", " queries=queries,\n", " date_ranges=date_ranges,\n", " scope=DocumentType.NEWS,\n", " limit=8,\n", - " only_results=True\n", + " only_results=True,\n", " )\n", - " \n", + "\n", " # Process results\n", " all_documents = []\n", - " \n", + "\n", " for result_batch in search_results:\n", " for doc in result_batch:\n", " # Convert timezone-aware datetime for compatibility\n", - " timestamp_naive = doc.timestamp.replace(tzinfo=None) if doc.timestamp else None\n", - " \n", + " timestamp_naive = (\n", + " doc.timestamp.replace(tzinfo=None) if doc.timestamp else None\n", + " )\n", + "\n", " doc_data = {\n", - " 'timestamp': timestamp_naive,\n", - " 'headline': doc.headline,\n", - " 'source': doc.source.name if doc.source else 'Unknown',\n", - " 'doc_id': doc.id if hasattr(doc, 'id') else 'N/A'\n", + " \"timestamp\": timestamp_naive,\n", + " \"headline\": doc.headline,\n", + " \"source\": doc.source.name if doc.source else \"Unknown\",\n", + " \"doc_id\": doc.id if hasattr(doc, \"id\") else \"N/A\",\n", " }\n", " all_documents.append(doc_data)\n", - " \n", + "\n", " # Convert to DataFrame\n", " results_df = pd.DataFrame(all_documents)\n", - " \n", + "\n", " if not results_df.empty:\n", " print(f\"\\n✅ Found {len(results_df)} documents total\")\n", - " \n", + "\n", " # Source distribution\n", - " source_counts = results_df['source'].value_counts()\n", + " source_counts = results_df[\"source\"].value_counts()\n", " print(\"\\n📊 Documents by source:\")\n", " for source, count in source_counts.head(5).items():\n", " print(f\" {source}: {count} documents\")\n", - " \n", + "\n", " # Sample headlines\n", " print(\"\\n📰 Sample headlines:\")\n", - " for headline in results_df['headline'].head(3):\n", + " for headline in results_df[\"headline\"].head(3):\n", " print(f\" • {headline}\")\n", - " \n", + "\n", " return results_df\n", " else:\n", " print(\"⚠️ No documents found\")\n", " return None\n", "\n", + "\n", "# Run the example\n", "custom_search_results = demo_run_search()" ] From 82f5ee0b1de386e0cd9edf98e082e36260b9baf8 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Fri, 3 Oct 2025 08:59:29 +0000 Subject: [PATCH 19/82] adding entity ticker and id to output df --- src/bigdata_research_tools/labeler/narrative_labeler.py | 6 +++++- src/bigdata_research_tools/search/narrative_search.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 6527098..14c4098 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -134,10 +134,12 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "entity": "Entity", "country_code": "Country Code", "entity_type": "Entity Type", + "entity_id": "Entity ID", + "entity_ticker": "Entity Ticker", } ) - df = df.explode(["Entity", "Entity Type", "Country Code"], ignore_index=True) + df = df.explode(["Entity", "Entity Type", "Country Code", "Entity ID", "Entity Ticker"], ignore_index=True) # Select and order columns export_columns = [ @@ -150,6 +152,8 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "Motivation", "Label", "Entity", + "Entity ID", + "Entity Ticker", "Country Code", "Entity Type", ] diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index a97fa05..09bf440 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -158,7 +158,9 @@ def _process_narrative_search( "text": chunk.text, "entity": [entity["name"] for entity in chunk_entities], "country_code": [entity["country"] for entity in chunk_entities], - "entity_type": [entity["entity_type"] for entity in chunk_entities], + "entity_type": [entity["entity_type"] for entity in chunk_entities], + "entity_id": [entity["key"] for entity in chunk_entities], + "entity_ticker": [entity.get("ticker", '') for entity in chunk_entities], } ) From 1ccbd55b1914bc0e98e72f0e99d340f8e592ecb6 Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 3 Oct 2025 11:46:35 +0200 Subject: [PATCH 20/82] Migrate typing to modern types Start solving typing issues --- CHANGELOG.md | 3 + Makefile | 2 +- README.md | 2 +- examples/narrative_miner.py | 4 +- examples/risk_analyzer.py | 4 +- examples/run_search.py | 4 +- examples/thematic_screener.py | 4 +- pyproject.toml | 2 +- src/bigdata_research_tools/client.py | 9 +- src/bigdata_research_tools/excel.py | 5 +- src/bigdata_research_tools/labeler/labeler.py | 17 +- .../labeler/narrative_labeler.py | 7 +- .../labeler/risk_labeler.py | 12 +- .../labeler/screener_labeler.py | 7 +- src/bigdata_research_tools/llm/base.py | 12 +- src/bigdata_research_tools/llm/bedrock.py | 12 +- src/bigdata_research_tools/llm/utils.py | 20 +- .../portfolio/motivation.py | 14 +- .../portfolio/portfolio_constructor.py | 9 +- src/bigdata_research_tools/prompts/labeler.py | 15 +- .../search/narrative_search.py | 23 +- .../search/query_builder.py | 98 ++--- .../search/screener_search.py | 40 +- src/bigdata_research_tools/search/search.py | 73 ++-- .../search/search_utils.py | 21 +- src/bigdata_research_tools/tracing.py | 22 +- src/bigdata_research_tools/tree.py | 41 +- .../{utils.py => utils/files.py} | 3 +- .../visuals/risk_visuals.py | 10 +- .../visuals/thematic_visuals.py | 10 +- src/bigdata_research_tools/visuals/visuals.py | 20 +- .../workflows/narrative_miner.py | 13 +- .../workflows/risk_analyzer.py | 29 +- .../workflows/thematic_screener.py | 15 +- src/bigdata_research_tools/workflows/utils.py | 4 +- tests/test_llm/test_utils.py | 3 +- tutorial/tutorial_notebook.ipynb | 6 +- uv.lock | 364 +----------------- 38 files changed, 322 insertions(+), 637 deletions(-) rename src/bigdata_research_tools/{utils.py => utils/files.py} (86%) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0e1714..1214cfa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,9 @@ Preparation for a first stable release. - Changed build system to use `uv_build` instead of `setuptools` to avoid issues with package data inclusion. - Fix duplicate dependencies in main vs optional dependencies. `openai` is now only optional while `graphviz`, `openpyxl` and `Pillow` is now only in main dependencies. +### Removed +- Removed support for Python 3.9 as it has reached its end of life. The minimum supported version is now Python 3.10. + ## [0.20.1] - 2025-09-16 ### Fix diff --git a/Makefile b/Makefile index 637da19..ecf555f 100644 --- a/Makefile +++ b/Makefile @@ -13,4 +13,4 @@ format: @uvx ruff format src/bigdata_research_tools/ examples/ tutorial/ tests/ type-check: - @uvx ty check src/bigdata_research_tools/ examples/ tutorial/ tests/ \ No newline at end of file + @uvx ty check src/bigdata_research_tools/ # examples/ tutorial/ tests/ \ No newline at end of file diff --git a/README.md b/README.md index a6c51dd..b4ce118 100644 --- a/README.md +++ b/README.md @@ -477,7 +477,7 @@ results = run_search( | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | `queries` | `List[QueryComponent]` | | List of search queries | -| `date_ranges` | `DATE_RANGE_TYPE` | `None` | Date range specifications | +| `date_ranges` | `INPUT_DATE_RANGE` | | Date range specifications | | `limit` | `int` | `10` | Results per query | | `only_results` | `bool` | `True` | Return format control | | `scope` | `DocumentType` | `ALL` | Document type filter | diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index c4c7818..9a676dc 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -1,12 +1,10 @@ -from typing import Dict - from bigdata_client.models.search import DocumentType from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.workflows import NarrativeMiner -def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> Dict: +def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> dict: narrative_miner = NarrativeMiner( narrative_sentences=[ "Supervised Learning Techniques", diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index dead2e1..2af562a 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -1,5 +1,3 @@ -from typing import Dict - from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection @@ -14,7 +12,7 @@ def risk_analyzer_example( control_entities: dict = {"place": ["Canada", "Mexico"]}, focus: str = "", export_path: str = "risk_analyzer_results.xlsx", -) -> Dict: +) -> dict: GRID_watchlist_ID = "44118802-9104-4265-b97a-2e6d88d74893" bigdata = bigdata_connection() diff --git a/examples/run_search.py b/examples/run_search.py index 659a779..d142e6f 100644 --- a/examples/run_search.py +++ b/examples/run_search.py @@ -62,7 +62,9 @@ def main(): logger.info(f"Generated {len(queries)} search queries") # Create date ranges for the search - date_ranges = create_date_ranges("2024-10-01", "2024-12-31", "M") # Monthly + date_ranges = create_date_ranges( + "2024-10-01", "2024-12-31", "M", return_datetime=True + ) # Monthly logger.info(f"Searching across {len(date_ranges)} time periods") # Execute the search diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index f1426be..9ac06d0 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -1,5 +1,3 @@ -from typing import Dict - from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection @@ -12,7 +10,7 @@ def thematic_screener_example( theme_name: str, llm_model: str = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", -) -> Dict: +) -> dict: GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" bigdata = bigdata_connection() diff --git a/pyproject.toml b/pyproject.toml index 1382174..6afd273 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ version = "1.0.0-beta-0" description = "Bigdata.com API High-Efficiency Tools at Scale" readme = "README.md" authors = [{ name = "Bigdata.com", email = "support@ravenpack.com" }] -requires-python = ">=3.9,<4.0" +requires-python = ">=3.10,<4.0" dependencies = [ "bigdata-client>=2.15.0", "pandas>=2.2.3,<3.0.0", diff --git a/src/bigdata_research_tools/client.py b/src/bigdata_research_tools/client.py index e7da058..cbbddc1 100644 --- a/src/bigdata_research_tools/client.py +++ b/src/bigdata_research_tools/client.py @@ -1,19 +1,18 @@ from logging import Logger, getLogger from os import environ from time import sleep -from typing import Optional from bigdata_client import Bigdata logger: Logger = getLogger(__name__) -_bigdata_client: Optional[Bigdata] = None +_bigdata_client: Bigdata | None = None def init_bigdata_client( - user: Optional[str] = None, - password: Optional[str] = None, - api_key: Optional[str] = None, + user: str | None = None, + password: str | None = None, + api_key: str | None = None, retries: int = 5, wait_time: int = 3, ) -> Bigdata: diff --git a/src/bigdata_research_tools/excel.py b/src/bigdata_research_tools/excel.py index e80a347..fd77f82 100644 --- a/src/bigdata_research_tools/excel.py +++ b/src/bigdata_research_tools/excel.py @@ -1,9 +1,8 @@ from logging import Logger, getLogger -from typing import List, Tuple import pandas as pd -from bigdata_research_tools.utils import ( +from bigdata_research_tools.utils.files import ( check_libraries_installed, get_resources_path, ) @@ -52,7 +51,7 @@ def __init__( def save_workbook( self, - df_args: List[Tuple[pd.DataFrame, str, Tuple[int, int]]], + df_args: list[tuple[pd.DataFrame, str, tuple[int, int]]], workbook_path: str, ) -> None: """Save DataFrames to Excel workbook.""" diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 8955bec..9e170c2 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -2,7 +2,7 @@ from itertools import zip_longest from json import JSONDecodeError, dumps, loads from logging import Logger, getLogger -from typing import Any, Dict, List, Optional +from typing import Any from json_repair import repair_json from pandas import DataFrame @@ -39,7 +39,7 @@ def __init__( self.unknown_label = unknown_label def _deserialize_label_responses( - self, responses: List[Dict[str, Any]] + self, responses: list[dict[str, Any]] ) -> DataFrame: """ Deserialize labeling responses into a DataFrame. @@ -88,8 +88,8 @@ def _deserialize_label_responses( return df_labels def _run_labeling_prompts( - self, prompts: List[str], system_prompt: str, max_workers: int = 100 - ) -> List: + self, prompts: list[str], system_prompt: str, max_workers: int = 100 + ) -> list: """ Get the labels from the prompts. @@ -125,9 +125,9 @@ def _run_labeling_prompts( def get_prompts_for_labeler( - texts: List[str], - textsconfig: Optional[List[Dict[str, Any]]] = [], -) -> List[str]: + texts: list[str], + textsconfig: list[dict[str, Any]] | None = None, +) -> list[str]: """ Generate a list of user messages for each text to be labelled by the labeling system. @@ -141,6 +141,7 @@ def get_prompts_for_labeler( Returns: A list of prompts for the labeling system. """ + textsconfig = textsconfig or [] return [ dumps({"sentence_id": i, **config, "text": text}) for i, (config, text) in enumerate( @@ -149,7 +150,7 @@ def get_prompts_for_labeler( ] -def parse_labeling_response(response: str) -> Dict: +def parse_labeling_response(response: str) -> dict: """ Parse the response from the LLM model used for labeling. diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 0e014e3..8db6cdc 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -1,5 +1,4 @@ from logging import Logger, getLogger -from typing import List, Optional from pandas import DataFrame @@ -19,7 +18,7 @@ class NarrativeLabeler(Labeler): def __init__( self, llm_model: str, - label_prompt: Optional[str] = None, + label_prompt: str | None = None, unknown_label: str = "unclear", temperature: float = 0, ): @@ -38,8 +37,8 @@ def __init__( def get_labels( self, - theme_labels: List[str], - texts: List[str], + theme_labels: list[str], + texts: list[str], max_workers: int = 50, ) -> DataFrame: """ diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 4738b26..e4211e5 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -1,5 +1,5 @@ from logging import Logger, getLogger -from typing import Any, Dict, List, Optional +from typing import Any from pandas import DataFrame, Series @@ -23,7 +23,7 @@ class RiskLabeler(Labeler): def __init__( self, llm_model: str, - label_prompt: Optional[str] = None, + label_prompt: str | None = None, # TODO (cpinto, 2025.02.07) This value is also in the prompt used. # Changing it here would break the process. unknown_label: str = "unclear", @@ -44,10 +44,10 @@ def __init__( def get_labels( self, main_theme: str, - labels: List[str], - texts: List[str], + labels: list[str], + texts: list[str], max_workers: int = 50, - textsconfig: Optional[List[Dict[str, Any]]] = [], + textsconfig: list[dict[str, Any]] | None = None, ) -> DataFrame: """ Process thematic labels for texts. @@ -81,7 +81,7 @@ def get_labels( return self._deserialize_label_responses(responses) def post_process_dataframe( - self, df: DataFrame, extra_fields: dict, extra_columns: List[str] + self, df: DataFrame, extra_fields: dict, extra_columns: list[str] ) -> DataFrame: """ Post-process the labeled DataFrame. diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 65d11a6..7e06b7c 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -1,5 +1,4 @@ from logging import Logger, getLogger -from typing import List, Optional from pandas import DataFrame, Series @@ -23,7 +22,7 @@ class ScreenerLabeler(Labeler): def __init__( self, llm_model: str, - label_prompt: Optional[str] = None, + label_prompt: str | None = None, unknown_label: str = "unclear", temperature: float = 0, ): @@ -42,8 +41,8 @@ def __init__( def get_labels( self, main_theme: str, - labels: List[str], - texts: List[str], + labels: list[str], + texts: list[str], max_workers: int = 50, ) -> DataFrame: """ diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index ed69df2..b5ff511 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -9,7 +9,7 @@ class AsyncLLMProvider(ABC): - def __init__(self, model: str = None): + def __init__(self, model: str | None = None): self.model = model @abstractmethod @@ -57,9 +57,9 @@ async def get_stream_response( class AsyncLLMEngine: - def __init__(self, model: str = None): + def __init__(self, model: str | None = None): if model is None: - model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM") + model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM", "openai::gpt-4o-mini") source = "Environment" else: source = "Argument" @@ -136,7 +136,7 @@ async def get_tools_response( class LLMProvider(ABC): - def __init__(self, model: str = None): + def __init__(self, model: str | None = None): self.model = model @abstractmethod @@ -184,9 +184,9 @@ def get_stream_response( class LLMEngine: - def __init__(self, model: str = None): + def __init__(self, model: str | None = None): if model is None: - model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM") + model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM", "openai::gpt-4o-mini") source = "Environment" else: source = "Argument" diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index aef06d5..762571d 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -15,10 +15,10 @@ class AsyncBedrockProvider(AsyncLLMProvider): # Asynchronous boto3 is tricky, for now use the synchronous client, this will not # provide the benefits from async, but will at least let our workflows run for now - def __init__(self, model: str, region: str = None): + def __init__(self, model: str, region: str | None = None): super().__init__(model) - self.region: str = region - self._client: Session = None + self.region: str | None = region + self._client: Session | None = None self.configure_bedrock_client() def configure_bedrock_client(self) -> None: @@ -176,10 +176,10 @@ async def get_stream_response( class BedrockProvider(LLMProvider): - def __init__(self, model: str, region: str = None): + def __init__(self, model: str, region: str | None = None): super().__init__(model) - self.region: str = region - self._client: Session = None + self.region: str | None = region + self._client: Session | None = None self.configure_bedrock_client() def configure_bedrock_client(self) -> None: diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 134313f..33f0cbd 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -2,7 +2,7 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from logging import Logger, getLogger -from typing import List, Tuple +from typing import Any, Coroutine from openai import APITimeoutError, RateLimitError from tqdm import tqdm @@ -15,11 +15,11 @@ # https://platform.openai.com/docs/guides/batch def run_concurrent_prompts( llm_engine: AsyncLLMEngine, - prompts: List[str], + prompts: list[str], system_prompt: str, max_workers: int = 30, **kwargs, -) -> List[str]: +) -> list[str]: """ Run the LLM on the received prompts, concurrently. @@ -51,7 +51,7 @@ async def _fetch_with_semaphore( system_prompt: str, prompt: str, **kwargs, -) -> Tuple[int, str]: +) -> tuple[int, str]: """ Fetch the response from the LLM engine with a semaphore. @@ -86,10 +86,12 @@ async def _fetch_with_semaphore( return idx, "" -async def _run_with_progress_bar(tasks) -> List: +async def _run_with_progress_bar( + tasks: list[Coroutine[Any, Any, tuple[int, str]]], +) -> list[str]: """Run asyncio tasks with a tqdm progress bar.""" # Pre-allocate a list for results to preserve order - results = [None] * len(tasks) + results = [""] * len(tasks) with tqdm(total=len(tasks), desc="Querying an LLM...") as pbar: for coro in asyncio.as_completed(tasks): idx, result = await coro @@ -103,11 +105,11 @@ async def _run_with_progress_bar(tasks) -> List: # Added function to run synchronous LLM calls in parallel using threads. def run_parallel_prompts( llm_engine, - prompts: List[str], + prompts: list[str], system_prompt: str, max_workers: int = 30, **kwargs, -) -> List[str]: +) -> list[str]: """ Run the LLM on the received prompts concurrently using threads. @@ -139,7 +141,7 @@ def fetch(idx, prompt): logger.error(f"Failed to get response for prompt: {prompt}") return idx, "" - results = [None] * len(prompts) + results = [""] * len(prompts) with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [ executor.submit(fetch, idx, prompt) for idx, prompt in enumerate(prompts) diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index f5aab4e..1be5e69 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Any, Dict, Tuple +from typing import Any import pandas as pd from tqdm import tqdm @@ -13,7 +13,9 @@ class Motivation: A class for generating motivation statements for companies based on thematic analysis. """ - def __init__(self, model: str = None, model_config: Dict[str, Any] = None): + def __init__( + self, model: str | None = None, model_config: dict[str, Any] | None = None + ): """ Initialize the Motivation class. @@ -25,7 +27,7 @@ def __init__(self, model: str = None, model_config: Dict[str, Any] = None): self.llm_engine = LLMEngine(model=model) @staticmethod - def _get_default_model_config() -> Dict[str, Any]: + def _get_default_model_config() -> dict[str, Any]: """Get default LLM model configuration.""" return { "temperature": 0, @@ -36,7 +38,7 @@ def _get_default_model_config() -> Dict[str, Any]: "seed": 42, } - def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> Dict: + def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict: """ Group quotes and labels by company. @@ -120,7 +122,7 @@ def query_llm_for_motivation(self, prompt: str) -> str: return motivation.strip() def generate_company_motivations( - self, df: pd.DataFrame, theme_name: str, word_range: Tuple[int, int] + self, df: pd.DataFrame, theme_name: str, word_range: tuple[int, int] ) -> pd.DataFrame: """ Generates motivation statement with specified verbosity for companies in a thematic watchlist. @@ -167,6 +169,6 @@ def generate_company_motivations( .reset_index(drop=True) ) - def update_model_config(self, config: Dict[str, Any]): + def update_model_config(self, config: dict[str, Any]): """Update the model configuration.""" self.model_config.update(config) diff --git a/src/bigdata_research_tools/portfolio/portfolio_constructor.py b/src/bigdata_research_tools/portfolio/portfolio_constructor.py index e660d46..daff109 100644 --- a/src/bigdata_research_tools/portfolio/portfolio_constructor.py +++ b/src/bigdata_research_tools/portfolio/portfolio_constructor.py @@ -42,8 +42,8 @@ def construct_portfolio( df: pd.DataFrame, score_col: str, balance_col: str, - weight_col: str = None, - size: int = None, + weight_col: str | None = None, + size: int | None = None, max_position_weight: float = 0.05, max_category_weight: float = 0.15, weight_method: WeightMethod = WeightMethod.EQUAL, @@ -149,7 +149,10 @@ def _balance_by_category( return df[mask].copy() def _calculate_weights( - self, portfolio: pd.DataFrame, weight_method: WeightMethod, weight_col: str + self, + portfolio: pd.DataFrame, + weight_method: WeightMethod, + weight_col: str | None, ) -> pd.DataFrame: """ Calculate initial weights based on the specified weighting method. diff --git a/src/bigdata_research_tools/prompts/labeler.py b/src/bigdata_research_tools/prompts/labeler.py index a15013d..f7ae4f0 100644 --- a/src/bigdata_research_tools/prompts/labeler.py +++ b/src/bigdata_research_tools/prompts/labeler.py @@ -1,5 +1,4 @@ from os import environ -from typing import Dict, List def get_other_entity_placeholder() -> str: @@ -87,7 +86,7 @@ def get_target_entity_placeholder() -> str: - Ensure that all strings in the JSON are correctly formatted with proper quotes. """ -patent_prompts: Dict[str, str] = { +patent_prompts: dict[str, str] = { "filing": """ You are analyzing text to detect patent filing activities by "Target Company". Determine if the text describes a legitimate patent filing. @@ -127,7 +126,7 @@ def get_target_entity_placeholder() -> str: } -def get_narrative_system_prompt(theme_labels: List[str]) -> str: +def get_narrative_system_prompt(theme_labels: list[str]) -> str: """Generate a system prompt for labeling sentences with narrative labels.""" return narrative_system_prompt_template.format( theme_labels=theme_labels, @@ -135,7 +134,7 @@ def get_narrative_system_prompt(theme_labels: List[str]) -> str: def get_screener_system_prompt( - main_theme: str, label_summaries: List[str], unknown_label: str + main_theme: str, label_summaries: list[str], unknown_label: str ) -> str: """Generate a system prompt for labeling sentences with thematic labels.""" return screener_system_prompt_template.format( @@ -166,7 +165,7 @@ def get_screener_system_prompt( - Examine whether the text explicitly mentions the Risk Scenario "{main_theme}" or any of its core components. - Ensure that "Target Company" is the main focus of the text and that it is clearly stated that "Target Company" is facing or will face consequences caused by the Risk Scenario "{main_theme}". -- Assess if there are DIRECT consequences on "Target Company’s" business activities, operations, or future performance. +- Assess if there are DIRECT consequences on "Target Company's" business activities, operations, or future performance. - Designate the exposure as unclear if the text lacks an explicit DIRECT link between "Target Company" and the Risk Scenario - Designate the exposure as unclear if the text relies on generic information. @@ -237,7 +236,7 @@ def get_screener_system_prompt( Entity Sector: Retail Entity Industry: Apparel Headline: "Economic Challenges Ahead Due to Tariffs on China" -Text: "Target Company’s analysts report a potential economic downturn linked to new tariffs against China." +Text: "Target Company's analysts report a potential economic downturn linked to new tariffs against China." Risk Scenario: "New Tariffs Against China" Output: @@ -265,7 +264,7 @@ def get_screener_system_prompt( Entity Sector: Finance Entity Industry: Investment Banking Headline: "Market Trends Influence Stock Performance" -Text: "Target Company’s stock is influenced by broad market trends." +Text: "Target Company's stock is influenced by broad market trends." Risk Scenario: "Increased Uncertainty and Volatility" Output: @@ -310,7 +309,7 @@ def get_screener_system_prompt( """ -def get_risk_system_prompt(main_theme: str, label_summaries: List[str]) -> str: +def get_risk_system_prompt(main_theme: str, label_summaries: list[str]) -> str: """Generate a system prompt for labeling sentences with thematic labels.""" return risk_system_prompt_template.format( main_theme=main_theme, label_summaries=label_summaries diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index de0708d..a40f8f9 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -1,8 +1,7 @@ from logging import Logger, getLogger -from typing import List, Optional from bigdata_client.document import Document -from bigdata_client.models.advanced_search_query import ListQueryComponent +from bigdata_client.models.entities import Concept from bigdata_client.models.search import DocumentType, SortBy from pandas import DataFrame from tqdm import tqdm @@ -22,17 +21,17 @@ def search_narratives( - sentences: List[str], + sentences: list[str], start_date: str, end_date: str, scope: DocumentType, - fiscal_year: Optional[int] = None, - sources: Optional[List[str]] = None, - keywords: Optional[List[str]] = None, - control_entities: Optional[List[str]] = None, + fiscal_year: int | None = None, + sources: list[str] | None = None, + keywords: list[str] | None = None, + control_entities: list[str] | None = None, frequency: str = "M", sort_by: SortBy = SortBy.RELEVANCE, - rerank_threshold: Optional[float] = None, + rerank_threshold: float | None = None, document_limit: int = 50, batch_size: int = 10, **kwargs, @@ -91,7 +90,9 @@ def search_narratives( ) # Create list of date ranges - date_ranges = create_date_ranges(start_date, end_date, frequency) + date_ranges = create_date_ranges( + start_date, end_date, frequency, return_datetime=True + ) no_queries = len(batched_query) no_dates = len(date_ranges) @@ -117,8 +118,8 @@ def search_narratives( def _process_narrative_search( - results: List[Document], - entities: List[ListQueryComponent], + results: list[Document], + entities: list[Concept], ) -> DataFrame: """ Build a dataframe for when no companies are specified. diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index f9791fc..60222ef 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -1,6 +1,7 @@ from dataclasses import dataclass +from datetime import datetime from itertools import chain, zip_longest -from typing import Dict, List, Optional, Tuple, Type +from typing import Type import pandas as pd from bigdata_client.daterange import AbsoluteDateRange @@ -23,16 +24,16 @@ @dataclass class EntitiesToSearch: - people: Optional[List[str]] = None - product: Optional[List[str]] = None - org: Optional[List[str]] = None - place: Optional[List[str]] = None - topic: Optional[List[str]] = None - concepts: Optional[List[str]] = None - companies: Optional[List[str]] = None + people: list[str] | None = None + product: list[str] | None = None + org: list[str] | None = None + place: list[str] | None = None + topic: list[str] | None = None + concepts: list[str] | None = None + companies: list[str] | None = None @staticmethod - def get_entity_type_map() -> Dict[str, Type]: + def get_entity_type_map() -> dict[str, Type]: return { "people": Person, "product": Product, @@ -44,7 +45,7 @@ def get_entity_type_map() -> Dict[str, Type]: } -def build_similarity_queries(sentences: List[str]) -> List[Similarity]: +def build_similarity_queries(sentences: list[str]) -> list[Similarity]: """ Processes a list of sentences to create a list of Similarity query objects, ensuring no duplicates. @@ -71,16 +72,16 @@ def build_similarity_queries(sentences: List[str]) -> List[Similarity]: def build_batched_query( - sentences: List[str], - keywords: Optional[List[str]], - entities: Optional[EntitiesToSearch], - control_entities: Optional[EntitiesToSearch], - sources: Optional[List[str]], + sentences: list[str], + keywords: list[str] | None, + entities: EntitiesToSearch | None, + control_entities: EntitiesToSearch | None, + sources: list[str] | None, batch_size: int, - fiscal_year: Optional[int], + fiscal_year: int | None, scope: DocumentType, - custom_batches: Optional[List[EntitiesToSearch]], -) -> List[QueryComponent]: + custom_batches: list[EntitiesToSearch] | None, +) -> list[QueryComponent]: """ Builds a list of batched query objects based on the provided parameters. @@ -146,7 +147,7 @@ def build_batched_query( def _validate_parameters( - document_scope: DocumentType = None, fiscal_year: int = None + document_scope: DocumentType | None = None, fiscal_year: int | None = None ) -> None: """ Validates parameters based on predefined rules. @@ -171,10 +172,10 @@ def _validate_parameters( def _build_base_queries( - sentences: Optional[List[str]], - keywords: Optional[List[str]], - sources: Optional[List[str]], -) -> Tuple[List[QueryComponent], Optional[QueryComponent], Optional[QueryComponent]]: + sentences: list[str] | None, + keywords: list[str] | None, + sources: list[str] | None, +) -> tuple[list[QueryComponent], QueryComponent | None, QueryComponent | None]: """Build the base queries from sentences, keywords, and sources.""" # Create similarity queries from sentences queries = build_similarity_queries(sentences) if sentences else [] @@ -189,7 +190,7 @@ def _build_base_queries( def _get_entity_ids( - entity_names: List[str], + entity_names: list[str], entity_type: Type, ) -> list[Type]: bigdata = bigdata_connection() @@ -270,11 +271,11 @@ def _build_control_entity_query( def _build_entity_batch_queries( - entities: EntitiesToSearch, - custom_batches: List[EntitiesToSearch], + entities: EntitiesToSearch | None, + custom_batches: list[EntitiesToSearch], batch_size: int, scope: DocumentType, -) -> List[Optional[QueryComponent]]: +) -> list[QueryComponent] | list[None]: """Build entity batch queries from either custom batches or auto-batched entities.""" # If no entities specified, return a single None to ensure at least one iteration @@ -299,14 +300,14 @@ def _get_entity_type(scope: DocumentType) -> type: def _build_custom_batch_queries( - custom_batches: List[EntitiesToSearch], scope: DocumentType -) -> List[QueryComponent]: + custom_batches: list[EntitiesToSearch], scope: DocumentType +) -> list[QueryComponent] | list[None]: """Build entity queries from a list of EntitiesToSearch objects.""" entity_type_map = EntitiesToSearch.get_entity_type_map() def get_entity_ids_for_attr( entity_config: EntitiesToSearch, attr_name: str, entity_class - ) -> List[int]: + ) -> list[int]: """Get entity IDs for a specific attribute.""" entity_names = getattr(entity_config, attr_name, None) if not entity_names: @@ -337,7 +338,7 @@ def _auto_batch_entities( entities: EntitiesToSearch, batch_size: int, scope: DocumentType = DocumentType.ALL, -) -> List[QueryComponent]: +) -> list[QueryComponent]: """Auto-batch entities by type using the specified batch size.""" # Create batches for each entity type @@ -374,14 +375,14 @@ def _auto_batch_entities( def _expand_queries( - base_queries_tuple: Tuple[ - List[QueryComponent], Optional[QueryComponent], Optional[QueryComponent] + base_queries_tuple: tuple[ + list[QueryComponent], QueryComponent | None, QueryComponent | None ], - entity_batch_queries: Optional[List[Optional[QueryComponent]]] = None, - control_query: Optional[QueryComponent] = None, - source_query: Optional[QueryComponent] = None, - fiscal_year: Optional[int] = None, -) -> List[QueryComponent]: + entity_batch_queries: list[QueryComponent] | None = None, + control_query: QueryComponent | None = None, + source_query: QueryComponent | None = None, + fiscal_year: int | None = None, +) -> list[QueryComponent]: """Expand all query components into the final list of queries.""" base_queries, keyword_query, source_query = base_queries_tuple queries_expanded = [] @@ -425,7 +426,7 @@ def _expand_queries( def create_date_intervals( start_date: str, end_date: str, frequency: str -) -> List[Tuple[pd.Timestamp, pd.Timestamp]]: +) -> list[tuple[pd.Timestamp, pd.Timestamp]]: """ Generates date intervals based on a specified frequency within a given start and end date range. @@ -465,8 +466,8 @@ def create_date_intervals( - For invalid frequencies, a `ValueError` is raised to indicate the issue. """ # Convert start and end dates to pandas Timestamps - start_date = pd.Timestamp(start_date) - end_date = pd.Timestamp(end_date) + start_date_pd = pd.Timestamp(start_date) + end_date_pd = pd.Timestamp(end_date) # Adjust frequency for yearly and monthly to use appropriate start markers # 'AS' for year start, 'MS' for month start @@ -475,7 +476,7 @@ def create_date_intervals( # Generate date range based on the adjusted frequency try: date_range = pd.date_range( - start=start_date, end=end_date, frequency=adjusted_freq + start=start_date_pd, end=end_date_pd, freq=adjusted_freq ) except ValueError: raise ValueError("Invalid frequency. Use 'Y', 'M', 'W', or 'D'.") @@ -496,7 +497,7 @@ def create_date_intervals( intervals.append( ( date_range[-1].replace(hour=0, minute=0, second=0), - end_date.replace(hour=23, minute=59, second=59), + end_date_pd.replace(hour=23, minute=59, second=59), ) ) @@ -504,8 +505,8 @@ def create_date_intervals( def create_date_ranges( - start_date: str, end_date: str, frequency: str -) -> List[AbsoluteDateRange]: + start_date: str, end_date: str, frequency: str, return_datetime: bool = False +) -> list[AbsoluteDateRange | tuple[datetime, datetime]]: """ Generates a list of `AbsoluteDateRange` objects based on the specified frequency. @@ -520,6 +521,8 @@ def create_date_ranges( - 'M': Monthly. - 'W': Weekly. - 'D': Daily. + return_datetime (bool): + If True, returns a list of start datetime objects instead of AbsoluteDateRange objects. Defaults to False. Returns: List[AbsoluteDateRange]: @@ -532,4 +535,9 @@ def create_date_ranges( 3. Returns a list of these `AbsoluteDateRange` objects. """ intervals = create_date_intervals(start_date, end_date, frequency=frequency) + + if return_datetime: + return [ + (start.to_pydatetime(), end.to_pydatetime()) for start, end in intervals + ] return [AbsoluteDateRange(start, end) for start, end in intervals] diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index 6caf8db..897d1c1 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -1,9 +1,7 @@ from logging import Logger, getLogger -from typing import Dict, List, Optional from bigdata_client.document import Document -from bigdata_client.models.advanced_search_query import ListQueryComponent -from bigdata_client.models.entities import Company +from bigdata_client.models.entities import Company, Concept from bigdata_client.models.search import DocumentType, SortBy from pandas import DataFrame from tqdm import tqdm @@ -26,18 +24,18 @@ def search_by_companies( - companies: List[Company], - sentences: List[str], + companies: list[Company], + sentences: list[str], start_date: str, end_date: str, scope: DocumentType = DocumentType.ALL, - fiscal_year: Optional[int] = None, - sources: Optional[List[str]] = None, - keywords: Optional[List[str]] = None, - control_entities: Optional[Dict] = None, + fiscal_year: int | None = None, + sources: list[str] | None = None, + keywords: list[str] | None = None, + control_entities: dict | None = None, frequency: str = "M", sort_by: SortBy = SortBy.RELEVANCE, - rerank_threshold: Optional[float] = None, + rerank_threshold: float | None = None, document_limit: int = 50, batch_size: int = 10, **kwargs, @@ -135,7 +133,9 @@ def search_by_companies( ) # Create list of date ranges - date_ranges = create_date_ranges(start_date, end_date, frequency) + date_ranges = create_date_ranges( + start_date, end_date, frequency, return_datetime=True + ) no_queries = len(batched_query) no_dates = len(date_ranges) @@ -189,27 +189,27 @@ def search_by_companies( def filter_company_entities( - entities: List[ListQueryComponent], -) -> List[ListQueryComponent]: + entities: list[Concept], +) -> list[Concept]: """ Filter only COMPANY entities from the list of entities. Args: - entities (List[ListQueryComponent]): A list of entities to filter. + entities (List[Concept]): A list of entities to filter. Returns: - List[ListQueryComponent]: A list of COMPANY entities. + List[Concept]: A list of COMPANY entities. """ return [ entity for entity in entities - if hasattr(entity, "entity_type") and getattr(entity, "entity_type") == "COMP" + if hasattr(entity, "entity_type") and entity.entity_type == "COMP" ] def process_screener_search_results( - results: List[Document], - entities: List[ListQueryComponent], - companies: Optional[List[Company]] = None, + results: list[Document], + entities: list[Concept], + companies: list[Company] | None = None, document_type: DocumentType = DocumentType.NEWS, ) -> DataFrame: """ @@ -217,7 +217,7 @@ def process_screener_search_results( Args: results (List[Document]): A list of Bigdata search results. - entities (List[ListQueryComponent]): A list of entities. + entities (List[Entity]): A list of entities. companies (Optional[List[Company]]): A list of companies to filter for. Only used for non-reporting entity documents. document_type (DocumentType): The type of documents being processed. diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index e90c8c4..347fe69 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -12,7 +12,8 @@ import threading import time from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Dict, List, Optional, Tuple, Union +from datetime import datetime +from typing import Union from bigdata_client import Bigdata from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange @@ -24,13 +25,13 @@ from bigdata_research_tools.client import bigdata_connection, init_bigdata_client from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace -DATE_RANGE_TYPE = Union[ - AbsoluteDateRange, +INPUT_DATE_RANGE = Union[ + tuple[datetime, datetime], RollingDateRange, - List[Union[AbsoluteDateRange, RollingDateRange]], + list[tuple[datetime, datetime] | RollingDateRange], ] -SEARCH_QUERY_RESULTS_TYPE = Dict[ - Tuple[QueryComponent, Union[AbsoluteDateRange, RollingDateRange]], List[Document] +SEARCH_QUERY_RESULTS_TYPE = dict[ + tuple[QueryComponent, Union[AbsoluteDateRange, RollingDateRange]], list[Document] ] REQUESTS_PER_MINUTE_LIMIT = 300 @@ -49,8 +50,8 @@ class SearchManager: def __init__( self, rpm: int = REQUESTS_PER_MINUTE_LIMIT, - bucket_size: int = None, - bigdata: Bigdata = None, + bucket_size: int | None = None, + bigdata: Bigdata | None = None, **kwargs, ): """ @@ -85,7 +86,7 @@ def _refill_tokens(self): self.tokens = min(self.bucket_size, self.tokens + new_tokens) self.last_refill = now - def _acquire_token(self, timeout: float = None) -> bool: + def _acquire_token(self, timeout: float | None = None) -> bool: """ Attempt to acquire a token for executing a search request. @@ -112,14 +113,14 @@ def _acquire_token(self, timeout: float = None) -> bool: def _search( self, query: QueryComponent, - date_range: Union[AbsoluteDateRange, RollingDateRange] = None, + date_range: Union[tuple[datetime, datetime], RollingDateRange], sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, - timeout: float = None, - rerank_threshold: float = None, + timeout: float | None = None, + rerank_threshold: float | None = None, **kwargs, - ) -> Optional[List[Document]]: + ) -> list[Document] | None: """ Execute a single search with rate limiting. @@ -147,13 +148,20 @@ def _search( logging.warning("Timed out attempting to acquire rate limit token") return None - if isinstance(date_range, tuple): - date_range = AbsoluteDateRange(*date_range) + date_filter: AbsoluteDateRange | RollingDateRange + + if date_range and isinstance(date_range, tuple): + date_filter = AbsoluteDateRange( + start=date_range[0], + end=date_range[1], + ) + else: + date_filter = date_range try: query_obj = self.bigdata.search.new( query=query, - date_range=date_range, + date_range=date_filter, sortby=sortby, scope=scope, rerank_threshold=rerank_threshold, @@ -163,19 +171,20 @@ def _search( kwargs["current_trace"].add_query_units(query_obj.get_usage()) return results except Exception as e: + raise e logging.error(f"Search error: {e}") return None def concurrent_search( self, - queries: List[QueryComponent], - date_ranges: DATE_RANGE_TYPE = None, + queries: list[QueryComponent], + date_ranges: list[tuple[datetime, datetime] | RollingDateRange], sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, max_workers: int = MAX_WORKERS, - timeout: float = None, - rerank_threshold: float = None, + timeout: float | None = None, + rerank_threshold: float | None = None, **kwargs, ) -> SEARCH_QUERY_RESULTS_TYPE: """ @@ -231,33 +240,29 @@ def concurrent_search( try: results[(query, date_range)] = future.result() except Exception as e: + raise e logging.error(f"Error in search {query, date_range}: {e}") return results -def normalize_date_range(date_ranges: DATE_RANGE_TYPE) -> DATE_RANGE_TYPE: +def normalize_date_range( + date_ranges: INPUT_DATE_RANGE, +) -> list[tuple[datetime, datetime] | RollingDateRange]: if not isinstance(date_ranges, list): date_ranges = [date_ranges] - # Convert mutable AbsoluteDateRange into hashable objects - for i, dr in enumerate(date_ranges): - if isinstance(dr, AbsoluteDateRange): - date_ranges[i] = ( - dr.start_dt.strftime("%Y-%m-%d %H:%M:%S"), - dr.end_dt.strftime("%Y-%m-%d %H:%M:%S"), - ) return date_ranges def run_search( - queries: List[QueryComponent], - date_ranges: DATE_RANGE_TYPE = None, + queries: list[QueryComponent], + date_ranges: INPUT_DATE_RANGE = None, sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, only_results: bool = True, - rerank_threshold: float = None, + rerank_threshold: float | None = None, **kwargs, ) -> Union[SEARCH_QUERY_RESULTS_TYPE, list[list[Document]]]: """ @@ -265,7 +270,7 @@ def run_search( Args: queries (list[QueryComponent]): A list of QueryComponent objects. - date_ranges (Optional[Union[AbsoluteDateRange, RollingDateRange, List[Union[AbsoluteDateRange, RollingDateRange]]]]): + date_ranges (Union[tuple[datetime, datetime], RollingDateRange, list[tuple[datetime, datetime] | RollingDateRange],]): Date range filter for the search results. sortby (SortBy): The sorting criterion for the search results. Defaults to SortBy.RELEVANCE. scope (DocumentType): The scope of the documents to include. Defaults to DocumentType.ALL. @@ -292,8 +297,8 @@ def run_search( current_trace = Trace( event_name=TraceEventNames.RUN_SEARCH, document_type=scope, - start_date=start_date, - end_date=end_date, + start_date=start_date.isoformat() if start_date else None, + end_date=end_date.isoformat() if end_date else None, rerank_threshold=rerank_threshold, llm_model=None, frequency=None, diff --git a/src/bigdata_research_tools/search/search_utils.py b/src/bigdata_research_tools/search/search_utils.py index 3854a4e..ea40bbf 100644 --- a/src/bigdata_research_tools/search/search_utils.py +++ b/src/bigdata_research_tools/search/search_utils.py @@ -3,12 +3,11 @@ from logging import Logger, getLogger from re import findall from time import sleep -from typing import List, Tuple from bigdata_client.connection import RequestMaxLimitExceeds from bigdata_client.document import Document -from bigdata_client.models.advanced_search_query import ListQueryComponent from bigdata_client.models.document import DocumentChunk +from bigdata_client.models.entities import Concept from bigdata_client.query_type import QueryType from pydantic import ValidationError @@ -17,7 +16,7 @@ logger: Logger = getLogger(__name__) -def _collect_entity_keys(results: List[Document]) -> List[str]: +def _collect_entity_keys(results: list[Document]) -> list[str]: """ Collect all entity keys from the search results. @@ -38,8 +37,8 @@ def _collect_entity_keys(results: List[Document]) -> List[str]: def _look_up_entities_binary_search( - entity_keys: List[str], max_batch_size: int = 50 -) -> List[ListQueryComponent]: + entity_keys: list[str], max_batch_size: int = 50 +) -> list[Concept]: """ Look up entities using the Bigdata Knowledge Graph in a binary search manner. @@ -54,7 +53,7 @@ def _look_up_entities_binary_search( entities = [] non_entities = [] - def depth_first_search(batch: List[str]) -> None: + def depth_first_search(batch: list[str]) -> None: """ Recursively lookup entities in a depth-first search manner. @@ -105,8 +104,8 @@ def depth_first_search(batch: List[str]) -> None: def filter_search_results( - results: List[List[Document]], -) -> Tuple[List[Document], List[ListQueryComponent]]: + results: list[list[Document]], +) -> tuple[list[Document], list[Concept]]: """ Postprocess the search results to filter only COMPANY entities. @@ -115,7 +114,7 @@ def filter_search_results( the function `bigdata_research_tools.search.run_search` with the parameter `only_results` set to True Returns: - Tuple[List[Document], List[ListQueryComponent]]: A tuple of the filtered + Tuple[List[Document], List[Concept]]: A tuple of the filtered search results and the entities. """ # Flatten the list of result lists @@ -128,9 +127,7 @@ def filter_search_results( return results, entities -def build_chunk_entities( - chunk: DocumentChunk, entities: List[ListQueryComponent] -) -> List[dict]: +def build_chunk_entities(chunk: DocumentChunk, entities: list[Concept]) -> list[dict]: entity_key_map = {entity.id: entity for entity in entities} chunk_entities = [ diff --git a/src/bigdata_research_tools/tracing.py b/src/bigdata_research_tools/tracing.py index 146b767..cf94cc9 100644 --- a/src/bigdata_research_tools/tracing.py +++ b/src/bigdata_research_tools/tracing.py @@ -22,17 +22,17 @@ class TraceEventNames(Enum): @dataclasses.dataclass class Trace: - event_name: TraceEventNames = None - document_type: str = None - start_date: str = None - end_date: str = None - rerank_threshold: float = None - llm_model: str = None - frequency: str = None - result: str = None - workflow_start_date: datetime = None - workflow_end_date: datetime = None - workflow_usage: str = None + event_name: TraceEventNames + document_type: str + workflow_start_date: datetime + start_date: str | None = None + end_date: str | None = None + rerank_threshold: float | None = None + llm_model: str | None = None + frequency: str | None = None + result: str | None = None + workflow_end_date: datetime | None = None + workflow_usage: str | None = None _query_units_queue: Queue = Queue() # To protect against concurrent access issues diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index 4e92ade..d912c53 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -1,7 +1,7 @@ import ast import json -from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from dataclasses import dataclass, field +from typing import Any import graphviz from json_repair import repair_json @@ -11,7 +11,7 @@ from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus from bigdata_research_tools.prompts.themes import compose_themes_system_prompt -themes_default_llm_model_config: Dict[str, Any] = { +themes_default_llm_model_config: dict[str, Any] = { "provider": "openai", "model": "gpt-4o-mini", "kwargs": { @@ -40,18 +40,15 @@ class SemanticTree: summary (str): A brief explanation of the node's relevance. For the root node, this describes the overall relevance of the tree; for sub-nodes, it explains their connection to the parent node. - children (Optional[List[SemanticTree]]): A list of child nodes representing sub-units. - keywords (Optional[List[str]]): A list of keywords summarizing the current node. + children (list[SemanticTree] | None): A list of child nodes representing sub-units. + keywords (list[str] | None): A list of keywords summarizing the current node. """ label: str node: int - summary: str = None - children: List["SemanticTree"] = None - keywords: Optional[List[str]] = None - - def __post_init__(self): - self.children = self.children or [] + summary: str = "" + children: list["SemanticTree"] = field(default_factory=list) + keywords: list[str] = field(default_factory=list) def __str__(self) -> str: return self.as_string() @@ -70,7 +67,7 @@ def from_dict(tree_dict: dict) -> "SemanticTree": # Handle case sensitivity in keys tree_dict = dict_keys_to_lowercase(tree_dict) - tree = SemanticTree(**tree_dict) + tree = SemanticTree(**tree_dict) # ty: ignore[missing-argument] tree.children = [ SemanticTree.from_dict(child) for child in tree_dict.get("children", []) ] @@ -104,7 +101,7 @@ def as_string(self, prefix: str = "") -> str: s += child.as_string(prefix=child_prefix) return s - def get_label_summaries(self) -> Dict[str, str]: + def get_label_summaries(self) -> dict[str, str]: """ Extract the label summaries from the tree. @@ -116,7 +113,7 @@ def get_label_summaries(self) -> Dict[str, str]: label_summary.update(child.get_label_summaries()) return label_summary - def get_summaries(self) -> List[str]: + def get_summaries(self) -> list[str]: """ Extract the node summaries from a SemanticTree. @@ -128,7 +125,7 @@ def get_summaries(self) -> List[str]: summaries.extend(child.get_summaries()) return summaries - def get_terminal_label_summaries(self) -> Dict[str, str]: + def get_terminal_label_summaries(self) -> dict[str, str]: """ Extract the items (labels, summaries) from terminal nodes of the tree. @@ -143,7 +140,7 @@ def get_terminal_label_summaries(self) -> Dict[str, str]: label_summary.update(child.get_terminal_label_summaries()) return label_summary - def get_terminal_labels(self) -> List[str]: + def get_terminal_labels(self) -> list[str]: """ Extract the terminal labels from the tree. @@ -152,7 +149,7 @@ def get_terminal_labels(self) -> List[str]: """ return list(self.get_terminal_label_summaries().keys()) - def get_terminal_summaries(self) -> List[str]: + def get_terminal_summaries(self) -> list[str]: """ Extract summaries from terminal nodes of the tree. @@ -333,7 +330,7 @@ def save_json(self, filepath: str, **kwargs) -> None: def generate_theme_tree( main_theme: str, focus: str = "", - llm_model_config: Dict[str, Any] = None, + llm_model_config: dict[str, Any] | None = None, ) -> SemanticTree: """ Generate a `SemanticTree` class from a main theme and focus. @@ -373,7 +370,7 @@ def generate_theme_tree( return SemanticTree.from_dict(tree_dict) -def dict_keys_to_lowercase(d: Dict[str, Any]) -> Dict[str, Any]: +def dict_keys_to_lowercase(d: dict[str, Any]) -> dict[str, Any]: """ Convert all keys in a dictionary to lowercase, including nested dictionaries. @@ -392,7 +389,7 @@ def dict_keys_to_lowercase(d: Dict[str, Any]) -> Dict[str, Any]: return new_dict -def stringify_label_summaries(label_summaries: Dict[str, str]) -> List[str]: +def stringify_label_summaries(label_summaries: dict[str, str]) -> list[str]: """ Convert the label summaries of a SemanticTree into a list of strings. @@ -409,7 +406,7 @@ def stringify_label_summaries(label_summaries: Dict[str, str]) -> List[str]: def generate_risk_tree( main_theme: str, focus: str = "", - llm_model_config: Dict[str, Any] = None, + llm_model_config: dict[str, Any] | None = None, ) -> SemanticTree: """ Generate a `SemanticTree` class from a main theme and analyst focus. @@ -433,6 +430,8 @@ def generate_risk_tree( SemanticTree: The generated theme tree. """ ll_model_config = llm_model_config or themes_default_llm_model_config + if "kwargs" not in ll_model_config: + ll_model_config["kwargs"] = {} model_str = f"{ll_model_config['provider']}::{ll_model_config['model']}" llm = LLMEngine(model=model_str) diff --git a/src/bigdata_research_tools/utils.py b/src/bigdata_research_tools/utils/files.py similarity index 86% rename from src/bigdata_research_tools/utils.py rename to src/bigdata_research_tools/utils/files.py index 1015c47..540c1a3 100644 --- a/src/bigdata_research_tools/utils.py +++ b/src/bigdata_research_tools/utils/files.py @@ -1,10 +1,9 @@ import importlib.util import os from pathlib import Path -from typing import List -def check_libraries_installed(libraries: List[str]) -> bool: +def check_libraries_installed(libraries: list[str]) -> bool: """ Check if the received Python libraries are installed. """ diff --git a/src/bigdata_research_tools/visuals/risk_visuals.py b/src/bigdata_research_tools/visuals/risk_visuals.py index 7e0e8bf..e551cb9 100644 --- a/src/bigdata_research_tools/visuals/risk_visuals.py +++ b/src/bigdata_research_tools/visuals/risk_visuals.py @@ -1,9 +1,9 @@ -from typing import Any, Dict, Optional, Tuple +from typing import Any import pandas as pd import plotly.graph_objects as go -from bigdata_research_tools.settings import check_libraries_installed +from bigdata_research_tools.utils.files import check_libraries_installed from bigdata_research_tools.visuals.visuals import ExposureDashboard @@ -42,7 +42,7 @@ class RiskExposureDashboard(ExposureDashboard): }, } - def __init__(self, config: Optional[Dict[str, Any]] = None): + def __init__(self, config: dict[str, Any] | None = None): """ Initialize the dashboard with configuration parameters. @@ -59,8 +59,8 @@ def __init__(self, config: Optional[Dict[str, Any]] = None): def create_risk_exposure_dashboard( df_company: pd.DataFrame, n_companies: int = 10, - config: Optional[Dict[str, Any]] = None, -) -> Tuple[go.Figure, go.Figure]: + config: dict[str, Any] | None = None, +) -> tuple[go.Figure, go.Figure]: """ Creates a comprehensive dashboard for analyzing risk exposure of companies. diff --git a/src/bigdata_research_tools/visuals/thematic_visuals.py b/src/bigdata_research_tools/visuals/thematic_visuals.py index ee9b39c..2fa02d9 100644 --- a/src/bigdata_research_tools/visuals/thematic_visuals.py +++ b/src/bigdata_research_tools/visuals/thematic_visuals.py @@ -1,9 +1,9 @@ -from typing import Any, Dict, Optional, Tuple +from typing import Any import pandas as pd import plotly.graph_objects as go -from bigdata_research_tools.settings import check_libraries_installed +from bigdata_research_tools.utils.files import check_libraries_installed from bigdata_research_tools.visuals.visuals import ExposureDashboard @@ -42,7 +42,7 @@ class ThematicExposureDashboard(ExposureDashboard): }, } - def __init__(self, config: Optional[Dict[str, Any]] = None): + def __init__(self, config: dict[str, Any] | None = None): """ Initialize the thematic exposure dashboard with configuration parameters. @@ -60,8 +60,8 @@ def __init__(self, config: Optional[Dict[str, Any]] = None): def create_thematic_exposure_dashboard( df_company: pd.DataFrame, n_companies: int = 10, - config: Optional[Dict[str, Any]] = None, -) -> Tuple[go.Figure, go.Figure]: + config: dict[str, Any] | None = None, +) -> tuple[go.Figure, go.Figure]: """ Creates a comprehensive dashboard for analyzing thematic exposure of companies. diff --git a/src/bigdata_research_tools/visuals/visuals.py b/src/bigdata_research_tools/visuals/visuals.py index 7a4283d..22969fe 100644 --- a/src/bigdata_research_tools/visuals/visuals.py +++ b/src/bigdata_research_tools/visuals/visuals.py @@ -1,11 +1,11 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any import numpy as np import pandas as pd import plotly.graph_objects as go from plotly.subplots import make_subplots -from bigdata_research_tools.settings import check_libraries_installed +from bigdata_research_tools.utils.files import check_libraries_installed def check_plotly_dependencies() -> bool: @@ -54,7 +54,7 @@ class ExposureDashboard: "industry_margin": {"l": 60, "r": 50, "t": 80, "b": 50}, } - def __init__(self, config: Optional[Dict[str, Any]] = None): + def __init__(self, config: dict[str, Any] | None = None): """ Initialize the dashboard with configuration parameters. @@ -69,8 +69,8 @@ def create_dashboard( self, df_company: pd.DataFrame, n_companies: int = 10, - theme_columns: Optional[List[str]] = None, - ) -> Tuple[go.Figure, go.Figure]: + theme_columns: list[str] | None = None, + ) -> tuple[go.Figure, go.Figure]: """ Creates a comprehensive dashboard for analyzing thematic exposure of companies. @@ -129,7 +129,7 @@ def _validate_dataframe(self, df: pd.DataFrame) -> None: if missing_cols: raise ValueError(f"Missing required columns: {missing_cols}") - def _extract_theme_columns(self, df: pd.DataFrame) -> List[str]: + def _extract_theme_columns(self, df: pd.DataFrame) -> list[str]: """Extract theme column names from the dataframe.""" start_idx = self.config["theme_start_col"] end_idx = self.config["theme_end_col"] @@ -160,7 +160,7 @@ def _create_subplot_layout(self) -> go.Figure: ) def _add_raw_scores_heatmap( - self, fig: go.Figure, df: pd.DataFrame, theme_columns: List[str] + self, fig: go.Figure, df: pd.DataFrame, theme_columns: list[str] ) -> None: """Add a heatmap of raw thematic scores to the dashboard.""" company_col = self.config["company_column"] @@ -215,7 +215,7 @@ def _add_total_scores_barchart(self, fig: go.Figure, df: pd.DataFrame) -> None: ) def _add_top_themes_by_company_scatter( - self, fig: go.Figure, df: pd.DataFrame, theme_columns: List[str] + self, fig: go.Figure, df: pd.DataFrame, theme_columns: list[str] ) -> None: """Add a scatter plot showing the top thematic exposures for each company.""" company_col = self.config["company_column"] @@ -276,7 +276,7 @@ def _add_top_themes_by_company_scatter( ) def _add_dominant_themes_barchart( - self, fig: go.Figure, df: pd.DataFrame, theme_columns: List[str] + self, fig: go.Figure, df: pd.DataFrame, theme_columns: list[str] ) -> None: """Add a horizontal bar chart showing the most dominant themes across all companies.""" # Calculate totals for each theme across all companies @@ -308,7 +308,7 @@ def _add_dominant_themes_barchart( ) def _create_industry_analysis( - self, df: pd.DataFrame, theme_columns: List[str] + self, df: pd.DataFrame, theme_columns: list[str] ) -> go.Figure: """Create a separate heatmap showing average thematic scores by industry.""" industry_col = self.config["industry_column"] diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index e7f0e46..70c65a7 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -1,5 +1,4 @@ from logging import Logger, getLogger -from typing import Dict, List, Optional from bigdata_client.models.search import DocumentType from pandas import merge @@ -17,14 +16,14 @@ class NarrativeMiner(Workflow): def __init__( self, - narrative_sentences: List[str], + narrative_sentences: list[str], start_date: str, end_date: str, llm_model: str, document_type: DocumentType, - fiscal_year: Optional[int], - sources: Optional[List[str]] = None, - rerank_threshold: Optional[float] = None, + fiscal_year: int | None, + sources: list[str] | None = None, + rerank_threshold: float | None = None, ): """ This class will track a set of user-defined narratives (specified in narrative_sentences) over @@ -57,8 +56,8 @@ def mine_narratives( document_limit: int = 10, batch_size: int = 10, frequency: str = "3M", - export_path: Optional[str] = None, - ) -> Dict: + export_path: str | None = None, + ) -> dict: """ Mine narratives diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index aa5e642..bfb8574 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -1,5 +1,4 @@ from logging import Logger, getLogger -from typing import Dict, List, Optional, Tuple from bigdata_client.models.entities import Company from bigdata_client.models.search import DocumentType @@ -23,15 +22,15 @@ def __init__( self, llm_model: str, main_theme: str, - companies: List[Company], + companies: list[Company], start_date: str, end_date: str, document_type: DocumentType, - keywords: Optional[List[str]] = None, - control_entities: Optional[Dict[str, List[str]]] = None, - fiscal_year: Optional[int] = None, - sources: Optional[List[str]] = None, - rerank_threshold: Optional[float] = None, + keywords: list[str] | None = None, + control_entities: dict[str, list[str]] | None = None, + fiscal_year: int | None = None, + sources: list[str] | None = None, + rerank_threshold: float | None = None, focus: str = "", ): """ @@ -92,7 +91,7 @@ def create_taxonomy(self): def retrieve_results( self, - sentences: List[str], + sentences: list[str], frequency: str = "3M", document_limit: int = 10, batch_size: int = 10, @@ -132,8 +131,8 @@ def retrieve_results( return df_sentences def _add_prompt_fields( - self, df_sentences: DataFrame, additional_prompt_fields: Optional[List] = None - ) -> List[Dict]: + self, df_sentences: DataFrame, additional_prompt_fields: list[str] | None = None + ) -> list[dict]: """ Add additional fields from the DataFrame for the labeling prompt. @@ -158,7 +157,7 @@ def label_search_results( df_sentences, terminal_labels, risk_tree: SemanticTree, - additional_prompt_fields: Optional[List] = None, + additional_prompt_fields: list[str] | None = None, ): """ Label the search results with our theme labels. @@ -222,7 +221,7 @@ def label_search_results( return df, df_clean def generate_results( - self, df_labeled: DataFrame, word_range: Tuple[int, int] = (50, 100) + self, df_labeled: DataFrame, word_range: tuple[int, int] = (50, 100) ): """Generate the Pivot Tables with factor Scores for companies and industries.""" @@ -288,9 +287,9 @@ def screen_companies( document_limit: int = 10, batch_size: int = 10, frequency: str = "3M", - word_range: Tuple[int, int] = (50, 100), - export_path: str = None, - ) -> Dict: + word_range: tuple[int, int] = (50, 100), + export_path: str | None = None, + ) -> dict: """ Screen companies for the Executive Narrative Factor. diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index cb8c3e8..8f7e7a2 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -1,5 +1,4 @@ from logging import Logger, getLogger -from typing import Dict, List, Optional, Tuple from bigdata_client.models.entities import Company from bigdata_client.models.search import DocumentType @@ -23,13 +22,13 @@ def __init__( self, llm_model: str, main_theme: str, - companies: List[Company], + companies: list[Company], start_date: str, end_date: str, document_type: DocumentType, - fiscal_year: Optional[int] = None, - sources: Optional[List[str]] = None, - rerank_threshold: Optional[float] = None, + fiscal_year: int | None = None, + sources: list[str] | None = None, + rerank_threshold: float | None = None, focus: str = "", ): """ @@ -71,9 +70,9 @@ def screen_companies( document_limit: int = 10, batch_size: int = 10, frequency: str = "3M", - word_range: Tuple[int, int] = (50, 100), - export_path: str = None, - ) -> Dict: + word_range: tuple[int, int] = (50, 100), + export_path: str | None = None, + ) -> dict: """ Screen companies for the Executive Narrative Factor. diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index bff3829..fbb1c81 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -2,8 +2,6 @@ Script with any common helper functions used across the workflows. """ -from typing import List - from IPython.display import HTML, display from pandas import DataFrame @@ -36,7 +34,7 @@ def display_output_chunks_dataframe(final_df): def get_scored_df( - df: DataFrame, index_columns: List[str], pivot_column: str + df: DataFrame, index_columns: list[str], pivot_column: str ) -> DataFrame: """ Calculate a Composite Score by pivoting the received DataFrame. diff --git a/tests/test_llm/test_utils.py b/tests/test_llm/test_utils.py index ff4c10c..f6ae839 100644 --- a/tests/test_llm/test_utils.py +++ b/tests/test_llm/test_utils.py @@ -1,7 +1,8 @@ +from bigdata_research_tools.llm.base import AsyncLLMEngine from bigdata_research_tools.llm.utils import run_concurrent_prompts -class DummyAsyncLLMEngine: +class DummyAsyncLLMEngine(AsyncLLMEngine): async def get_response(self, chat_history, **kwargs): return "dummy response" diff --git a/tutorial/tutorial_notebook.ipynb b/tutorial/tutorial_notebook.ipynb index ba114a4..7e3f639 100644 --- a/tutorial/tutorial_notebook.ipynb +++ b/tutorial/tutorial_notebook.ipynb @@ -650,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "scrolled": true }, @@ -730,7 +730,9 @@ " print(f\"✅ Generated {len(queries)} search queries\")\n", "\n", " # Create date ranges\n", - " date_ranges = create_date_ranges(\"2024-10-01\", \"2024-12-31\", \"M\")\n", + " date_ranges = create_date_ranges(\n", + " \"2024-10-01\", \"2024-12-31\", \"M\", return_datetime=True\n", + " ) # Monthly\n", " print(f\"📅 Searching across {len(date_ranges)} time periods\")\n", "\n", " # Execute search\n", diff --git a/uv.lock b/uv.lock index e4b3ba6..605fffd 100644 --- a/uv.lock +++ b/uv.lock @@ -1,11 +1,10 @@ version = 1 revision = 2 -requires-python = ">=3.9, <4.0" +requires-python = ">=3.10, <4.0" resolution-markers = [ "python_full_version >= '3.12'", "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version < '3.10'", + "python_full_version < '3.11'", ] [[package]] @@ -101,23 +100,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/6a/ea199e61b67f25ba688d3ce93f63b49b0a4e3b3d380f03971b4646412fc6/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad702e57dc385cae679c39d318def49aef754455f237499d5b99bea4ef582e51", size = 1710050, upload-time = "2025-07-29T05:51:48.203Z" }, { url = "https://files.pythonhosted.org/packages/b4/2e/ffeb7f6256b33635c29dbed29a22a723ff2dd7401fff42ea60cf2060abfb/aiohttp-3.12.15-cp313-cp313-win32.whl", hash = "sha256:f813c3e9032331024de2eb2e32a88d86afb69291fbc37a3a3ae81cc9917fb3d0", size = 422647, upload-time = "2025-07-29T05:51:50.718Z" }, { url = "https://files.pythonhosted.org/packages/1b/8e/78ee35774201f38d5e1ba079c9958f7629b1fd079459aea9467441dbfbf5/aiohttp-3.12.15-cp313-cp313-win_amd64.whl", hash = "sha256:1a649001580bdb37c6fdb1bebbd7e3bc688e8ec2b5c6f52edbb664662b17dc84", size = 449067, upload-time = "2025-07-29T05:51:52.549Z" }, - { url = "https://files.pythonhosted.org/packages/18/8d/da08099af8db234d1cd43163e6ffc8e9313d0e988cee1901610f2fa5c764/aiohttp-3.12.15-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:691d203c2bdf4f4637792efbbcdcd157ae11e55eaeb5e9c360c1206fb03d4d98", size = 706829, upload-time = "2025-07-29T05:51:54.434Z" }, - { url = "https://files.pythonhosted.org/packages/4e/94/8eed385cfb60cf4fdb5b8a165f6148f3bebeb365f08663d83c35a5f273ef/aiohttp-3.12.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8e995e1abc4ed2a454c731385bf4082be06f875822adc4c6d9eaadf96e20d406", size = 481806, upload-time = "2025-07-29T05:51:56.355Z" }, - { url = "https://files.pythonhosted.org/packages/38/68/b13e1a34584fbf263151b3a72a084e89f2102afe38df1dce5a05a15b83e9/aiohttp-3.12.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bd44d5936ab3193c617bfd6c9a7d8d1085a8dc8c3f44d5f1dcf554d17d04cf7d", size = 469205, upload-time = "2025-07-29T05:51:58.277Z" }, - { url = "https://files.pythonhosted.org/packages/38/14/3d7348bf53aa4af54416bc64cbef3a2ac5e8b9bfa97cc45f1cf9a94d9c8d/aiohttp-3.12.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46749be6e89cd78d6068cdf7da51dbcfa4321147ab8e4116ee6678d9a056a0cf", size = 1644174, upload-time = "2025-07-29T05:52:00.23Z" }, - { url = "https://files.pythonhosted.org/packages/ba/ed/fd9b5b22b0f6ca1a85c33bb4868cbcc6ae5eae070a0f4c9c5cad003c89d7/aiohttp-3.12.15-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0c643f4d75adea39e92c0f01b3fb83d57abdec8c9279b3078b68a3a52b3933b6", size = 1618672, upload-time = "2025-07-29T05:52:02.272Z" }, - { url = "https://files.pythonhosted.org/packages/39/f7/f6530ab5f8c8c409e44a63fcad35e839c87aabecdfe5b8e96d671ed12f64/aiohttp-3.12.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a23918fedc05806966a2438489dcffccbdf83e921a1170773b6178d04ade142", size = 1692295, upload-time = "2025-07-29T05:52:04.546Z" }, - { url = "https://files.pythonhosted.org/packages/cb/dc/3cf483bb0106566dc97ebaa2bb097f5e44d4bc4ab650a6f107151cd7b193/aiohttp-3.12.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74bdd8c864b36c3673741023343565d95bfbd778ffe1eb4d412c135a28a8dc89", size = 1731609, upload-time = "2025-07-29T05:52:06.552Z" }, - { url = "https://files.pythonhosted.org/packages/de/a4/fd04bf807851197077d9cac9381d58f86d91c95c06cbaf9d3a776ac4467a/aiohttp-3.12.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a146708808c9b7a988a4af3821379e379e0f0e5e466ca31a73dbdd0325b0263", size = 1637852, upload-time = "2025-07-29T05:52:08.975Z" }, - { url = "https://files.pythonhosted.org/packages/98/03/29d626ca3bcdcafbd74b45d77ca42645a5c94d396f2ee3446880ad2405fb/aiohttp-3.12.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7011a70b56facde58d6d26da4fec3280cc8e2a78c714c96b7a01a87930a9530", size = 1572852, upload-time = "2025-07-29T05:52:11.508Z" }, - { url = "https://files.pythonhosted.org/packages/5f/cd/b4777a9e204f4e01091091027e5d1e2fa86decd0fee5067bc168e4fa1e76/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:3bdd6e17e16e1dbd3db74d7f989e8af29c4d2e025f9828e6ef45fbdee158ec75", size = 1620813, upload-time = "2025-07-29T05:52:13.891Z" }, - { url = "https://files.pythonhosted.org/packages/ae/26/1a44a6e8417e84057beaf8c462529b9e05d4b53b8605784f1eb571f0ff68/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:57d16590a351dfc914670bd72530fd78344b885a00b250e992faea565b7fdc05", size = 1630951, upload-time = "2025-07-29T05:52:15.955Z" }, - { url = "https://files.pythonhosted.org/packages/dd/7f/10c605dbd01c40e2b27df7ef9004bec75d156f0705141e11047ecdfe264d/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bc9a0f6569ff990e0bbd75506c8d8fe7214c8f6579cca32f0546e54372a3bb54", size = 1607595, upload-time = "2025-07-29T05:52:18.089Z" }, - { url = "https://files.pythonhosted.org/packages/66/f6/2560dcb01731c1d7df1d34b64de95bc4b3ed02bb78830fd82299c1eb314e/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:536ad7234747a37e50e7b6794ea868833d5220b49c92806ae2d7e8a9d6b5de02", size = 1695194, upload-time = "2025-07-29T05:52:20.255Z" }, - { url = "https://files.pythonhosted.org/packages/e7/02/ee105ae82dc2b981039fd25b0cf6eaa52b493731960f9bc861375a72b463/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f0adb4177fa748072546fb650d9bd7398caaf0e15b370ed3317280b13f4083b0", size = 1710872, upload-time = "2025-07-29T05:52:22.769Z" }, - { url = "https://files.pythonhosted.org/packages/88/16/70c4e42ed6a04f78fb58d1a46500a6ce560741d13afde2a5f33840746a5f/aiohttp-3.12.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:14954a2988feae3987f1eb49c706bff39947605f4b6fa4027c1d75743723eb09", size = 1640539, upload-time = "2025-07-29T05:52:25.733Z" }, - { url = "https://files.pythonhosted.org/packages/fe/1d/a7eb5fa8a6967117c5c0ad5ab4b1dec0d21e178c89aa08bc442a0b836392/aiohttp-3.12.15-cp39-cp39-win32.whl", hash = "sha256:b784d6ed757f27574dca1c336f968f4e81130b27595e458e69457e6878251f5d", size = 430164, upload-time = "2025-07-29T05:52:27.905Z" }, - { url = "https://files.pythonhosted.org/packages/14/25/e0cf8793aedc41c6d7f2aad646a27e27bdacafe3b402bb373d7651c94d73/aiohttp-3.12.15-cp39-cp39-win_amd64.whl", hash = "sha256:86ceded4e78a992f835209e236617bffae649371c4a50d5e5a3987f237db84b8", size = 453370, upload-time = "2025-07-29T05:52:29.936Z" }, ] [[package]] @@ -248,10 +230,8 @@ source = { editable = "." } dependencies = [ { name = "bigdata-client" }, { name = "graphviz" }, - { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "ipython", version = "8.37.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "json-repair", version = "0.44.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "json-repair", version = "0.50.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "ipython" }, + { name = "json-repair" }, { name = "openpyxl" }, { name = "pandas" }, { name = "pillow" }, @@ -328,8 +308,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, - { name = "urllib3", version = "1.26.20", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/14/bd/e5fbf58d3624569eedce5b3cc9a842cca62ffa6d0248af556aa80e8d949a/botocore-1.40.22.tar.gz", hash = "sha256:eb800ece2cd67777ebb09a67a0d1628db3aea4f2ccbf1d8bf7dbf8504d1f3b71", size = 14322982, upload-time = "2025-09-02T19:19:53.793Z" } wheels = [ @@ -400,18 +379,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469, upload-time = "2024-09-04T20:44:41.616Z" }, { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload-time = "2024-09-04T20:44:43.733Z" }, { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, - { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220, upload-time = "2024-09-04T20:45:01.577Z" }, - { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605, upload-time = "2024-09-04T20:45:03.837Z" }, - { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910, upload-time = "2024-09-04T20:45:05.315Z" }, - { url = "https://files.pythonhosted.org/packages/42/7a/9d086fab7c66bd7c4d0f27c57a1b6b068ced810afc498cc8c49e0088661c/cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", size = 447200, upload-time = "2024-09-04T20:45:06.903Z" }, - { url = "https://files.pythonhosted.org/packages/da/63/1785ced118ce92a993b0ec9e0d0ac8dc3e5dbfbcaa81135be56c69cabbb6/cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", size = 454565, upload-time = "2024-09-04T20:45:08.975Z" }, - { url = "https://files.pythonhosted.org/packages/74/06/90b8a44abf3556599cdec107f7290277ae8901a58f75e6fe8f970cd72418/cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", size = 435635, upload-time = "2024-09-04T20:45:10.64Z" }, - { url = "https://files.pythonhosted.org/packages/bd/62/a1f468e5708a70b1d86ead5bab5520861d9c7eacce4a885ded9faa7729c3/cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", size = 445218, upload-time = "2024-09-04T20:45:12.366Z" }, - { url = "https://files.pythonhosted.org/packages/5b/95/b34462f3ccb09c2594aa782d90a90b045de4ff1f70148ee79c69d37a0a5a/cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", size = 460486, upload-time = "2024-09-04T20:45:13.935Z" }, - { url = "https://files.pythonhosted.org/packages/fc/fc/a1e4bebd8d680febd29cf6c8a40067182b64f00c7d105f8f26b5bc54317b/cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", size = 437911, upload-time = "2024-09-04T20:45:15.696Z" }, - { url = "https://files.pythonhosted.org/packages/e6/c3/21cab7a6154b6a5ea330ae80de386e7665254835b9e98ecc1340b3a7de9a/cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", size = 460632, upload-time = "2024-09-04T20:45:17.284Z" }, - { url = "https://files.pythonhosted.org/packages/cb/b5/fd9f8b5a84010ca169ee49f4e4ad6f8c05f4e3545b72ee041dbbcb159882/cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", size = 171820, upload-time = "2024-09-04T20:45:18.762Z" }, - { url = "https://files.pythonhosted.org/packages/8c/52/b08750ce0bce45c143e1b5d7357ee8c55341b52bdef4b0f081af1eb248c2/cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", size = 181290, upload-time = "2024-09-04T20:45:20.226Z" }, ] [[package]] @@ -484,17 +451,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" }, { url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" }, { url = "https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" }, - { url = "https://files.pythonhosted.org/packages/c2/ca/9a0983dd5c8e9733565cf3db4df2b0a2e9a82659fd8aa2a868ac6e4a991f/charset_normalizer-3.4.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:70bfc5f2c318afece2f5838ea5e4c3febada0be750fcf4775641052bbba14d05", size = 207520, upload-time = "2025-08-09T07:57:11.026Z" }, - { url = "https://files.pythonhosted.org/packages/39/c6/99271dc37243a4f925b09090493fb96c9333d7992c6187f5cfe5312008d2/charset_normalizer-3.4.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23b6b24d74478dc833444cbd927c338349d6ae852ba53a0d02a2de1fce45b96e", size = 147307, upload-time = "2025-08-09T07:57:12.4Z" }, - { url = "https://files.pythonhosted.org/packages/e4/69/132eab043356bba06eb333cc2cc60c6340857d0a2e4ca6dc2b51312886b3/charset_normalizer-3.4.3-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:34a7f768e3f985abdb42841e20e17b330ad3aaf4bb7e7aeeb73db2e70f077b99", size = 160448, upload-time = "2025-08-09T07:57:13.712Z" }, - { url = "https://files.pythonhosted.org/packages/04/9a/914d294daa4809c57667b77470533e65def9c0be1ef8b4c1183a99170e9d/charset_normalizer-3.4.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:fb731e5deb0c7ef82d698b0f4c5bb724633ee2a489401594c5c88b02e6cb15f7", size = 157758, upload-time = "2025-08-09T07:57:14.979Z" }, - { url = "https://files.pythonhosted.org/packages/b0/a8/6f5bcf1bcf63cb45625f7c5cadca026121ff8a6c8a3256d8d8cd59302663/charset_normalizer-3.4.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:257f26fed7d7ff59921b78244f3cd93ed2af1800ff048c33f624c87475819dd7", size = 152487, upload-time = "2025-08-09T07:57:16.332Z" }, - { url = "https://files.pythonhosted.org/packages/c4/72/d3d0e9592f4e504f9dea08b8db270821c909558c353dc3b457ed2509f2fb/charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1ef99f0456d3d46a50945c98de1774da86f8e992ab5c77865ea8b8195341fc19", size = 150054, upload-time = "2025-08-09T07:57:17.576Z" }, - { url = "https://files.pythonhosted.org/packages/20/30/5f64fe3981677fe63fa987b80e6c01042eb5ff653ff7cec1b7bd9268e54e/charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:2c322db9c8c89009a990ef07c3bcc9f011a3269bc06782f916cd3d9eed7c9312", size = 161703, upload-time = "2025-08-09T07:57:20.012Z" }, - { url = "https://files.pythonhosted.org/packages/e1/ef/dd08b2cac9284fd59e70f7d97382c33a3d0a926e45b15fc21b3308324ffd/charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:511729f456829ef86ac41ca78c63a5cb55240ed23b4b737faca0eb1abb1c41bc", size = 159096, upload-time = "2025-08-09T07:57:21.329Z" }, - { url = "https://files.pythonhosted.org/packages/45/8c/dcef87cfc2b3f002a6478f38906f9040302c68aebe21468090e39cde1445/charset_normalizer-3.4.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:88ab34806dea0671532d3f82d82b85e8fc23d7b2dd12fa837978dad9bb392a34", size = 153852, upload-time = "2025-08-09T07:57:22.608Z" }, - { url = "https://files.pythonhosted.org/packages/63/86/9cbd533bd37883d467fcd1bd491b3547a3532d0fbb46de2b99feeebf185e/charset_normalizer-3.4.3-cp39-cp39-win32.whl", hash = "sha256:16a8770207946ac75703458e2c743631c79c59c5890c80011d536248f8eaa432", size = 99840, upload-time = "2025-08-09T07:57:23.883Z" }, - { url = "https://files.pythonhosted.org/packages/ce/d6/7e805c8e5c46ff9729c49950acc4ee0aeb55efb8b3a56687658ad10c3216/charset_normalizer-3.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:d22dbedd33326a4a5190dd4fe9e9e693ef12160c77382d9e87919bce54f3d4ca", size = 107438, upload-time = "2025-08-09T07:57:25.287Z" }, { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, ] @@ -603,18 +559,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/dc/101f3fa3a45146db0cb03f5b4376e24c0aac818309da23e2de0c75295a91/coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235", size = 221784, upload-time = "2025-09-21T20:03:24.769Z" }, { url = "https://files.pythonhosted.org/packages/4c/a1/74c51803fc70a8a40d7346660379e144be772bab4ac7bb6e6b905152345c/coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d", size = 222905, upload-time = "2025-09-21T20:03:26.93Z" }, { url = "https://files.pythonhosted.org/packages/12/65/f116a6d2127df30bcafbceef0302d8a64ba87488bf6f73a6d8eebf060873/coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a", size = 220922, upload-time = "2025-09-21T20:03:28.672Z" }, - { url = "https://files.pythonhosted.org/packages/a3/ad/d1c25053764b4c42eb294aae92ab617d2e4f803397f9c7c8295caa77a260/coverage-7.10.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fff7b9c3f19957020cac546c70025331113d2e61537f6e2441bc7657913de7d3", size = 217978, upload-time = "2025-09-21T20:03:30.362Z" }, - { url = "https://files.pythonhosted.org/packages/52/2f/b9f9daa39b80ece0b9548bbb723381e29bc664822d9a12c2135f8922c22b/coverage-7.10.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:bc91b314cef27742da486d6839b677b3f2793dfe52b51bbbb7cf736d5c29281c", size = 218370, upload-time = "2025-09-21T20:03:32.147Z" }, - { url = "https://files.pythonhosted.org/packages/dd/6e/30d006c3b469e58449650642383dddf1c8fb63d44fdf92994bfd46570695/coverage-7.10.7-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:567f5c155eda8df1d3d439d40a45a6a5f029b429b06648235f1e7e51b522b396", size = 244802, upload-time = "2025-09-21T20:03:33.919Z" }, - { url = "https://files.pythonhosted.org/packages/b0/49/8a070782ce7e6b94ff6a0b6d7c65ba6bc3091d92a92cef4cd4eb0767965c/coverage-7.10.7-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2af88deffcc8a4d5974cf2d502251bc3b2db8461f0b66d80a449c33757aa9f40", size = 246625, upload-time = "2025-09-21T20:03:36.09Z" }, - { url = "https://files.pythonhosted.org/packages/6a/92/1c1c5a9e8677ce56d42b97bdaca337b2d4d9ebe703d8c174ede52dbabd5f/coverage-7.10.7-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7315339eae3b24c2d2fa1ed7d7a38654cba34a13ef19fbcb9425da46d3dc594", size = 248399, upload-time = "2025-09-21T20:03:38.342Z" }, - { url = "https://files.pythonhosted.org/packages/c0/54/b140edee7257e815de7426d5d9846b58505dffc29795fff2dfb7f8a1c5a0/coverage-7.10.7-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:912e6ebc7a6e4adfdbb1aec371ad04c68854cd3bf3608b3514e7ff9062931d8a", size = 245142, upload-time = "2025-09-21T20:03:40.591Z" }, - { url = "https://files.pythonhosted.org/packages/e4/9e/6d6b8295940b118e8b7083b29226c71f6154f7ff41e9ca431f03de2eac0d/coverage-7.10.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f49a05acd3dfe1ce9715b657e28d138578bc40126760efb962322c56e9ca344b", size = 246284, upload-time = "2025-09-21T20:03:42.355Z" }, - { url = "https://files.pythonhosted.org/packages/db/e5/5e957ca747d43dbe4d9714358375c7546cb3cb533007b6813fc20fce37ad/coverage-7.10.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cce2109b6219f22ece99db7644b9622f54a4e915dad65660ec435e89a3ea7cc3", size = 244353, upload-time = "2025-09-21T20:03:44.218Z" }, - { url = "https://files.pythonhosted.org/packages/9a/45/540fc5cc92536a1b783b7ef99450bd55a4b3af234aae35a18a339973ce30/coverage-7.10.7-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:f3c887f96407cea3916294046fc7dab611c2552beadbed4ea901cbc6a40cc7a0", size = 244430, upload-time = "2025-09-21T20:03:46.065Z" }, - { url = "https://files.pythonhosted.org/packages/75/0b/8287b2e5b38c8fe15d7e3398849bb58d382aedc0864ea0fa1820e8630491/coverage-7.10.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:635adb9a4507c9fd2ed65f39693fa31c9a3ee3a8e6dc64df033e8fdf52a7003f", size = 245311, upload-time = "2025-09-21T20:03:48.19Z" }, - { url = "https://files.pythonhosted.org/packages/0c/1d/29724999984740f0c86d03e6420b942439bf5bd7f54d4382cae386a9d1e9/coverage-7.10.7-cp39-cp39-win32.whl", hash = "sha256:5a02d5a850e2979b0a014c412573953995174743a3f7fa4ea5a6e9a3c5617431", size = 220500, upload-time = "2025-09-21T20:03:50.024Z" }, - { url = "https://files.pythonhosted.org/packages/43/11/4b1e6b129943f905ca54c339f343877b55b365ae2558806c1be4f7476ed5/coverage-7.10.7-cp39-cp39-win_amd64.whl", hash = "sha256:c134869d5ffe34547d14e174c866fd8fe2254918cc0a95e99052903bc1543e07", size = 221408, upload-time = "2025-09-21T20:03:51.803Z" }, { url = "https://files.pythonhosted.org/packages/ec/16/114df1c291c22cac3b0c127a73e0af5c12ed7bbb6558d310429a0ae24023/coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260", size = 209952, upload-time = "2025-09-21T20:03:53.918Z" }, ] @@ -711,7 +655,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ @@ -827,23 +771,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/37/5f9f3c3fd7f7746082ec67bcdc204db72dad081f4f83a503d33220a92973/frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1a85e345b4c43db8b842cab1feb41be5cc0b10a1830e6295b69d7310f99becaf", size = 282620, upload-time = "2025-06-09T23:02:00.493Z" }, { url = "https://files.pythonhosted.org/packages/0b/31/8fbc5af2d183bff20f21aa743b4088eac4445d2bb1cdece449ae80e4e2d1/frozenlist-1.7.0-cp313-cp313t-win32.whl", hash = "sha256:3a14027124ddb70dfcee5148979998066897e79f89f64b13328595c4bdf77c81", size = 43059, upload-time = "2025-06-09T23:02:02.072Z" }, { url = "https://files.pythonhosted.org/packages/bb/ed/41956f52105b8dbc26e457c5705340c67c8cc2b79f394b79bffc09d0e938/frozenlist-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3bf8010d71d4507775f658e9823210b7427be36625b387221642725b515dcf3e", size = 47516, upload-time = "2025-06-09T23:02:03.779Z" }, - { url = "https://files.pythonhosted.org/packages/dd/b1/ee59496f51cd244039330015d60f13ce5a54a0f2bd8d79e4a4a375ab7469/frozenlist-1.7.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cea3dbd15aea1341ea2de490574a4a37ca080b2ae24e4b4f4b51b9057b4c3630", size = 82434, upload-time = "2025-06-09T23:02:05.195Z" }, - { url = "https://files.pythonhosted.org/packages/75/e1/d518391ce36a6279b3fa5bc14327dde80bcb646bb50d059c6ca0756b8d05/frozenlist-1.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7d536ee086b23fecc36c2073c371572374ff50ef4db515e4e503925361c24f71", size = 48232, upload-time = "2025-06-09T23:02:07.728Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8d/a0d04f28b6e821a9685c22e67b5fb798a5a7b68752f104bfbc2dccf080c4/frozenlist-1.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dfcebf56f703cb2e346315431699f00db126d158455e513bd14089d992101e44", size = 47186, upload-time = "2025-06-09T23:02:09.243Z" }, - { url = "https://files.pythonhosted.org/packages/93/3a/a5334c0535c8b7c78eeabda1579179e44fe3d644e07118e59a2276dedaf1/frozenlist-1.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:974c5336e61d6e7eb1ea5b929cb645e882aadab0095c5a6974a111e6479f8878", size = 226617, upload-time = "2025-06-09T23:02:10.949Z" }, - { url = "https://files.pythonhosted.org/packages/0a/67/8258d971f519dc3f278c55069a775096cda6610a267b53f6248152b72b2f/frozenlist-1.7.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c70db4a0ab5ab20878432c40563573229a7ed9241506181bba12f6b7d0dc41cb", size = 224179, upload-time = "2025-06-09T23:02:12.603Z" }, - { url = "https://files.pythonhosted.org/packages/fc/89/8225905bf889b97c6d935dd3aeb45668461e59d415cb019619383a8a7c3b/frozenlist-1.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1137b78384eebaf70560a36b7b229f752fb64d463d38d1304939984d5cb887b6", size = 235783, upload-time = "2025-06-09T23:02:14.678Z" }, - { url = "https://files.pythonhosted.org/packages/54/6e/ef52375aa93d4bc510d061df06205fa6dcfd94cd631dd22956b09128f0d4/frozenlist-1.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e793a9f01b3e8b5c0bc646fb59140ce0efcc580d22a3468d70766091beb81b35", size = 229210, upload-time = "2025-06-09T23:02:16.313Z" }, - { url = "https://files.pythonhosted.org/packages/ee/55/62c87d1a6547bfbcd645df10432c129100c5bd0fd92a384de6e3378b07c1/frozenlist-1.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74739ba8e4e38221d2c5c03d90a7e542cb8ad681915f4ca8f68d04f810ee0a87", size = 215994, upload-time = "2025-06-09T23:02:17.9Z" }, - { url = "https://files.pythonhosted.org/packages/45/d2/263fea1f658b8ad648c7d94d18a87bca7e8c67bd6a1bbf5445b1bd5b158c/frozenlist-1.7.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e63344c4e929b1a01e29bc184bbb5fd82954869033765bfe8d65d09e336a677", size = 225122, upload-time = "2025-06-09T23:02:19.479Z" }, - { url = "https://files.pythonhosted.org/packages/7b/22/7145e35d12fb368d92124f679bea87309495e2e9ddf14c6533990cb69218/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2ea2a7369eb76de2217a842f22087913cdf75f63cf1307b9024ab82dfb525938", size = 224019, upload-time = "2025-06-09T23:02:20.969Z" }, - { url = "https://files.pythonhosted.org/packages/44/1e/7dae8c54301beb87bcafc6144b9a103bfd2c8f38078c7902984c9a0c4e5b/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:836b42f472a0e006e02499cef9352ce8097f33df43baaba3e0a28a964c26c7d2", size = 239925, upload-time = "2025-06-09T23:02:22.466Z" }, - { url = "https://files.pythonhosted.org/packages/4b/1e/99c93e54aa382e949a98976a73b9b20c3aae6d9d893f31bbe4991f64e3a8/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e22b9a99741294b2571667c07d9f8cceec07cb92aae5ccda39ea1b6052ed4319", size = 220881, upload-time = "2025-06-09T23:02:24.521Z" }, - { url = "https://files.pythonhosted.org/packages/5e/9c/ca5105fa7fb5abdfa8837581be790447ae051da75d32f25c8f81082ffc45/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:9a19e85cc503d958abe5218953df722748d87172f71b73cf3c9257a91b999890", size = 234046, upload-time = "2025-06-09T23:02:26.206Z" }, - { url = "https://files.pythonhosted.org/packages/8d/4d/e99014756093b4ddbb67fb8f0df11fe7a415760d69ace98e2ac6d5d43402/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f22dac33bb3ee8fe3e013aa7b91dc12f60d61d05b7fe32191ffa84c3aafe77bd", size = 235756, upload-time = "2025-06-09T23:02:27.79Z" }, - { url = "https://files.pythonhosted.org/packages/8b/72/a19a40bcdaa28a51add2aaa3a1a294ec357f36f27bd836a012e070c5e8a5/frozenlist-1.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9ccec739a99e4ccf664ea0775149f2749b8a6418eb5b8384b4dc0a7d15d304cb", size = 222894, upload-time = "2025-06-09T23:02:29.848Z" }, - { url = "https://files.pythonhosted.org/packages/08/49/0042469993e023a758af81db68c76907cd29e847d772334d4d201cbe9a42/frozenlist-1.7.0-cp39-cp39-win32.whl", hash = "sha256:b3950f11058310008a87757f3eee16a8e1ca97979833239439586857bc25482e", size = 39848, upload-time = "2025-06-09T23:02:31.413Z" }, - { url = "https://files.pythonhosted.org/packages/5a/45/827d86ee475c877f5f766fbc23fb6acb6fada9e52f1c9720e2ba3eae32da/frozenlist-1.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:43a82fce6769c70f2f5a06248b614a7d268080a9d20f7457ef10ecee5af82b63", size = 44102, upload-time = "2025-06-09T23:02:32.808Z" }, { url = "https://files.pythonhosted.org/packages/ee/45/b82e3c16be2182bff01179db177fe144d58b5dc787a7d4492c6ed8b9317f/frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e", size = 13106, upload-time = "2025-06-09T23:02:34.204Z" }, ] @@ -920,52 +847,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] -[[package]] -name = "ipython" -version = "8.18.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -dependencies = [ - { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version < '3.10'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, - { name = "jedi", marker = "python_full_version < '3.10'" }, - { name = "matplotlib-inline", marker = "python_full_version < '3.10'" }, - { name = "pexpect", marker = "python_full_version < '3.10' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version < '3.10'" }, - { name = "pygments", marker = "python_full_version < '3.10'" }, - { name = "stack-data", marker = "python_full_version < '3.10'" }, - { name = "traitlets", marker = "python_full_version < '3.10'" }, - { name = "typing-extensions", marker = "python_full_version < '3.10'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330, upload-time = "2023-11-27T09:58:34.596Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/6b/d9fdcdef2eb6a23f391251fde8781c38d42acd82abe84d054cb74f7863b0/ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397", size = 808161, upload-time = "2023-11-27T09:58:30.538Z" }, -] - [[package]] name = "ipython" version = "8.37.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] dependencies = [ - { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version >= '3.10'" }, - { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, - { name = "jedi", marker = "python_full_version >= '3.10'" }, - { name = "matplotlib-inline", marker = "python_full_version >= '3.10'" }, - { name = "pexpect", marker = "python_full_version >= '3.10' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version >= '3.10'" }, - { name = "pygments", marker = "python_full_version >= '3.10'" }, - { name = "stack-data", marker = "python_full_version >= '3.10'" }, - { name = "traitlets", marker = "python_full_version >= '3.10'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.10' and python_full_version < '3.12'" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "decorator" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/85/31/10ac88f3357fc276dc8a64e8880c82e80e7459326ae1d0a211b40abf6665/ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216", size = 5606088, upload-time = "2025-05-31T16:39:09.613Z" } wheels = [ @@ -1054,18 +951,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/b3/2bd02071c5a2430d0b70403a34411fc519c2f227da7b03da9ba6a956f931/jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357", size = 210127, upload-time = "2025-05-18T19:04:38.837Z" }, { url = "https://files.pythonhosted.org/packages/03/0c/5fe86614ea050c3ecd728ab4035534387cd41e7c1855ef6c031f1ca93e3f/jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00", size = 318527, upload-time = "2025-05-18T19:04:40.612Z" }, { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213, upload-time = "2025-05-18T19:04:41.894Z" }, - { url = "https://files.pythonhosted.org/packages/98/fd/aced428e2bd3c6c1132f67c5a708f9e7fd161d0ca8f8c5862b17b93cdf0a/jiter-0.10.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bd6292a43c0fc09ce7c154ec0fa646a536b877d1e8f2f96c19707f65355b5a4d", size = 317665, upload-time = "2025-05-18T19:04:43.417Z" }, - { url = "https://files.pythonhosted.org/packages/b6/2e/47d42f15d53ed382aef8212a737101ae2720e3697a954f9b95af06d34e89/jiter-0.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:39de429dcaeb6808d75ffe9effefe96a4903c6a4b376b2f6d08d77c1aaee2f18", size = 312152, upload-time = "2025-05-18T19:04:44.797Z" }, - { url = "https://files.pythonhosted.org/packages/7b/02/aae834228ef4834fc18718724017995ace8da5f70aa1ec225b9bc2b2d7aa/jiter-0.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ce124f13a7a616fad3bb723f2bfb537d78239d1f7f219566dc52b6f2a9e48d", size = 346708, upload-time = "2025-05-18T19:04:46.127Z" }, - { url = "https://files.pythonhosted.org/packages/35/d4/6ff39dee2d0a9abd69d8a3832ce48a3aa644eed75e8515b5ff86c526ca9a/jiter-0.10.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:166f3606f11920f9a1746b2eea84fa2c0a5d50fd313c38bdea4edc072000b0af", size = 371360, upload-time = "2025-05-18T19:04:47.448Z" }, - { url = "https://files.pythonhosted.org/packages/a9/67/c749d962b4eb62445867ae4e64a543cbb5d63cc7d78ada274ac515500a7f/jiter-0.10.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:28dcecbb4ba402916034fc14eba7709f250c4d24b0c43fc94d187ee0580af181", size = 492105, upload-time = "2025-05-18T19:04:48.792Z" }, - { url = "https://files.pythonhosted.org/packages/f6/d3/8fe1b1bae5161f27b1891c256668f598fa4c30c0a7dacd668046a6215fca/jiter-0.10.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86c5aa6910f9bebcc7bc4f8bc461aff68504388b43bfe5e5c0bd21efa33b52f4", size = 389577, upload-time = "2025-05-18T19:04:50.13Z" }, - { url = "https://files.pythonhosted.org/packages/ef/28/ecb19d789b4777898a4252bfaac35e3f8caf16c93becd58dcbaac0dc24ad/jiter-0.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ceeb52d242b315d7f1f74b441b6a167f78cea801ad7c11c36da77ff2d42e8a28", size = 353849, upload-time = "2025-05-18T19:04:51.443Z" }, - { url = "https://files.pythonhosted.org/packages/77/69/261f798f84790da6482ebd8c87ec976192b8c846e79444d0a2e0d33ebed8/jiter-0.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ff76d8887c8c8ee1e772274fcf8cc1071c2c58590d13e33bd12d02dc9a560397", size = 392029, upload-time = "2025-05-18T19:04:52.792Z" }, - { url = "https://files.pythonhosted.org/packages/cb/08/b8d15140d4d91f16faa2f5d416c1a71ab1bbe2b66c57197b692d04c0335f/jiter-0.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a9be4d0fa2b79f7222a88aa488bd89e2ae0a0a5b189462a12def6ece2faa45f1", size = 524386, upload-time = "2025-05-18T19:04:54.203Z" }, - { url = "https://files.pythonhosted.org/packages/9b/1d/23c41765cc95c0e23ac492a88450d34bf0fd87a37218d1b97000bffe0f53/jiter-0.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab7fd8738094139b6c1ab1822d6f2000ebe41515c537235fd45dabe13ec9324", size = 515234, upload-time = "2025-05-18T19:04:55.838Z" }, - { url = "https://files.pythonhosted.org/packages/9f/14/381d8b151132e79790579819c3775be32820569f23806769658535fe467f/jiter-0.10.0-cp39-cp39-win32.whl", hash = "sha256:5f51e048540dd27f204ff4a87f5d79294ea0aa3aa552aca34934588cf27023cf", size = 211436, upload-time = "2025-05-18T19:04:57.183Z" }, - { url = "https://files.pythonhosted.org/packages/59/66/f23ae51dea8ee8ce429027b60008ca895d0fa0704f0c7fe5f09014a6cffb/jiter-0.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:1b28302349dc65703a9e4ead16f163b1c339efffbe1049c30a44b001a2a4fff9", size = 208777, upload-time = "2025-05-18T19:04:58.454Z" }, ] [[package]] @@ -1077,27 +962,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, ] -[[package]] -name = "json-repair" -version = "0.44.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a8/6b/ed6e92efc5acfbc9c35ccae1676b70e4adb1552421e64f838c2a3f097d9a/json_repair-0.44.1.tar.gz", hash = "sha256:1130eb9733b868dac1340b43cb2effebb519ae6d52dd2d0728c6cca517f1e0b4", size = 32886, upload-time = "2025-04-30T16:09:38.54Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/b4/3cbd27a3240b2962c3b87bbb1c20eb6c56e5b26cde61f141f86ca98e2f68/json_repair-0.44.1-py3-none-any.whl", hash = "sha256:51d82532c3b8263782a301eb7904c75dce5fee8c0d1aba490287fc0ab779ac50", size = 22478, upload-time = "2025-04-30T16:09:37.303Z" }, -] - [[package]] name = "json-repair" version = "0.50.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/2f/2c/bfdb1886bdda03e248d597922013eeb20d62188cb48d394590ae6e0c8975/json_repair-0.50.0.tar.gz", hash = "sha256:1d42a3f353e389cf6051941b45fa44b6d130af3c91406a749e88586d830adb89", size = 34815, upload-time = "2025-08-20T15:01:58.126Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/35/c2/93368d4c9355e8ad1f6d62b804de241939d0796b2a3a73737f665b802808/json_repair-0.50.0-py3-none-any.whl", hash = "sha256:b15da2c42deb43419b182d97dcfde6cd86d0b18ccd18ed1a887104ce85e7a364", size = 25985, upload-time = "2025-08-20T15:01:56.567Z" }, @@ -1240,24 +1108,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/b0/a6fae46071b645ae98786ab738447de1ef53742eaad949f27e960864bb49/multidict-6.6.4-cp313-cp313t-win32.whl", hash = "sha256:f93b2b2279883d1d0a9e1bd01f312d6fc315c5e4c1f09e112e4736e2f650bc4e", size = 47775, upload-time = "2025-08-11T12:08:12.439Z" }, { url = "https://files.pythonhosted.org/packages/b2/0a/2436550b1520091af0600dff547913cb2d66fbac27a8c33bc1b1bccd8d98/multidict-6.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:6d46a180acdf6e87cc41dc15d8f5c2986e1e8739dc25dbb7dac826731ef381a4", size = 53100, upload-time = "2025-08-11T12:08:13.823Z" }, { url = "https://files.pythonhosted.org/packages/97/ea/43ac51faff934086db9c072a94d327d71b7d8b40cd5dcb47311330929ef0/multidict-6.6.4-cp313-cp313t-win_arm64.whl", hash = "sha256:756989334015e3335d087a27331659820d53ba432befdef6a718398b0a8493ad", size = 45501, upload-time = "2025-08-11T12:08:15.173Z" }, - { url = "https://files.pythonhosted.org/packages/d4/d3/f04c5db316caee9b5b2cbba66270b358c922a959855995bedde87134287c/multidict-6.6.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:af7618b591bae552b40dbb6f93f5518328a949dac626ee75927bba1ecdeea9f4", size = 76977, upload-time = "2025-08-11T12:08:16.667Z" }, - { url = "https://files.pythonhosted.org/packages/70/39/a6200417d883e510728ab3caec02d3b66ff09e1c85e0aab2ba311abfdf06/multidict-6.6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b6819f83aef06f560cb15482d619d0e623ce9bf155115150a85ab11b8342a665", size = 44878, upload-time = "2025-08-11T12:08:18.157Z" }, - { url = "https://files.pythonhosted.org/packages/6f/7e/815be31ed35571b137d65232816f61513fcd97b2717d6a9d7800b5a0c6e0/multidict-6.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4d09384e75788861e046330308e7af54dd306aaf20eb760eb1d0de26b2bea2cb", size = 44546, upload-time = "2025-08-11T12:08:19.694Z" }, - { url = "https://files.pythonhosted.org/packages/e2/f1/21b5bff6a8c3e2aff56956c241941ace6b8820e1abe6b12d3c52868a773d/multidict-6.6.4-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:a59c63061f1a07b861c004e53869eb1211ffd1a4acbca330e3322efa6dd02978", size = 223020, upload-time = "2025-08-11T12:08:21.554Z" }, - { url = "https://files.pythonhosted.org/packages/15/59/37083f1dd3439979a0ffeb1906818d978d88b4cc7f4600a9f89b1cb6713c/multidict-6.6.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:350f6b0fe1ced61e778037fdc7613f4051c8baf64b1ee19371b42a3acdb016a0", size = 240528, upload-time = "2025-08-11T12:08:23.45Z" }, - { url = "https://files.pythonhosted.org/packages/d1/f0/f054d123c87784307a27324c829eb55bcfd2e261eb785fcabbd832c8dc4a/multidict-6.6.4-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0c5cbac6b55ad69cb6aa17ee9343dfbba903118fd530348c330211dc7aa756d1", size = 219540, upload-time = "2025-08-11T12:08:24.965Z" }, - { url = "https://files.pythonhosted.org/packages/e8/26/8f78ce17b7118149c17f238f28fba2a850b660b860f9b024a34d0191030f/multidict-6.6.4-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:630f70c32b8066ddfd920350bc236225814ad94dfa493fe1910ee17fe4365cbb", size = 251182, upload-time = "2025-08-11T12:08:26.511Z" }, - { url = "https://files.pythonhosted.org/packages/00/c3/a21466322d69f6594fe22d9379200f99194d21c12a5bbf8c2a39a46b83b6/multidict-6.6.4-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8d4916a81697faec6cb724a273bd5457e4c6c43d82b29f9dc02c5542fd21fc9", size = 249371, upload-time = "2025-08-11T12:08:28.075Z" }, - { url = "https://files.pythonhosted.org/packages/c2/8e/2e673124eb05cf8dc82e9265eccde01a36bcbd3193e27799b8377123c976/multidict-6.6.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e42332cf8276bb7645d310cdecca93a16920256a5b01bebf747365f86a1675b", size = 239235, upload-time = "2025-08-11T12:08:29.937Z" }, - { url = "https://files.pythonhosted.org/packages/2b/2d/bdd9f05e7c89e30a4b0e4faf0681a30748f8d1310f68cfdc0e3571e75bd5/multidict-6.6.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f3be27440f7644ab9a13a6fc86f09cdd90b347c3c5e30c6d6d860de822d7cb53", size = 237410, upload-time = "2025-08-11T12:08:31.872Z" }, - { url = "https://files.pythonhosted.org/packages/46/4c/3237b83f8ca9a2673bb08fc340c15da005a80f5cc49748b587c8ae83823b/multidict-6.6.4-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:21f216669109e02ef3e2415ede07f4f8987f00de8cdfa0cc0b3440d42534f9f0", size = 232979, upload-time = "2025-08-11T12:08:33.399Z" }, - { url = "https://files.pythonhosted.org/packages/55/a6/a765decff625ae9bc581aed303cd1837955177dafc558859a69f56f56ba8/multidict-6.6.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:d9890d68c45d1aeac5178ded1d1cccf3bc8d7accf1f976f79bf63099fb16e4bd", size = 240979, upload-time = "2025-08-11T12:08:35.02Z" }, - { url = "https://files.pythonhosted.org/packages/6b/2d/9c75975cb0c66ea33cae1443bb265b2b3cd689bffcbc68872565f401da23/multidict-6.6.4-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:edfdcae97cdc5d1a89477c436b61f472c4d40971774ac4729c613b4b133163cb", size = 246849, upload-time = "2025-08-11T12:08:37.038Z" }, - { url = "https://files.pythonhosted.org/packages/3e/71/d21ac0843c1d8751fb5dcf8a1f436625d39d4577bc27829799d09b419af7/multidict-6.6.4-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0b2e886624be5773e69cf32bcb8534aecdeb38943520b240fed3d5596a430f2f", size = 241798, upload-time = "2025-08-11T12:08:38.669Z" }, - { url = "https://files.pythonhosted.org/packages/94/3d/1d8911e53092837bd11b1c99d71de3e2a9a26f8911f864554677663242aa/multidict-6.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:be5bf4b3224948032a845d12ab0f69f208293742df96dc14c4ff9b09e508fc17", size = 235315, upload-time = "2025-08-11T12:08:40.266Z" }, - { url = "https://files.pythonhosted.org/packages/86/c5/4b758df96376f73e936b1942c6c2dfc17e37ed9d5ff3b01a811496966ca0/multidict-6.6.4-cp39-cp39-win32.whl", hash = "sha256:10a68a9191f284fe9d501fef4efe93226e74df92ce7a24e301371293bd4918ae", size = 41434, upload-time = "2025-08-11T12:08:41.965Z" }, - { url = "https://files.pythonhosted.org/packages/58/16/f1dfa2a0f25f2717a5e9e5fe8fd30613f7fe95e3530cec8d11f5de0b709c/multidict-6.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:ee25f82f53262f9ac93bd7e58e47ea1bdcc3393cef815847e397cba17e284210", size = 46186, upload-time = "2025-08-11T12:08:43.367Z" }, - { url = "https://files.pythonhosted.org/packages/88/7d/a0568bac65438c494cb6950b29f394d875a796a237536ac724879cf710c9/multidict-6.6.4-cp39-cp39-win_arm64.whl", hash = "sha256:f9867e55590e0855bcec60d4f9a092b69476db64573c9fe17e92b0c50614c16a", size = 43115, upload-time = "2025-08-11T12:08:45.126Z" }, { url = "https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313, upload-time = "2025-08-11T12:08:46.891Z" }, ] @@ -1288,67 +1138,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, ] -[[package]] -name = "numpy" -version = "2.0.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015, upload-time = "2024-08-26T20:19:40.945Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/21/91/3495b3237510f79f5d81f2508f9f13fea78ebfdf07538fc7444badda173d/numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece", size = 21165245, upload-time = "2024-08-26T20:04:14.625Z" }, - { url = "https://files.pythonhosted.org/packages/05/33/26178c7d437a87082d11019292dce6d3fe6f0e9026b7b2309cbf3e489b1d/numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04", size = 13738540, upload-time = "2024-08-26T20:04:36.784Z" }, - { url = "https://files.pythonhosted.org/packages/ec/31/cc46e13bf07644efc7a4bf68df2df5fb2a1a88d0cd0da9ddc84dc0033e51/numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66", size = 5300623, upload-time = "2024-08-26T20:04:46.491Z" }, - { url = "https://files.pythonhosted.org/packages/6e/16/7bfcebf27bb4f9d7ec67332ffebee4d1bf085c84246552d52dbb548600e7/numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b", size = 6901774, upload-time = "2024-08-26T20:04:58.173Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a3/561c531c0e8bf082c5bef509d00d56f82e0ea7e1e3e3a7fc8fa78742a6e5/numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd", size = 13907081, upload-time = "2024-08-26T20:05:19.098Z" }, - { url = "https://files.pythonhosted.org/packages/fa/66/f7177ab331876200ac7563a580140643d1179c8b4b6a6b0fc9838de2a9b8/numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318", size = 19523451, upload-time = "2024-08-26T20:05:47.479Z" }, - { url = "https://files.pythonhosted.org/packages/25/7f/0b209498009ad6453e4efc2c65bcdf0ae08a182b2b7877d7ab38a92dc542/numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8", size = 19927572, upload-time = "2024-08-26T20:06:17.137Z" }, - { url = "https://files.pythonhosted.org/packages/3e/df/2619393b1e1b565cd2d4c4403bdd979621e2c4dea1f8532754b2598ed63b/numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326", size = 14400722, upload-time = "2024-08-26T20:06:39.16Z" }, - { url = "https://files.pythonhosted.org/packages/22/ad/77e921b9f256d5da36424ffb711ae79ca3f451ff8489eeca544d0701d74a/numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97", size = 6472170, upload-time = "2024-08-26T20:06:50.361Z" }, - { url = "https://files.pythonhosted.org/packages/10/05/3442317535028bc29cf0c0dd4c191a4481e8376e9f0db6bcf29703cadae6/numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131", size = 15905558, upload-time = "2024-08-26T20:07:13.881Z" }, - { url = "https://files.pythonhosted.org/packages/8b/cf/034500fb83041aa0286e0fb16e7c76e5c8b67c0711bb6e9e9737a717d5fe/numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448", size = 21169137, upload-time = "2024-08-26T20:07:45.345Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d9/32de45561811a4b87fbdee23b5797394e3d1504b4a7cf40c10199848893e/numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195", size = 13703552, upload-time = "2024-08-26T20:08:06.666Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ca/2f384720020c7b244d22508cb7ab23d95f179fcfff33c31a6eeba8d6c512/numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57", size = 5298957, upload-time = "2024-08-26T20:08:15.83Z" }, - { url = "https://files.pythonhosted.org/packages/0e/78/a3e4f9fb6aa4e6fdca0c5428e8ba039408514388cf62d89651aade838269/numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a", size = 6905573, upload-time = "2024-08-26T20:08:27.185Z" }, - { url = "https://files.pythonhosted.org/packages/a0/72/cfc3a1beb2caf4efc9d0b38a15fe34025230da27e1c08cc2eb9bfb1c7231/numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669", size = 13914330, upload-time = "2024-08-26T20:08:48.058Z" }, - { url = "https://files.pythonhosted.org/packages/ba/a8/c17acf65a931ce551fee11b72e8de63bf7e8a6f0e21add4c937c83563538/numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951", size = 19534895, upload-time = "2024-08-26T20:09:16.536Z" }, - { url = "https://files.pythonhosted.org/packages/ba/86/8767f3d54f6ae0165749f84648da9dcc8cd78ab65d415494962c86fac80f/numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9", size = 19937253, upload-time = "2024-08-26T20:09:46.263Z" }, - { url = "https://files.pythonhosted.org/packages/df/87/f76450e6e1c14e5bb1eae6836478b1028e096fd02e85c1c37674606ab752/numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15", size = 14414074, upload-time = "2024-08-26T20:10:08.483Z" }, - { url = "https://files.pythonhosted.org/packages/5c/ca/0f0f328e1e59f73754f06e1adfb909de43726d4f24c6a3f8805f34f2b0fa/numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4", size = 6470640, upload-time = "2024-08-26T20:10:19.732Z" }, - { url = "https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc", size = 15910230, upload-time = "2024-08-26T20:10:43.413Z" }, - { url = "https://files.pythonhosted.org/packages/45/40/2e117be60ec50d98fa08c2f8c48e09b3edea93cfcabd5a9ff6925d54b1c2/numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b", size = 20895803, upload-time = "2024-08-26T20:11:13.916Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/1b8b8dee833f53cef3e0a3f69b2374467789e0bb7399689582314df02651/numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e", size = 13471835, upload-time = "2024-08-26T20:11:34.779Z" }, - { url = "https://files.pythonhosted.org/packages/7f/19/e2793bde475f1edaea6945be141aef6c8b4c669b90c90a300a8954d08f0a/numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c", size = 5038499, upload-time = "2024-08-26T20:11:43.902Z" }, - { url = "https://files.pythonhosted.org/packages/e3/ff/ddf6dac2ff0dd50a7327bcdba45cb0264d0e96bb44d33324853f781a8f3c/numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c", size = 6633497, upload-time = "2024-08-26T20:11:55.09Z" }, - { url = "https://files.pythonhosted.org/packages/72/21/67f36eac8e2d2cd652a2e69595a54128297cdcb1ff3931cfc87838874bd4/numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692", size = 13621158, upload-time = "2024-08-26T20:12:14.95Z" }, - { url = "https://files.pythonhosted.org/packages/39/68/e9f1126d757653496dbc096cb429014347a36b228f5a991dae2c6b6cfd40/numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a", size = 19236173, upload-time = "2024-08-26T20:12:44.049Z" }, - { url = "https://files.pythonhosted.org/packages/d1/e9/1f5333281e4ebf483ba1c888b1d61ba7e78d7e910fdd8e6499667041cc35/numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c", size = 19634174, upload-time = "2024-08-26T20:13:13.634Z" }, - { url = "https://files.pythonhosted.org/packages/71/af/a469674070c8d8408384e3012e064299f7a2de540738a8e414dcfd639996/numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded", size = 14099701, upload-time = "2024-08-26T20:13:34.851Z" }, - { url = "https://files.pythonhosted.org/packages/d0/3d/08ea9f239d0e0e939b6ca52ad403c84a2bce1bde301a8eb4888c1c1543f1/numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5", size = 6174313, upload-time = "2024-08-26T20:13:45.653Z" }, - { url = "https://files.pythonhosted.org/packages/b2/b5/4ac39baebf1fdb2e72585c8352c56d063b6126be9fc95bd2bb5ef5770c20/numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a", size = 15606179, upload-time = "2024-08-26T20:14:08.786Z" }, - { url = "https://files.pythonhosted.org/packages/43/c1/41c8f6df3162b0c6ffd4437d729115704bd43363de0090c7f913cfbc2d89/numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c", size = 21169942, upload-time = "2024-08-26T20:14:40.108Z" }, - { url = "https://files.pythonhosted.org/packages/39/bc/fd298f308dcd232b56a4031fd6ddf11c43f9917fbc937e53762f7b5a3bb1/numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd", size = 13711512, upload-time = "2024-08-26T20:15:00.985Z" }, - { url = "https://files.pythonhosted.org/packages/96/ff/06d1aa3eeb1c614eda245c1ba4fb88c483bee6520d361641331872ac4b82/numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b", size = 5306976, upload-time = "2024-08-26T20:15:10.876Z" }, - { url = "https://files.pythonhosted.org/packages/2d/98/121996dcfb10a6087a05e54453e28e58694a7db62c5a5a29cee14c6e047b/numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729", size = 6906494, upload-time = "2024-08-26T20:15:22.055Z" }, - { url = "https://files.pythonhosted.org/packages/15/31/9dffc70da6b9bbf7968f6551967fc21156207366272c2a40b4ed6008dc9b/numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1", size = 13912596, upload-time = "2024-08-26T20:15:42.452Z" }, - { url = "https://files.pythonhosted.org/packages/b9/14/78635daab4b07c0930c919d451b8bf8c164774e6a3413aed04a6d95758ce/numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd", size = 19526099, upload-time = "2024-08-26T20:16:11.048Z" }, - { url = "https://files.pythonhosted.org/packages/26/4c/0eeca4614003077f68bfe7aac8b7496f04221865b3a5e7cb230c9d055afd/numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d", size = 19932823, upload-time = "2024-08-26T20:16:40.171Z" }, - { url = "https://files.pythonhosted.org/packages/f1/46/ea25b98b13dccaebddf1a803f8c748680d972e00507cd9bc6dcdb5aa2ac1/numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d", size = 14404424, upload-time = "2024-08-26T20:17:02.604Z" }, - { url = "https://files.pythonhosted.org/packages/c8/a6/177dd88d95ecf07e722d21008b1b40e681a929eb9e329684d449c36586b2/numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa", size = 6476809, upload-time = "2024-08-26T20:17:13.553Z" }, - { url = "https://files.pythonhosted.org/packages/ea/2b/7fc9f4e7ae5b507c1a3a21f0f15ed03e794c1242ea8a242ac158beb56034/numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73", size = 15911314, upload-time = "2024-08-26T20:17:36.72Z" }, - { url = "https://files.pythonhosted.org/packages/8f/3b/df5a870ac6a3be3a86856ce195ef42eec7ae50d2a202be1f5a4b3b340e14/numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8", size = 21025288, upload-time = "2024-08-26T20:18:07.732Z" }, - { url = "https://files.pythonhosted.org/packages/2c/97/51af92f18d6f6f2d9ad8b482a99fb74e142d71372da5d834b3a2747a446e/numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4", size = 6762793, upload-time = "2024-08-26T20:18:19.125Z" }, - { url = "https://files.pythonhosted.org/packages/12/46/de1fbd0c1b5ccaa7f9a005b66761533e2f6a3e560096682683a223631fe9/numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c", size = 19334885, upload-time = "2024-08-26T20:18:47.237Z" }, - { url = "https://files.pythonhosted.org/packages/cc/dc/d330a6faefd92b446ec0f0dfea4c3207bb1fef3c4771d19cf4543efd2c78/numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385", size = 15828784, upload-time = "2024-08-26T20:19:11.19Z" }, -] - [[package]] name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.10.*'", + "python_full_version < '3.11'", ] sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ @@ -1538,8 +1333,7 @@ name = "pandas" version = "2.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "python-dateutil" }, { name = "pytz" }, @@ -1581,13 +1375,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/d5/f0486090eb18dd8710bf60afeaf638ba6817047c0c8ae5c6a25598665609/pandas-2.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b37205ad6f00d52f16b6d09f406434ba928c1a1966e2771006a9033c736d30d2", size = 11883216, upload-time = "2025-08-21T10:27:59.302Z" }, { url = "https://files.pythonhosted.org/packages/10/86/692050c119696da19e20245bbd650d8dfca6ceb577da027c3a73c62a047e/pandas-2.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:837248b4fc3a9b83b9c6214699a13f069dc13510a6a6d7f9ba33145d2841a012", size = 12699743, upload-time = "2025-08-21T10:28:02.447Z" }, { url = "https://files.pythonhosted.org/packages/cd/d7/612123674d7b17cf345aad0a10289b2a384bff404e0463a83c4a3a59d205/pandas-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d2c3554bd31b731cd6490d94a28f3abb8dd770634a9e06eb6d2911b9827db370", size = 13186141, upload-time = "2025-08-21T10:28:05.377Z" }, - { url = "https://files.pythonhosted.org/packages/e0/c3/b37e090d0aceda9b4dd85c8dbd1bea65b1de9e7a4f690d6bd3a40bd16390/pandas-2.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88080a0ff8a55eac9c84e3ff3c7665b3b5476c6fbc484775ca1910ce1c3e0b87", size = 11551511, upload-time = "2025-08-21T10:28:11.111Z" }, - { url = "https://files.pythonhosted.org/packages/b9/47/381fb1e7adcfcf4230fa6dc3a741acbac6c6fe072f19f4e7a46bddf3e5f6/pandas-2.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d4a558c7620340a0931828d8065688b3cc5b4c8eb674bcaf33d18ff4a6870b4a", size = 10797930, upload-time = "2025-08-21T10:28:13.436Z" }, - { url = "https://files.pythonhosted.org/packages/36/ca/d42467829080b92fc46d451288af8068f129fbcfb6578d573f45120de5cf/pandas-2.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45178cf09d1858a1509dc73ec261bf5b25a625a389b65be2e47b559905f0ab6a", size = 11738470, upload-time = "2025-08-21T10:28:16.065Z" }, - { url = "https://files.pythonhosted.org/packages/60/76/7d0f0a0deed7867c51163982d7b79c0a089096cd7ad50e1b87c2c82220e9/pandas-2.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77cefe00e1b210f9c76c697fedd8fdb8d3dd86563e9c8adc9fa72b90f5e9e4c2", size = 12366640, upload-time = "2025-08-21T10:28:18.557Z" }, - { url = "https://files.pythonhosted.org/packages/21/31/56784743e421cf51e34358fe7e5954345e5942168897bf8eb5707b71eedb/pandas-2.3.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:13bd629c653856f00c53dc495191baa59bcafbbf54860a46ecc50d3a88421a96", size = 13211567, upload-time = "2025-08-21T10:28:20.998Z" }, - { url = "https://files.pythonhosted.org/packages/7a/4e/50a399dc7d9dd4aa09a03b163751d428026cf0f16c419b4010f6aca26ebd/pandas-2.3.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:36d627906fd44b5fd63c943264e11e96e923f8de77d6016dc2f667b9ad193438", size = 13854073, upload-time = "2025-08-21T10:28:24.056Z" }, - { url = "https://files.pythonhosted.org/packages/29/72/8978a84861a5124e56ce1048376569545412501fcb9a83f035393d6d85bc/pandas-2.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:a9d7ec92d71a420185dec44909c32e9a362248c4ae2238234b76d5be37f208cc", size = 11346452, upload-time = "2025-08-21T10:28:26.691Z" }, ] [[package]] @@ -1697,17 +1484,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370, upload-time = "2025-07-01T09:15:46.673Z" }, { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500, upload-time = "2025-07-01T09:15:48.512Z" }, { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8e/9c089f01677d1264ab8648352dcb7773f37da6ad002542760c80107da816/pillow-11.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:48d254f8a4c776de343051023eb61ffe818299eeac478da55227d96e241de53f", size = 5316478, upload-time = "2025-07-01T09:15:52.209Z" }, - { url = "https://files.pythonhosted.org/packages/b5/a9/5749930caf674695867eb56a581e78eb5f524b7583ff10b01b6e5048acb3/pillow-11.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7aee118e30a4cf54fdd873bd3a29de51e29105ab11f9aad8c32123f58c8f8081", size = 4686522, upload-time = "2025-07-01T09:15:54.162Z" }, - { url = "https://files.pythonhosted.org/packages/43/46/0b85b763eb292b691030795f9f6bb6fcaf8948c39413c81696a01c3577f7/pillow-11.3.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:23cff760a9049c502721bdb743a7cb3e03365fafcdfc2ef9784610714166e5a4", size = 5853376, upload-time = "2025-07-03T13:11:01.066Z" }, - { url = "https://files.pythonhosted.org/packages/5e/c6/1a230ec0067243cbd60bc2dad5dc3ab46a8a41e21c15f5c9b52b26873069/pillow-11.3.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6359a3bc43f57d5b375d1ad54a0074318a0844d11b76abccf478c37c986d3cfc", size = 7626020, upload-time = "2025-07-03T13:11:06.479Z" }, - { url = "https://files.pythonhosted.org/packages/63/dd/f296c27ffba447bfad76c6a0c44c1ea97a90cb9472b9304c94a732e8dbfb/pillow-11.3.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:092c80c76635f5ecb10f3f83d76716165c96f5229addbd1ec2bdbbda7d496e06", size = 5956732, upload-time = "2025-07-01T09:15:56.111Z" }, - { url = "https://files.pythonhosted.org/packages/a5/a0/98a3630f0b57f77bae67716562513d3032ae70414fcaf02750279c389a9e/pillow-11.3.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cadc9e0ea0a2431124cde7e1697106471fc4c1da01530e679b2391c37d3fbb3a", size = 6624404, upload-time = "2025-07-01T09:15:58.245Z" }, - { url = "https://files.pythonhosted.org/packages/de/e6/83dfba5646a290edd9a21964da07674409e410579c341fc5b8f7abd81620/pillow-11.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6a418691000f2a418c9135a7cf0d797c1bb7d9a485e61fe8e7722845b95ef978", size = 6067760, upload-time = "2025-07-01T09:16:00.003Z" }, - { url = "https://files.pythonhosted.org/packages/bc/41/15ab268fe6ee9a2bc7391e2bbb20a98d3974304ab1a406a992dcb297a370/pillow-11.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:97afb3a00b65cc0804d1c7abddbf090a81eaac02768af58cbdcaaa0a931e0b6d", size = 6700534, upload-time = "2025-07-01T09:16:02.29Z" }, - { url = "https://files.pythonhosted.org/packages/64/79/6d4f638b288300bed727ff29f2a3cb63db054b33518a95f27724915e3fbc/pillow-11.3.0-cp39-cp39-win32.whl", hash = "sha256:ea944117a7974ae78059fcc1800e5d3295172bb97035c0c1d9345fca1419da71", size = 6277091, upload-time = "2025-07-01T09:16:04.4Z" }, - { url = "https://files.pythonhosted.org/packages/46/05/4106422f45a05716fd34ed21763f8ec182e8ea00af6e9cb05b93a247361a/pillow-11.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:e5c5858ad8ec655450a7c7df532e9842cf8df7cc349df7225c60d5d348c8aada", size = 6986091, upload-time = "2025-07-01T09:16:06.342Z" }, - { url = "https://files.pythonhosted.org/packages/63/c6/287fd55c2c12761d0591549d48885187579b7c257bef0c6660755b0b59ae/pillow-11.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:6abdbfd3aea42be05702a8dd98832329c167ee84400a1d1f61ab11437f1717eb", size = 2422632, upload-time = "2025-07-01T09:16:08.142Z" }, { url = "https://files.pythonhosted.org/packages/6f/8b/209bd6b62ce8367f47e68a218bffac88888fdf2c9fcf1ecadc6c3ec1ebc7/pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967", size = 5270556, upload-time = "2025-07-01T09:16:09.961Z" }, { url = "https://files.pythonhosted.org/packages/2e/e6/231a0b76070c2cfd9e260a7a5b504fb72da0a95279410fa7afd99d9751d6/pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe", size = 4654625, upload-time = "2025-07-01T09:16:11.913Z" }, { url = "https://files.pythonhosted.org/packages/13/f4/10cf94fda33cb12765f2397fc285fa6d8eb9c29de7f3185165b702fc7386/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c", size = 4874207, upload-time = "2025-07-03T13:11:10.201Z" }, @@ -1869,22 +1645,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/91/9cb56efbb428b006bb85db28591e40b7736847b8331d43fe335acf95f6c8/propcache-0.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4445542398bd0b5d32df908031cb1b30d43ac848e20470a878b770ec2dcc6330", size = 265778, upload-time = "2025-06-09T22:55:36.45Z" }, { url = "https://files.pythonhosted.org/packages/9a/4c/b0fe775a2bdd01e176b14b574be679d84fc83958335790f7c9a686c1f468/propcache-0.3.2-cp313-cp313t-win32.whl", hash = "sha256:f86e5d7cd03afb3a1db8e9f9f6eff15794e79e791350ac48a8c924e6f439f394", size = 41175, upload-time = "2025-06-09T22:55:38.436Z" }, { url = "https://files.pythonhosted.org/packages/a4/ff/47f08595e3d9b5e149c150f88d9714574f1a7cbd89fe2817158a952674bf/propcache-0.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9704bedf6e7cbe3c65eca4379a9b53ee6a83749f047808cbb5044d40d7d72198", size = 44857, upload-time = "2025-06-09T22:55:39.687Z" }, - { url = "https://files.pythonhosted.org/packages/6c/39/8ea9bcfaaff16fd0b0fc901ee522e24c9ec44b4ca0229cfffb8066a06959/propcache-0.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a7fad897f14d92086d6b03fdd2eb844777b0c4d7ec5e3bac0fbae2ab0602bbe5", size = 74678, upload-time = "2025-06-09T22:55:41.227Z" }, - { url = "https://files.pythonhosted.org/packages/d3/85/cab84c86966e1d354cf90cdc4ba52f32f99a5bca92a1529d666d957d7686/propcache-0.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1f43837d4ca000243fd7fd6301947d7cb93360d03cd08369969450cc6b2ce3b4", size = 43829, upload-time = "2025-06-09T22:55:42.417Z" }, - { url = "https://files.pythonhosted.org/packages/23/f7/9cb719749152d8b26d63801b3220ce2d3931312b2744d2b3a088b0ee9947/propcache-0.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:261df2e9474a5949c46e962065d88eb9b96ce0f2bd30e9d3136bcde84befd8f2", size = 43729, upload-time = "2025-06-09T22:55:43.651Z" }, - { url = "https://files.pythonhosted.org/packages/a2/a2/0b2b5a210ff311260002a315f6f9531b65a36064dfb804655432b2f7d3e3/propcache-0.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e514326b79e51f0a177daab1052bc164d9d9e54133797a3a58d24c9c87a3fe6d", size = 204483, upload-time = "2025-06-09T22:55:45.327Z" }, - { url = "https://files.pythonhosted.org/packages/3f/e0/7aff5de0c535f783b0c8be5bdb750c305c1961d69fbb136939926e155d98/propcache-0.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a996adb6904f85894570301939afeee65f072b4fd265ed7e569e8d9058e4ec", size = 217425, upload-time = "2025-06-09T22:55:46.729Z" }, - { url = "https://files.pythonhosted.org/packages/92/1d/65fa889eb3b2a7d6e4ed3c2b568a9cb8817547a1450b572de7bf24872800/propcache-0.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76cace5d6b2a54e55b137669b30f31aa15977eeed390c7cbfb1dafa8dfe9a701", size = 214723, upload-time = "2025-06-09T22:55:48.342Z" }, - { url = "https://files.pythonhosted.org/packages/9a/e2/eecf6989870988dfd731de408a6fa366e853d361a06c2133b5878ce821ad/propcache-0.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31248e44b81d59d6addbb182c4720f90b44e1efdc19f58112a3c3a1615fb47ef", size = 200166, upload-time = "2025-06-09T22:55:49.775Z" }, - { url = "https://files.pythonhosted.org/packages/12/06/c32be4950967f18f77489268488c7cdc78cbfc65a8ba8101b15e526b83dc/propcache-0.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abb7fa19dbf88d3857363e0493b999b8011eea856b846305d8c0512dfdf8fbb1", size = 194004, upload-time = "2025-06-09T22:55:51.335Z" }, - { url = "https://files.pythonhosted.org/packages/46/6c/17b521a6b3b7cbe277a4064ff0aa9129dd8c89f425a5a9b6b4dd51cc3ff4/propcache-0.3.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d81ac3ae39d38588ad0549e321e6f773a4e7cc68e7751524a22885d5bbadf886", size = 203075, upload-time = "2025-06-09T22:55:52.681Z" }, - { url = "https://files.pythonhosted.org/packages/62/cb/3bdba2b736b3e45bc0e40f4370f745b3e711d439ffbffe3ae416393eece9/propcache-0.3.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:cc2782eb0f7a16462285b6f8394bbbd0e1ee5f928034e941ffc444012224171b", size = 195407, upload-time = "2025-06-09T22:55:54.048Z" }, - { url = "https://files.pythonhosted.org/packages/29/bd/760c5c6a60a4a2c55a421bc34a25ba3919d49dee411ddb9d1493bb51d46e/propcache-0.3.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:db429c19a6c7e8a1c320e6a13c99799450f411b02251fb1b75e6217cf4a14fcb", size = 196045, upload-time = "2025-06-09T22:55:55.485Z" }, - { url = "https://files.pythonhosted.org/packages/76/58/ced2757a46f55b8c84358d6ab8de4faf57cba831c51e823654da7144b13a/propcache-0.3.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:21d8759141a9e00a681d35a1f160892a36fb6caa715ba0b832f7747da48fb6ea", size = 208432, upload-time = "2025-06-09T22:55:56.884Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ec/d98ea8d5a4d8fe0e372033f5254eddf3254344c0c5dc6c49ab84349e4733/propcache-0.3.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2ca6d378f09adb13837614ad2754fa8afaee330254f404299611bce41a8438cb", size = 210100, upload-time = "2025-06-09T22:55:58.498Z" }, - { url = "https://files.pythonhosted.org/packages/56/84/b6d8a7ecf3f62d7dd09d9d10bbf89fad6837970ef868b35b5ffa0d24d9de/propcache-0.3.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:34a624af06c048946709f4278b4176470073deda88d91342665d95f7c6270fbe", size = 200712, upload-time = "2025-06-09T22:55:59.906Z" }, - { url = "https://files.pythonhosted.org/packages/bf/32/889f4903ddfe4a9dc61da71ee58b763758cf2d608fe1decede06e6467f8d/propcache-0.3.2-cp39-cp39-win32.whl", hash = "sha256:4ba3fef1c30f306b1c274ce0b8baaa2c3cdd91f645c48f06394068f37d3837a1", size = 38187, upload-time = "2025-06-09T22:56:01.212Z" }, - { url = "https://files.pythonhosted.org/packages/67/74/d666795fb9ba1dc139d30de64f3b6fd1ff9c9d3d96ccfdb992cd715ce5d2/propcache-0.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:7a2368eed65fc69a7a7a40b27f22e85e7627b74216f0846b04ba5c116e191ec9", size = 42025, upload-time = "2025-06-09T22:56:02.875Z" }, { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" }, ] @@ -1997,19 +1757,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 1981560, upload-time = "2025-04-23T18:32:22.354Z" }, { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, - { url = "https://files.pythonhosted.org/packages/53/ea/bbe9095cdd771987d13c82d104a9c8559ae9aec1e29f139e286fd2e9256e/pydantic_core-2.33.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a2b911a5b90e0374d03813674bf0a5fbbb7741570dcd4b4e85a2e48d17def29d", size = 2028677, upload-time = "2025-04-23T18:32:27.227Z" }, - { url = "https://files.pythonhosted.org/packages/49/1d/4ac5ed228078737d457a609013e8f7edc64adc37b91d619ea965758369e5/pydantic_core-2.33.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6fa6dfc3e4d1f734a34710f391ae822e0a8eb8559a85c6979e14e65ee6ba2954", size = 1864735, upload-time = "2025-04-23T18:32:29.019Z" }, - { url = "https://files.pythonhosted.org/packages/23/9a/2e70d6388d7cda488ae38f57bc2f7b03ee442fbcf0d75d848304ac7e405b/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c54c939ee22dc8e2d545da79fc5381f1c020d6d3141d3bd747eab59164dc89fb", size = 1898467, upload-time = "2025-04-23T18:32:31.119Z" }, - { url = "https://files.pythonhosted.org/packages/ff/2e/1568934feb43370c1ffb78a77f0baaa5a8b6897513e7a91051af707ffdc4/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53a57d2ed685940a504248187d5685e49eb5eef0f696853647bf37c418c538f7", size = 1983041, upload-time = "2025-04-23T18:32:33.655Z" }, - { url = "https://files.pythonhosted.org/packages/01/1a/1a1118f38ab64eac2f6269eb8c120ab915be30e387bb561e3af904b12499/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09fb9dd6571aacd023fe6aaca316bd01cf60ab27240d7eb39ebd66a3a15293b4", size = 2136503, upload-time = "2025-04-23T18:32:35.519Z" }, - { url = "https://files.pythonhosted.org/packages/5c/da/44754d1d7ae0f22d6d3ce6c6b1486fc07ac2c524ed8f6eca636e2e1ee49b/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e6116757f7959a712db11f3e9c0a99ade00a5bbedae83cb801985aa154f071b", size = 2736079, upload-time = "2025-04-23T18:32:37.659Z" }, - { url = "https://files.pythonhosted.org/packages/4d/98/f43cd89172220ec5aa86654967b22d862146bc4d736b1350b4c41e7c9c03/pydantic_core-2.33.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d55ab81c57b8ff8548c3e4947f119551253f4e3787a7bbc0b6b3ca47498a9d3", size = 2006508, upload-time = "2025-04-23T18:32:39.637Z" }, - { url = "https://files.pythonhosted.org/packages/2b/cc/f77e8e242171d2158309f830f7d5d07e0531b756106f36bc18712dc439df/pydantic_core-2.33.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c20c462aa4434b33a2661701b861604913f912254e441ab8d78d30485736115a", size = 2113693, upload-time = "2025-04-23T18:32:41.818Z" }, - { url = "https://files.pythonhosted.org/packages/54/7a/7be6a7bd43e0a47c147ba7fbf124fe8aaf1200bc587da925509641113b2d/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44857c3227d3fb5e753d5fe4a3420d6376fa594b07b621e220cd93703fe21782", size = 2074224, upload-time = "2025-04-23T18:32:44.033Z" }, - { url = "https://files.pythonhosted.org/packages/2a/07/31cf8fadffbb03be1cb520850e00a8490c0927ec456e8293cafda0726184/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:eb9b459ca4df0e5c87deb59d37377461a538852765293f9e6ee834f0435a93b9", size = 2245403, upload-time = "2025-04-23T18:32:45.836Z" }, - { url = "https://files.pythonhosted.org/packages/b6/8d/bbaf4c6721b668d44f01861f297eb01c9b35f612f6b8e14173cb204e6240/pydantic_core-2.33.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9fcd347d2cc5c23b06de6d3b7b8275be558a0c90549495c699e379a80bf8379e", size = 2242331, upload-time = "2025-04-23T18:32:47.618Z" }, - { url = "https://files.pythonhosted.org/packages/bb/93/3cc157026bca8f5006250e74515119fcaa6d6858aceee8f67ab6dc548c16/pydantic_core-2.33.2-cp39-cp39-win32.whl", hash = "sha256:83aa99b1285bc8f038941ddf598501a86f1536789740991d7d8756e34f1e74d9", size = 1910571, upload-time = "2025-04-23T18:32:49.401Z" }, - { url = "https://files.pythonhosted.org/packages/5b/90/7edc3b2a0d9f0dda8806c04e511a67b0b7a41d2187e2003673a996fb4310/pydantic_core-2.33.2-cp39-cp39-win_amd64.whl", hash = "sha256:f481959862f57f29601ccced557cc2e817bce7533ab8e01a797a48b49c9692b3", size = 1956504, upload-time = "2025-04-23T18:32:51.287Z" }, { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" }, { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" }, { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" }, @@ -2028,15 +1775,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, - { url = "https://files.pythonhosted.org/packages/08/98/dbf3fdfabaf81cda5622154fda78ea9965ac467e3239078e0dcd6df159e7/pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:87acbfcf8e90ca885206e98359d7dca4bcbb35abdc0ff66672a293e1d7a19101", size = 2024034, upload-time = "2025-04-23T18:33:32.843Z" }, - { url = "https://files.pythonhosted.org/packages/8d/99/7810aa9256e7f2ccd492590f86b79d370df1e9292f1f80b000b6a75bd2fb/pydantic_core-2.33.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:7f92c15cd1e97d4b12acd1cc9004fa092578acfa57b67ad5e43a197175d01a64", size = 1858578, upload-time = "2025-04-23T18:33:34.912Z" }, - { url = "https://files.pythonhosted.org/packages/d8/60/bc06fa9027c7006cc6dd21e48dbf39076dc39d9abbaf718a1604973a9670/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3f26877a748dc4251cfcfda9dfb5f13fcb034f5308388066bcfe9031b63ae7d", size = 1892858, upload-time = "2025-04-23T18:33:36.933Z" }, - { url = "https://files.pythonhosted.org/packages/f2/40/9d03997d9518816c68b4dfccb88969756b9146031b61cd37f781c74c9b6a/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac89aea9af8cd672fa7b510e7b8c33b0bba9a43186680550ccf23020f32d535", size = 2068498, upload-time = "2025-04-23T18:33:38.997Z" }, - { url = "https://files.pythonhosted.org/packages/d8/62/d490198d05d2d86672dc269f52579cad7261ced64c2df213d5c16e0aecb1/pydantic_core-2.33.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:970919794d126ba8645f3837ab6046fb4e72bbc057b3709144066204c19a455d", size = 2108428, upload-time = "2025-04-23T18:33:41.18Z" }, - { url = "https://files.pythonhosted.org/packages/9a/ec/4cd215534fd10b8549015f12ea650a1a973da20ce46430b68fc3185573e8/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3eb3fe62804e8f859c49ed20a8451342de53ed764150cb14ca71357c765dc2a6", size = 2069854, upload-time = "2025-04-23T18:33:43.446Z" }, - { url = "https://files.pythonhosted.org/packages/1a/1a/abbd63d47e1d9b0d632fee6bb15785d0889c8a6e0a6c3b5a8e28ac1ec5d2/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:3abcd9392a36025e3bd55f9bd38d908bd17962cc49bc6da8e7e96285336e2bca", size = 2237859, upload-time = "2025-04-23T18:33:45.56Z" }, - { url = "https://files.pythonhosted.org/packages/80/1c/fa883643429908b1c90598fd2642af8839efd1d835b65af1f75fba4d94fe/pydantic_core-2.33.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3a1c81334778f9e3af2f8aeb7a960736e5cab1dfebfb26aabca09afd2906c039", size = 2239059, upload-time = "2025-04-23T18:33:47.735Z" }, - { url = "https://files.pythonhosted.org/packages/d4/29/3cade8a924a61f60ccfa10842f75eb12787e1440e2b8660ceffeb26685e7/pydantic_core-2.33.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2807668ba86cb38c6817ad9bc66215ab8584d1d304030ce4f0887336f28a5e27", size = 2066661, upload-time = "2025-04-23T18:33:49.995Z" }, ] [[package]] @@ -2101,7 +1839,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, { name = "pytest" }, - { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4e/51/f8794af39eeb870e87a8c8068642fc07bce0c854d6865d7dd0f2a9d338c2/pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea", size = 46652, upload-time = "2025-07-16T04:29:26.393Z" } wheels = [ @@ -2194,15 +1931,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, - { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777, upload-time = "2024-08-06T20:33:25.896Z" }, - { url = "https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318, upload-time = "2024-08-06T20:33:27.212Z" }, - { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891, upload-time = "2024-08-06T20:33:28.974Z" }, - { url = "https://files.pythonhosted.org/packages/e9/6c/6e1b7f40181bc4805e2e07f4abc10a88ce4648e7e95ff1abe4ae4014a9b2/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", size = 722614, upload-time = "2024-08-06T20:33:34.157Z" }, - { url = "https://files.pythonhosted.org/packages/3d/32/e7bd8535d22ea2874cef6a81021ba019474ace0d13a4819c2a4bce79bd6a/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", size = 737360, upload-time = "2024-08-06T20:33:35.84Z" }, - { url = "https://files.pythonhosted.org/packages/d7/12/7322c1e30b9be969670b672573d45479edef72c9a0deac3bb2868f5d7469/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", size = 699006, upload-time = "2024-08-06T20:33:37.501Z" }, - { url = "https://files.pythonhosted.org/packages/82/72/04fcad41ca56491995076630c3ec1e834be241664c0c09a64c9a2589b507/PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", size = 723577, upload-time = "2024-08-06T20:33:39.389Z" }, - { url = "https://files.pythonhosted.org/packages/ed/5e/46168b1f2757f1fcd442bc3029cd8767d88a98c9c05770d8b420948743bb/PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", size = 144593, upload-time = "2024-08-06T20:33:46.63Z" }, - { url = "https://files.pythonhosted.org/packages/19/87/5124b1c1f2412bb95c59ec481eaf936cd32f0fe2a7b16b97b81c4c017a6a/PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", size = 162312, upload-time = "2024-08-06T20:33:49.073Z" }, ] [[package]] @@ -2213,8 +1941,7 @@ dependencies = [ { name = "certifi" }, { name = "charset-normalizer" }, { name = "idna" }, - { name = "urllib3", version = "1.26.20", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "urllib3", version = "2.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } wheels = [ @@ -2364,27 +2091,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] -[[package]] -name = "urllib3" -version = "1.26.20" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] -sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380, upload-time = "2024-08-29T15:43:11.37Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload-time = "2024-08-29T15:43:08.921Z" }, -] - [[package]] name = "urllib3" version = "2.5.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, @@ -2464,29 +2174,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, - { url = "https://files.pythonhosted.org/packages/36/db/3fff0bcbe339a6fa6a3b9e3fbc2bfb321ec2f4cd233692272c5a8d6cf801/websockets-15.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5f4c04ead5aed67c8a1a20491d54cdfba5884507a48dd798ecaf13c74c4489f5", size = 175424, upload-time = "2025-03-05T20:02:56.505Z" }, - { url = "https://files.pythonhosted.org/packages/46/e6/519054c2f477def4165b0ec060ad664ed174e140b0d1cbb9fafa4a54f6db/websockets-15.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abdc0c6c8c648b4805c5eacd131910d2a7f6455dfd3becab248ef108e89ab16a", size = 173077, upload-time = "2025-03-05T20:02:58.37Z" }, - { url = "https://files.pythonhosted.org/packages/1a/21/c0712e382df64c93a0d16449ecbf87b647163485ca1cc3f6cbadb36d2b03/websockets-15.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a625e06551975f4b7ea7102bc43895b90742746797e2e14b70ed61c43a90f09b", size = 173324, upload-time = "2025-03-05T20:02:59.773Z" }, - { url = "https://files.pythonhosted.org/packages/1c/cb/51ba82e59b3a664df54beed8ad95517c1b4dc1a913730e7a7db778f21291/websockets-15.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d591f8de75824cbb7acad4e05d2d710484f15f29d4a915092675ad3456f11770", size = 182094, upload-time = "2025-03-05T20:03:01.827Z" }, - { url = "https://files.pythonhosted.org/packages/fb/0f/bf3788c03fec679bcdaef787518dbe60d12fe5615a544a6d4cf82f045193/websockets-15.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47819cea040f31d670cc8d324bb6435c6f133b8c7a19ec3d61634e62f8d8f9eb", size = 181094, upload-time = "2025-03-05T20:03:03.123Z" }, - { url = "https://files.pythonhosted.org/packages/5e/da/9fb8c21edbc719b66763a571afbaf206cb6d3736d28255a46fc2fe20f902/websockets-15.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac017dd64572e5c3bd01939121e4d16cf30e5d7e110a119399cf3133b63ad054", size = 181397, upload-time = "2025-03-05T20:03:04.443Z" }, - { url = "https://files.pythonhosted.org/packages/2e/65/65f379525a2719e91d9d90c38fe8b8bc62bd3c702ac651b7278609b696c4/websockets-15.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4a9fac8e469d04ce6c25bb2610dc535235bd4aa14996b4e6dbebf5e007eba5ee", size = 181794, upload-time = "2025-03-05T20:03:06.708Z" }, - { url = "https://files.pythonhosted.org/packages/d9/26/31ac2d08f8e9304d81a1a7ed2851c0300f636019a57cbaa91342015c72cc/websockets-15.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363c6f671b761efcb30608d24925a382497c12c506b51661883c3e22337265ed", size = 181194, upload-time = "2025-03-05T20:03:08.844Z" }, - { url = "https://files.pythonhosted.org/packages/98/72/1090de20d6c91994cd4b357c3f75a4f25ee231b63e03adea89671cc12a3f/websockets-15.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2034693ad3097d5355bfdacfffcbd3ef5694f9718ab7f29c29689a9eae841880", size = 181164, upload-time = "2025-03-05T20:03:10.242Z" }, - { url = "https://files.pythonhosted.org/packages/2d/37/098f2e1c103ae8ed79b0e77f08d83b0ec0b241cf4b7f2f10edd0126472e1/websockets-15.0.1-cp39-cp39-win32.whl", hash = "sha256:3b1ac0d3e594bf121308112697cf4b32be538fb1444468fb0a6ae4feebc83411", size = 176381, upload-time = "2025-03-05T20:03:12.77Z" }, - { url = "https://files.pythonhosted.org/packages/75/8b/a32978a3ab42cebb2ebdd5b05df0696a09f4d436ce69def11893afa301f0/websockets-15.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b7643a03db5c95c799b89b31c036d5f27eeb4d259c798e878d6937d71832b1e4", size = 176841, upload-time = "2025-03-05T20:03:14.367Z" }, { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, - { url = "https://files.pythonhosted.org/packages/b7/48/4b67623bac4d79beb3a6bb27b803ba75c1bdedc06bd827e465803690a4b2/websockets-15.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7f493881579c90fc262d9cdbaa05a6b54b3811c2f300766748db79f098db9940", size = 173106, upload-time = "2025-03-05T20:03:29.404Z" }, - { url = "https://files.pythonhosted.org/packages/ed/f0/adb07514a49fe5728192764e04295be78859e4a537ab8fcc518a3dbb3281/websockets-15.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:47b099e1f4fbc95b701b6e85768e1fcdaf1630f3cbe4765fa216596f12310e2e", size = 173339, upload-time = "2025-03-05T20:03:30.755Z" }, - { url = "https://files.pythonhosted.org/packages/87/28/bd23c6344b18fb43df40d0700f6d3fffcd7cef14a6995b4f976978b52e62/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f2b6de947f8c757db2db9c71527933ad0019737ec374a8a6be9a956786aaf9", size = 174597, upload-time = "2025-03-05T20:03:32.247Z" }, - { url = "https://files.pythonhosted.org/packages/6d/79/ca288495863d0f23a60f546f0905ae8f3ed467ad87f8b6aceb65f4c013e4/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d08eb4c2b7d6c41da6ca0600c077e93f5adcfd979cd777d747e9ee624556da4b", size = 174205, upload-time = "2025-03-05T20:03:33.731Z" }, - { url = "https://files.pythonhosted.org/packages/04/e4/120ff3180b0872b1fe6637f6f995bcb009fb5c87d597c1fc21456f50c848/websockets-15.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b826973a4a2ae47ba357e4e82fa44a463b8f168e1ca775ac64521442b19e87f", size = 174150, upload-time = "2025-03-05T20:03:35.757Z" }, - { url = "https://files.pythonhosted.org/packages/cb/c3/30e2f9c539b8da8b1d76f64012f3b19253271a63413b2d3adb94b143407f/websockets-15.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:21c1fa28a6a7e3cbdc171c694398b6df4744613ce9b36b1a498e816787e28123", size = 176877, upload-time = "2025-03-05T20:03:37.199Z" }, { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] @@ -2586,22 +2279,5 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/ed/c5fb04869b99b717985e244fd93029c7a8e8febdfcffa06093e32d7d44e7/yarl-1.20.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:88cab98aa4e13e1ade8c141daeedd300a4603b7132819c484841bb7af3edce9e", size = 341709, upload-time = "2025-06-10T00:45:23.221Z" }, { url = "https://files.pythonhosted.org/packages/24/fd/725b8e73ac2a50e78a4534ac43c6addf5c1c2d65380dd48a9169cc6739a9/yarl-1.20.1-cp313-cp313t-win32.whl", hash = "sha256:b121ff6a7cbd4abc28985b6028235491941b9fe8fe226e6fdc539c977ea1739d", size = 86591, upload-time = "2025-06-10T00:45:25.793Z" }, { url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" }, - { url = "https://files.pythonhosted.org/packages/01/75/0d37402d208d025afa6b5b8eb80e466d267d3fd1927db8e317d29a94a4cb/yarl-1.20.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e42ba79e2efb6845ebab49c7bf20306c4edf74a0b20fc6b2ccdd1a219d12fad3", size = 134259, upload-time = "2025-06-10T00:45:29.882Z" }, - { url = "https://files.pythonhosted.org/packages/73/84/1fb6c85ae0cf9901046f07d0ac9eb162f7ce6d95db541130aa542ed377e6/yarl-1.20.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:41493b9b7c312ac448b7f0a42a089dffe1d6e6e981a2d76205801a023ed26a2b", size = 91269, upload-time = "2025-06-10T00:45:32.917Z" }, - { url = "https://files.pythonhosted.org/packages/f3/9c/eae746b24c4ea29a5accba9a06c197a70fa38a49c7df244e0d3951108861/yarl-1.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f5a5928ff5eb13408c62a968ac90d43f8322fd56d87008b8f9dabf3c0f6ee983", size = 89995, upload-time = "2025-06-10T00:45:35.066Z" }, - { url = "https://files.pythonhosted.org/packages/fb/30/693e71003ec4bc1daf2e4cf7c478c417d0985e0a8e8f00b2230d517876fc/yarl-1.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30c41ad5d717b3961b2dd785593b67d386b73feca30522048d37298fee981805", size = 325253, upload-time = "2025-06-10T00:45:37.052Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a2/5264dbebf90763139aeb0b0b3154763239398400f754ae19a0518b654117/yarl-1.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:59febc3969b0781682b469d4aca1a5cab7505a4f7b85acf6db01fa500fa3f6ba", size = 320897, upload-time = "2025-06-10T00:45:39.962Z" }, - { url = "https://files.pythonhosted.org/packages/e7/17/77c7a89b3c05856489777e922f41db79ab4faf58621886df40d812c7facd/yarl-1.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2b6fb3622b7e5bf7a6e5b679a69326b4279e805ed1699d749739a61d242449e", size = 340696, upload-time = "2025-06-10T00:45:41.915Z" }, - { url = "https://files.pythonhosted.org/packages/6d/55/28409330b8ef5f2f681f5b478150496ec9cf3309b149dab7ec8ab5cfa3f0/yarl-1.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:749d73611db8d26a6281086f859ea7ec08f9c4c56cec864e52028c8b328db723", size = 335064, upload-time = "2025-06-10T00:45:43.893Z" }, - { url = "https://files.pythonhosted.org/packages/85/58/cb0257cbd4002828ff735f44d3c5b6966c4fd1fc8cc1cd3cd8a143fbc513/yarl-1.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9427925776096e664c39e131447aa20ec738bdd77c049c48ea5200db2237e000", size = 327256, upload-time = "2025-06-10T00:45:46.393Z" }, - { url = "https://files.pythonhosted.org/packages/53/f6/c77960370cfa46f6fb3d6a5a79a49d3abfdb9ef92556badc2dcd2748bc2a/yarl-1.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff70f32aa316393eaf8222d518ce9118148eddb8a53073c2403863b41033eed5", size = 316389, upload-time = "2025-06-10T00:45:48.358Z" }, - { url = "https://files.pythonhosted.org/packages/64/ab/be0b10b8e029553c10905b6b00c64ecad3ebc8ace44b02293a62579343f6/yarl-1.20.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c7ddf7a09f38667aea38801da8b8d6bfe81df767d9dfc8c88eb45827b195cd1c", size = 340481, upload-time = "2025-06-10T00:45:50.663Z" }, - { url = "https://files.pythonhosted.org/packages/c5/c3/3f327bd3905a4916029bf5feb7f86dcf864c7704f099715f62155fb386b2/yarl-1.20.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:57edc88517d7fc62b174fcfb2e939fbc486a68315d648d7e74d07fac42cec240", size = 336941, upload-time = "2025-06-10T00:45:52.554Z" }, - { url = "https://files.pythonhosted.org/packages/d1/42/040bdd5d3b3bb02b4a6ace4ed4075e02f85df964d6e6cb321795d2a6496a/yarl-1.20.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:dab096ce479d5894d62c26ff4f699ec9072269d514b4edd630a393223f45a0ee", size = 339936, upload-time = "2025-06-10T00:45:54.919Z" }, - { url = "https://files.pythonhosted.org/packages/0d/1c/911867b8e8c7463b84dfdc275e0d99b04b66ad5132b503f184fe76be8ea4/yarl-1.20.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:14a85f3bd2d7bb255be7183e5d7d6e70add151a98edf56a770d6140f5d5f4010", size = 360163, upload-time = "2025-06-10T00:45:56.87Z" }, - { url = "https://files.pythonhosted.org/packages/e2/31/8c389f6c6ca0379b57b2da87f1f126c834777b4931c5ee8427dd65d0ff6b/yarl-1.20.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:2c89b5c792685dd9cd3fa9761c1b9f46fc240c2a3265483acc1565769996a3f8", size = 359108, upload-time = "2025-06-10T00:45:58.869Z" }, - { url = "https://files.pythonhosted.org/packages/7f/09/ae4a649fb3964324c70a3e2b61f45e566d9ffc0affd2b974cbf628957673/yarl-1.20.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:69e9b141de5511021942a6866990aea6d111c9042235de90e08f94cf972ca03d", size = 351875, upload-time = "2025-06-10T00:46:01.45Z" }, - { url = "https://files.pythonhosted.org/packages/8d/43/bbb4ed4c34d5bb62b48bf957f68cd43f736f79059d4f85225ab1ef80f4b9/yarl-1.20.1-cp39-cp39-win32.whl", hash = "sha256:b5f307337819cdfdbb40193cad84978a029f847b0a357fbe49f712063cfc4f06", size = 82293, upload-time = "2025-06-10T00:46:03.763Z" }, - { url = "https://files.pythonhosted.org/packages/d7/cd/ce185848a7dba68ea69e932674b5c1a42a1852123584bccc5443120f857c/yarl-1.20.1-cp39-cp39-win_amd64.whl", hash = "sha256:eae7bfe2069f9c1c5b05fc7fe5d612e5bbc089a39309904ee8b829e322dcad00", size = 87385, upload-time = "2025-06-10T00:46:05.655Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] From cd40f6da92e11af230fe247a383b0137fcd4d342 Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 3 Oct 2025 12:41:22 +0200 Subject: [PATCH 21/82] Make fiscal year support several values --- CHANGELOG.md | 6 ++++ .../search/narrative_search.py | 4 +-- .../search/query_builder.py | 28 +++++++++++++------ .../search/screener_search.py | 4 +-- .../workflows/narrative_miner.py | 2 +- .../workflows/risk_analyzer.py | 4 +-- .../workflows/thematic_screener.py | 4 +-- 7 files changed, 35 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1214cfa..e59deb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.99.0] - Unreleased Preparation for a first stable release. +## Added +- Added support for providing several fiscal years to any workflow or search function that + accepts a `fiscal_year` parameter. The parameter can now be a single integer or a list + of integers. When a list is provided, the workflow or function will search for the union + of all values provided. + ### Changed - Refactor the `themes` submodule to `tree`, allowing for a more generic tree structure that can be re-used accross different workflows. diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index a40f8f9..89c7340 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -25,7 +25,7 @@ def search_narratives( start_date: str, end_date: str, scope: DocumentType, - fiscal_year: int | None = None, + fiscal_year: int | list[int] = None, sources: list[str] | None = None, keywords: list[str] | None = None, control_entities: list[str] | None = None, @@ -45,7 +45,7 @@ def search_narratives( end_date (str): The end date for the search. scope (DocumentType): The document type scope (e.g., `DocumentType.NEWS`, `DocumentType.TRANSCRIPTS`). - fiscal_year (Optional[int]): The fiscal year to filter queries. + fiscal_year (int | list[int] | None): The fiscal year to filter queries. If None, no fiscal year filter is applied. sources (Optional[List[str]]): List of sources to filter on. If none, we search across all sources. keywords (Optional[List[str]]): A list of keywords for constructing keyword queries. diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 60222ef..62ddccd 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -78,7 +78,7 @@ def build_batched_query( control_entities: EntitiesToSearch | None, sources: list[str] | None, batch_size: int, - fiscal_year: int | None, + fiscal_year: int | list[int], scope: DocumentType, custom_batches: list[EntitiesToSearch] | None, ) -> list[QueryComponent]: @@ -98,7 +98,7 @@ def build_batched_query( Config of entities of different types (people, companies, organisations..) batch_size (int, optional): Number of entities per batch when auto-batching. Defaults to 10. - fiscal_year (int, optional): + fiscal_year (int | list[int], optional): Fiscal year to filter queries. scope (DocumentType, optional): Document type scope (e.g., ALL, TRANSCRIPTS). Defaults to ALL. @@ -147,7 +147,7 @@ def build_batched_query( def _validate_parameters( - document_scope: DocumentType | None = None, fiscal_year: int | None = None + document_scope: DocumentType | None = None, fiscal_year: int | list[int] = None, ) -> None: """ Validates parameters based on predefined rules. @@ -159,7 +159,7 @@ def _validate_parameters( return if document_scope in [DocumentType.FILINGS, DocumentType.TRANSCRIPTS]: - if fiscal_year is None: + if fiscal_year is None or (isinstance(fiscal_year, list) and len(fiscal_year) == 0): raise ValueError( f"`fiscal_year` is required when `document_scope` is `{document_scope.value}`" ) @@ -381,7 +381,7 @@ def _expand_queries( entity_batch_queries: list[QueryComponent] | None = None, control_query: QueryComponent | None = None, source_query: QueryComponent | None = None, - fiscal_year: int | None = None, + fiscal_year: int | list[int] = None, ) -> list[QueryComponent]: """Expand all query components into the final list of queries.""" base_queries, keyword_query, source_query = base_queries_tuple @@ -414,9 +414,21 @@ def _expand_queries( # Add fiscal year filter if provided if fiscal_year: - expanded_query = ( - expanded_query & FiscalYear(fiscal_year) if expanded_query else None - ) + if isinstance(fiscal_year, list): + fiscal_year_queries = [ + FiscalYear(year) for year in fiscal_year if isinstance(year, int) + ] + if fiscal_year_queries: + fiscal_year_query = Any(fiscal_year_queries) + expanded_query = ( + expanded_query & fiscal_year_query + if expanded_query + else fiscal_year_query + ) + else: + expanded_query = ( + expanded_query & FiscalYear(fiscal_year) if expanded_query else None + ) # Append the expanded query to the final list queries_expanded.append(expanded_query) diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index 897d1c1..abb28c2 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -29,7 +29,7 @@ def search_by_companies( start_date: str, end_date: str, scope: DocumentType = DocumentType.ALL, - fiscal_year: int | None = None, + fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, keywords: list[str] | None = None, control_entities: dict | None = None, @@ -50,7 +50,7 @@ def search_by_companies( end_date (str): The end date for the search. scope (DocumentType): The document type scope (e.g., `DocumentType.ALL`, `DocumentType.TRANSCRIPTS`). - fiscal_year (int): The fiscal year to filter queries. + fiscal_year (int | list[int] | None): The fiscal year to filter queries. If None, no fiscal year filter is applied. sources (Optional[List[str]]): List of sources to filter on. If none, we search across all sources. keywords (List[str]): A list of keywords for constructing keyword queries. diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 70c65a7..8b31f5e 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -21,7 +21,7 @@ def __init__( end_date: str, llm_model: str, document_type: DocumentType, - fiscal_year: int | None, + fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, rerank_threshold: float | None = None, ): diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index bfb8574..fd43005 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -28,7 +28,7 @@ def __init__( document_type: DocumentType, keywords: list[str] | None = None, control_entities: dict[str, list[str]] | None = None, - fiscal_year: int | None = None, + fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str = "", @@ -47,7 +47,7 @@ def __init__( end_date (str): The end date for searching relevant documents. Format: YYYY-MM-DD. document_type (DocumentType): Specifies the type of documents to search over - fiscal_year (int): The fiscal year that will be analyzed. + fiscal_year (int | list[int] | None): The fiscal year that will be analyzed. sources (Optional[List[str]]): Used to filter search results by the sources of the documents. If not provided, the search is run across all available sources. rerank_threshold (Optional[float]): The threshold for reranking the search results. diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 8f7e7a2..22c28e3 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -26,7 +26,7 @@ def __init__( start_date: str, end_date: str, document_type: DocumentType, - fiscal_year: int | None = None, + fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str = "", @@ -45,7 +45,7 @@ def __init__( end_date (str): The end date for searching relevant documents. Format: YYYY-MM-DD. document_type (DocumentType): Specifies the type of documents to search over - fiscal_year (int): The fiscal year that will be analyzed. + fiscal_year (int | list[int] | None): The fiscal year that will be analyzed. sources (Optional[List[str]]): Used to filter search results by the sources of the documents. If not provided, the search is run across all available sources. rerank_threshold (Optional[float]): The threshold for reranking the search results. From 3a7bd3526e58422c924741020c4257f110ee6138 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 8 Oct 2025 17:07:02 +0200 Subject: [PATCH 22/82] Reduce number of type check errors and improve parameter validation --- .../labeler/risk_labeler.py | 4 +- src/bigdata_research_tools/llm/azure.py | 33 ++++++++++---- src/bigdata_research_tools/llm/bedrock.py | 25 ++++++----- src/bigdata_research_tools/llm/openai.py | 2 +- .../search/narrative_search.py | 2 +- .../search/query_builder.py | 43 ++++++++++++------- src/bigdata_research_tools/search/search.py | 4 +- .../workflows/thematic_screener.py | 16 ++++++- 8 files changed, 85 insertions(+), 44 deletions(-) diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index e4211e5..ab5b6d6 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -199,7 +199,9 @@ def post_process_dataframe( return df[export_columns] -def replace_company_placeholders(row: Series, col_name: str = "motivation") -> str: +def replace_company_placeholders( + row: Series, col_name: str = "motivation" +) -> str | list[str]: """ Replace company placeholders in text. diff --git a/src/bigdata_research_tools/llm/azure.py b/src/bigdata_research_tools/llm/azure.py index ec73e3f..0d2148a 100644 --- a/src/bigdata_research_tools/llm/azure.py +++ b/src/bigdata_research_tools/llm/azure.py @@ -7,12 +7,19 @@ from typing import AsyncGenerator, Generator try: - from azure.identity import DefaultAzureCredential, get_bearer_token_provider - from openai import AsyncAzureOpenAI, AzureOpenAI, OpenAIError + from azure.identity import ( # ty: ignore[unresolved-import] + DefaultAzureCredential, + get_bearer_token_provider, + ) + from openai import ( # ty: ignore[unresolved-import] + AsyncAzureOpenAI, + AzureOpenAI, + OpenAIError, + ) except ImportError: raise ImportError( "Missing optional dependency for Azure LLM OpenAI provider, " - "please install `bigdata_research_tools[azure]` to enable them." + "please install `bigdata_research_tools[azure,openai]` to enable them." ) from bigdata_research_tools.llm.base import AsyncLLMProvider, LLMProvider @@ -62,6 +69,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st """ max_retries = 5 delay = 1 + random.random() # initial delay in seconds + last_exception = None for attempt in range(max_retries): try: chat_completion = await self._client.chat.completions.create( @@ -69,11 +77,14 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st ) return chat_completion.choices[0].message.content - except Exception: - if attempt == max_retries - 1: - raise + except Exception as e: await asyncio.sleep(delay) delay = 2 * delay + random.random() # exponential backoff + last_exception = e + + raise RuntimeError( + f"Max retries exceeded when calling {AsyncAzureProvider.__name__}" + ) from last_exception async def get_tools_response( self, @@ -188,6 +199,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: max_retries = 5 delay = 1 + random.random() # initial delay in seconds + last_exception = None for attempt in range(max_retries): try: chat_completion = self._client.chat.completions.create( @@ -195,11 +207,14 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: ) return chat_completion.choices[0].message.content - except Exception: - if attempt == max_retries - 1: - raise + except Exception as e: time.sleep(delay) delay = 2 * delay + random.random() # exponential backoff + last_exception = e + + raise RuntimeError( + f"Max retries exceeded when calling {AsyncAzureProvider.__name__}" + ) from last_exception def get_tools_response( self, diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index 762571d..1825b89 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -2,7 +2,7 @@ from typing import Any, Generator try: - from boto3 import Session + from boto3 import Session # ty: ignore[unresolved-import] except ImportError: raise ImportError( "Missing optional dependency for LLM Bedrock provider, " @@ -33,14 +33,16 @@ def configure_bedrock_client(self) -> None: region_name=self.region or environ.get("AWS_DEFAULT_REGION") ) + def _get_bedrock_client(self) -> Session: + return self._client.client("bedrock-runtime") + def _get_bedrock_input( self, chat_history: list[dict[str, str]], **kwargs - ) -> tuple[Session, dict[str, Any], str]: + ) -> tuple[dict[str, Any], str]: """ Get the input for the Bedrock API. :param chat_history: the chat history to get the input from. """ - bedrock_client = self._client.client("bedrock-runtime") default_kwargs = { "temperature": 0.01, "max_tokens": 2048, @@ -76,7 +78,7 @@ def _get_bedrock_input( "latency": "optimized" if kwargs.get("low_latency") else "standard" }, } - return bedrock_client, model_kwargs, response_prefix + return model_kwargs, response_prefix async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: """ @@ -94,9 +96,8 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st Only implemented for a few models. See https://docs.aws.amazon.com/bedrock/latest/userguide/latency-optimized-inference.html """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( - chat_history, **kwargs - ) + bedrock_client = self._get_bedrock_client() + model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) response = bedrock_client.converse(**model_kwargs) output_message = ( @@ -129,9 +130,8 @@ async def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( - chat_history, **kwargs - ) + bedrock_client = self._get_bedrock_client() + model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) if tools: model_kwargs["toolConfig"] = {"tools": tools} response = bedrock_client.converse(**model_kwargs) @@ -196,12 +196,11 @@ def configure_bedrock_client(self) -> None: def _get_bedrock_input( self, chat_history: list[dict[str, str]], **kwargs - ) -> tuple[Session, dict[str, Any]]: + ) -> tuple[dict[str, Any], str]: """ Get the input for the Bedrock API. :param chat_history: the chat history to get the input from. """ - bedrock_client = self._client.client("bedrock-runtime") default_kwargs = { "temperature": 0.01, "max_tokens": 2048, @@ -237,7 +236,7 @@ def _get_bedrock_input( "latency": "optimized" if kwargs.get("low_latency") else "standard" }, } - return bedrock_client, model_kwargs, response_prefix + return model_kwargs, response_prefix def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: """ diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index b16a583..4b8519c 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -4,7 +4,7 @@ from typing import AsyncGenerator, Generator try: - from openai import AsyncOpenAI, OpenAI + from openai import AsyncOpenAI, OpenAI # ty: ignore[unresolved-import] except ImportError: raise ImportError( "Missing optional dependency for LLM OpenAI provider, " diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index 89c7340..9b87a59 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -25,7 +25,7 @@ def search_narratives( start_date: str, end_date: str, scope: DocumentType, - fiscal_year: int | list[int] = None, + fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, keywords: list[str] | None = None, control_entities: list[str] | None = None, diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 62ddccd..d25f64e 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -78,7 +78,7 @@ def build_batched_query( control_entities: EntitiesToSearch | None, sources: list[str] | None, batch_size: int, - fiscal_year: int | list[int], + fiscal_year: int | list[int] | None, scope: DocumentType, custom_batches: list[EntitiesToSearch] | None, ) -> list[QueryComponent]: @@ -147,7 +147,8 @@ def build_batched_query( def _validate_parameters( - document_scope: DocumentType | None = None, fiscal_year: int | list[int] = None, + document_scope: DocumentType | None = None, + fiscal_year: int | list[int] | None = None, ) -> None: """ Validates parameters based on predefined rules. @@ -159,7 +160,9 @@ def _validate_parameters( return if document_scope in [DocumentType.FILINGS, DocumentType.TRANSCRIPTS]: - if fiscal_year is None or (isinstance(fiscal_year, list) and len(fiscal_year) == 0): + if fiscal_year is None or ( + isinstance(fiscal_year, list) and len(fiscal_year) == 0 + ): raise ValueError( f"`fiscal_year` is required when `document_scope` is `{document_scope.value}`" ) @@ -272,22 +275,20 @@ def _build_control_entity_query( def _build_entity_batch_queries( entities: EntitiesToSearch | None, - custom_batches: list[EntitiesToSearch], + custom_batches: list[EntitiesToSearch] | None, batch_size: int, scope: DocumentType, ) -> list[QueryComponent] | list[None]: """Build entity batch queries from either custom batches or auto-batched entities.""" - # If no entities specified, return a single None to ensure at least one iteration - if not entities and not custom_batches: - return [None] - - # If using custom batches, process them + # Prioritize custom batches if provided, else auto-batch entities if custom_batches: return _build_custom_batch_queries(custom_batches, scope) - - # Otherwise, auto-batch the entities - return _auto_batch_entities(entities, batch_size, scope) + elif entities: + return _auto_batch_entities(entities, batch_size, scope) + else: + # If no entities specified, return a single None to ensure at least one iteration + return [None] def _get_entity_type(scope: DocumentType) -> type: @@ -381,7 +382,7 @@ def _expand_queries( entity_batch_queries: list[QueryComponent] | None = None, control_query: QueryComponent | None = None, source_query: QueryComponent | None = None, - fiscal_year: int | list[int] = None, + fiscal_year: int | list[int] | None = None, ) -> list[QueryComponent]: """Expand all query components into the final list of queries.""" base_queries, keyword_query, source_query = base_queries_tuple @@ -416,7 +417,9 @@ def _expand_queries( if fiscal_year: if isinstance(fiscal_year, list): fiscal_year_queries = [ - FiscalYear(year) for year in fiscal_year if isinstance(year, int) + FiscalYear(year) + for year in fiscal_year + if isinstance(year, int) ] if fiscal_year_queries: fiscal_year_query = Any(fiscal_year_queries) @@ -427,7 +430,9 @@ def _expand_queries( ) else: expanded_query = ( - expanded_query & FiscalYear(fiscal_year) if expanded_query else None + expanded_query & FiscalYear(fiscal_year) + if expanded_query + else None ) # Append the expanded query to the final list @@ -481,6 +486,14 @@ def create_date_intervals( start_date_pd = pd.Timestamp(start_date) end_date_pd = pd.Timestamp(end_date) + # Invalid dates will be pd.NaT, which can be tested as a NA value + if pd.isna(start_date_pd): + raise ValueError("Invalid start_date format. Use 'YYYY-MM-DD'.") + if pd.isna(end_date_pd): + raise ValueError("Invalid end_date format. Use 'YYYY-MM-DD'.") + if start_date_pd > end_date_pd: + raise ValueError("start_date must be earlier than or equal to end_date.") + # Adjust frequency for yearly and monthly to use appropriate start markers # 'AS' for year start, 'MS' for month start adjusted_freq = frequency.replace("Y", "AS").replace("M", "MS") diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 347fe69..e824d4a 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -113,7 +113,7 @@ def _acquire_token(self, timeout: float | None = None) -> bool: def _search( self, query: QueryComponent, - date_range: Union[tuple[datetime, datetime], RollingDateRange], + date_range: tuple[datetime, datetime] | RollingDateRange, sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, @@ -257,7 +257,7 @@ def normalize_date_range( def run_search( queries: list[QueryComponent], - date_ranges: INPUT_DATE_RANGE = None, + date_ranges: INPUT_DATE_RANGE, sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 22c28e3..37f8916 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -29,7 +29,7 @@ def __init__( fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, rerank_threshold: float | None = None, - focus: str = "", + focus: str | None = None, ): """ This class will screen a universe's (specified in 'companies') exposure to a given theme ('main_theme'). @@ -56,14 +56,26 @@ def __init__( super().__init__() self.llm_model = llm_model self.main_theme = main_theme + if not companies: + raise ValueError( + "Thematic screener parameter `companies` cannot be None or an empty list" + ) self.companies = companies + if not start_date: + raise ValueError( + "Thematic screener parameter `start_date` cannot be None or an empty string" + ) self.start_date = start_date + if not end_date: + raise ValueError( + "Thematic screener parameter `end_date` cannot be None or an empty string" + ) self.end_date = end_date self.fiscal_year = fiscal_year self.document_type = document_type self.sources = sources self.rerank_threshold = rerank_threshold - self.focus = focus + self.focus = focus or "" def screen_companies( self, From 591e1cb5cb046acba8128ee4c32cb8e9d7f8503b Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 8 Oct 2025 17:12:58 +0200 Subject: [PATCH 23/82] Use overload to fix typing issue --- .../search/narrative_search.py | 1 + src/bigdata_research_tools/search/search.py | 29 +++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index 9b87a59..dd2209b 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -107,6 +107,7 @@ def search_narratives( limit=document_limit, scope=scope, sortby=sort_by, + only_results=True, rerank_threshold=rerank_threshold, **kwargs, ) diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index e824d4a..82a31e5 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -13,7 +13,7 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime -from typing import Union +from typing import Literal, Union, overload from bigdata_client import Bigdata from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange @@ -254,6 +254,31 @@ def normalize_date_range( return date_ranges +@overload +def run_search( + queries: list[QueryComponent], + date_ranges: INPUT_DATE_RANGE, + sortby: SortBy = SortBy.RELEVANCE, + scope: DocumentType = DocumentType.ALL, + limit: int = 10, + only_results: Literal[False] = False, + rerank_threshold: float | None = None, + **kwargs, +) -> SEARCH_QUERY_RESULTS_TYPE: + ... + +@overload +def run_search( + queries: list[QueryComponent], + date_ranges: INPUT_DATE_RANGE, + sortby: SortBy = SortBy.RELEVANCE, + scope: DocumentType = DocumentType.ALL, + limit: int = 10, + only_results: Literal[True] = True, + rerank_threshold: float | None = None, + **kwargs, +) -> list[list[Document]]: + ... def run_search( queries: list[QueryComponent], @@ -264,7 +289,7 @@ def run_search( only_results: bool = True, rerank_threshold: float | None = None, **kwargs, -) -> Union[SEARCH_QUERY_RESULTS_TYPE, list[list[Document]]]: +) -> SEARCH_QUERY_RESULTS_TYPE | list[list[Document]]: """ Execute multiple searches concurrently using the Bigdata client, with rate limiting. From 926b74940bc134a4ff8c8a92bc43d1c366645596 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 9 Oct 2025 15:22:45 +0200 Subject: [PATCH 24/82] Remove type checker errors --- .../search/query_builder.py | 22 +++++++++---------- .../search/screener_search.py | 1 + src/bigdata_research_tools/search/search.py | 9 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index d25f64e..2af93c0 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -45,7 +45,7 @@ def get_entity_type_map() -> dict[str, Type]: } -def build_similarity_queries(sentences: list[str]) -> list[Similarity]: +def build_similarity_queries(sentences: list[str]) -> list[QueryComponent]: """ Processes a list of sentences to create a list of Similarity query objects, ensuring no duplicates. @@ -178,10 +178,10 @@ def _build_base_queries( sentences: list[str] | None, keywords: list[str] | None, sources: list[str] | None, -) -> tuple[list[QueryComponent], QueryComponent | None, QueryComponent | None]: +) -> tuple[list[QueryComponent] | None, QueryComponent | None, QueryComponent | None]: """Build the base queries from sentences, keywords, and sources.""" # Create similarity queries from sentences - queries = build_similarity_queries(sentences) if sentences else [] + queries = build_similarity_queries(sentences) if sentences else None # Create keyword query keyword_query = Any([Keyword(word) for word in keywords]) if keywords else None @@ -195,7 +195,7 @@ def _build_base_queries( def _get_entity_ids( entity_names: list[str], entity_type: Type, -) -> list[Type]: +) -> list[QueryComponent]: bigdata = bigdata_connection() entity_ids = [] @@ -278,7 +278,7 @@ def _build_entity_batch_queries( custom_batches: list[EntitiesToSearch] | None, batch_size: int, scope: DocumentType, -) -> list[QueryComponent] | list[None]: +) -> list[QueryComponent] | None: """Build entity batch queries from either custom batches or auto-batched entities.""" # Prioritize custom batches if provided, else auto-batch entities @@ -287,11 +287,11 @@ def _build_entity_batch_queries( elif entities: return _auto_batch_entities(entities, batch_size, scope) else: - # If no entities specified, return a single None to ensure at least one iteration - return [None] + # If no entities specified, return None + return None -def _get_entity_type(scope: DocumentType) -> type: +def _get_entity_type(scope: DocumentType) -> Type[Entity | ReportingEntity]: """Determine the entity type based on document scope.""" return ( ReportingEntity @@ -302,13 +302,13 @@ def _get_entity_type(scope: DocumentType) -> type: def _build_custom_batch_queries( custom_batches: list[EntitiesToSearch], scope: DocumentType -) -> list[QueryComponent] | list[None]: +) -> list[QueryComponent] | None: """Build entity queries from a list of EntitiesToSearch objects.""" entity_type_map = EntitiesToSearch.get_entity_type_map() def get_entity_ids_for_attr( entity_config: EntitiesToSearch, attr_name: str, entity_class - ) -> list[int]: + ) -> list[QueryComponent]: """Get entity IDs for a specific attribute.""" entity_names = getattr(entity_config, attr_name, None) if not entity_names: @@ -377,7 +377,7 @@ def _auto_batch_entities( def _expand_queries( base_queries_tuple: tuple[ - list[QueryComponent], QueryComponent | None, QueryComponent | None + list[QueryComponent] | None, QueryComponent | None, QueryComponent | None ], entity_batch_queries: list[QueryComponent] | None = None, control_query: QueryComponent | None = None, diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index abb28c2..11c72f2 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -150,6 +150,7 @@ def search_by_companies( limit=document_limit, scope=scope, sortby=sort_by, + only_results=True, rerank_threshold=rerank_threshold, **kwargs, ) diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 82a31e5..3177be4 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -254,6 +254,7 @@ def normalize_date_range( return date_ranges + @overload def run_search( queries: list[QueryComponent], @@ -264,8 +265,8 @@ def run_search( only_results: Literal[False] = False, rerank_threshold: float | None = None, **kwargs, -) -> SEARCH_QUERY_RESULTS_TYPE: - ... +) -> SEARCH_QUERY_RESULTS_TYPE: ... + @overload def run_search( @@ -277,8 +278,8 @@ def run_search( only_results: Literal[True] = True, rerank_threshold: float | None = None, **kwargs, -) -> list[list[Document]]: - ... +) -> list[list[Document]]: ... + def run_search( queries: list[QueryComponent], From 36a99b1dccc5a42eafcd15d578d8f1fb535f2f6a Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 9 Oct 2025 15:25:41 +0200 Subject: [PATCH 25/82] Ignore all output excel files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index b77a58d..11e31a5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,6 @@ __pycache__/ dist/ build/ .ipynb_checkpoints + +*.xlsx +*.xls \ No newline at end of file From fe28efcf1aca914e9a49600b2fb75615deec3fee Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 9 Oct 2025 15:26:03 +0200 Subject: [PATCH 26/82] Finish fixing type errors and enable again the ty step on the CI pipeline --- .github/workflows/test.yml | 7 +++---- Makefile | 2 +- src/bigdata_research_tools/search/query_builder.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 555195b..9ddea80 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,9 +18,8 @@ jobs: uses: astral-sh/setup-uv@v6 - name: Install dependencies run: uv sync --locked --dev -# Disable type checking for now, the project is not mature enough to pass all checks -# - name: Type check the code -# run: make type-check + - name: Type check the code + run: make type-check - name: Lint the code run: make lint-check - name: Format the code @@ -44,6 +43,6 @@ jobs: enable-cache: true - name: Install dependencies run: uv sync --locked --dev -# Disable type checking for now, the project is not mature enough to pass all checks +# Disable type checking for now, v1 is not mature enough to pass all tests # - name: Test with pytest # run: make tests \ No newline at end of file diff --git a/Makefile b/Makefile index ecf555f..13a5cfb 100644 --- a/Makefile +++ b/Makefile @@ -13,4 +13,4 @@ format: @uvx ruff format src/bigdata_research_tools/ examples/ tutorial/ tests/ type-check: - @uvx ty check src/bigdata_research_tools/ # examples/ tutorial/ tests/ \ No newline at end of file + @uvx ty check src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Ignore tutorials, the issues come from this open issuehttps://github.com/astral-sh/ty/issues/1297 \ No newline at end of file diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 2af93c0..6e1ac43 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -332,7 +332,7 @@ def get_entity_ids_for_attr( if all_entities: batch_queries.append(Any(all_entities)) - return batch_queries if batch_queries else [None] + return batch_queries if batch_queries else None def _auto_batch_entities( From d9ddeb6d1a86aa3afe65ae25c5d4f3009a08ff8a Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 9 Oct 2025 15:27:43 +0200 Subject: [PATCH 27/82] Remove openai only exception control on generic module --- src/bigdata_research_tools/llm/utils.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 33f0cbd..dc79f02 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -4,7 +4,6 @@ from logging import Logger, getLogger from typing import Any, Coroutine -from openai import APITimeoutError, RateLimitError from tqdm import tqdm from bigdata_research_tools.llm.base import AsyncLLMEngine @@ -74,15 +73,19 @@ async def _fetch_with_semaphore( async with semaphore: retry_delay = 1 # Initial delay in seconds max_retries = 5 + last_exception = None for attempt in range(max_retries): try: response = await llm_engine.get_response(chat_history, **kwargs) return idx, response - except (APITimeoutError, RateLimitError): + except Exception as e: + last_exception = e await asyncio.sleep(retry_delay) # Exponential backoff retry_delay = min(retry_delay * 2, 60) - logger.error(f"Failed to get response for prompt: {prompt}") + logger.error( + f"Failed to get response for prompt: {prompt} Error: {last_exception}" + ) return idx, "" @@ -131,14 +134,18 @@ def fetch(idx, prompt): ] retry_delay = 1 max_retries = 5 + last_exception = None for attempt in range(max_retries): try: response = llm_engine.get_response(chat_history, **kwargs) return idx, response - except (APITimeoutError, RateLimitError): + except Exception as e: + last_exception = e time.sleep(retry_delay) retry_delay = min(retry_delay * 2, 60) - logger.error(f"Failed to get response for prompt: {prompt}") + logger.error( + f"Failed to get response for prompt: {prompt} Error: {last_exception}" + ) return idx, "" results = [""] * len(prompts) From 755e3f636efaf40a0986934623ee4c7c879ca6cf Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 14 Oct 2025 10:13:47 +0200 Subject: [PATCH 28/82] Fix type errors --- src/bigdata_research_tools/llm/bedrock.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index 1825b89..f71c82e 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -61,6 +61,7 @@ def _get_bedrock_input( system.append({"text": message["content"]}) if ( "response_format" in kwargs + and isinstance(kwargs["response_format"], dict) and kwargs["response_format"].get("type") == "json" ): formatted_history.append({"role": "assistant", "content": [{"text": "{"}]}) @@ -219,6 +220,7 @@ def _get_bedrock_input( system.append({"text": message["content"]}) if ( "response_format" in kwargs + and isinstance(kwargs["response_format"], dict) and kwargs["response_format"].get("type") == "json" ): formatted_history.append({"role": "assistant", "content": [{"text": "{"}]}) From 494f51bf39ac32702497a4dd236b8c643df3da90 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 14 Oct 2025 10:14:37 +0200 Subject: [PATCH 29/82] Make examples output go to gitignored folder --- .gitignore | 1 + examples/narrative_miner.py | 7 ++++++- examples/risk_analyzer.py | 6 ++++++ examples/run_search.py | 11 ++++++++--- examples/search_by_companies.py | 11 ++++++++--- examples/thematic_screener.py | 7 ++++++- 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 11e31a5..7436885 100644 --- a/.gitignore +++ b/.gitignore @@ -17,5 +17,6 @@ dist/ build/ .ipynb_checkpoints +outputs/* *.xlsx *.xls \ No newline at end of file diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index 9a676dc..f3c4a4f 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -1,3 +1,5 @@ +from pathlib import Path + from bigdata_client.models.search import DocumentType from bigdata_research_tools.utils.observer import OberserverNotification, Observer @@ -53,4 +55,7 @@ def update(self, message: OberserverNotification): logging.basicConfig() logging.getLogger("bigdata_research_tools").setLevel(logging.INFO) - narrative_miner_example() + output_path = Path("outputs/narrative_miner_sample.xlsx") + output_path.parent.mkdir(parents=True, exist_ok=True) + + narrative_miner_example(export_path=str(output_path)) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 2af562a..26dad14 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -1,3 +1,5 @@ +from pathlib import Path + from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection @@ -54,9 +56,13 @@ def update(self, message: OberserverNotification): logging.basicConfig() logging.getLogger("bigdata_research_tools").setLevel(logging.INFO) + output_path = Path("outputs/risk_analyzer_results.xlsx") + output_path.parent.mkdir(parents=True, exist_ok=True) + x = risk_analyzer_example( "US Import Tariffs against Canada and Mexico", focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.", + export_path=str(output_path), ) # custom_config = { # 'company_column': 'Company', diff --git a/examples/run_search.py b/examples/run_search.py index d142e6f..fe60a5a 100644 --- a/examples/run_search.py +++ b/examples/run_search.py @@ -11,6 +11,7 @@ """ import logging +from pathlib import Path import pandas as pd from bigdata_client.models.search import DocumentType @@ -28,7 +29,9 @@ logger = logging.getLogger(__name__) -def main(): +def main( + output_file: str = "run_search_results.xlsx", +): """Basic example of run_search usage.""" # Load environment variables @@ -115,7 +118,6 @@ def main(): logger.info(f" - {headline}") # Export to Excel - output_file = "run_search_results.xlsx" results_df.to_excel(output_file, index=False) logger.info(f"Results exported to {output_file}") @@ -124,4 +126,7 @@ def main(): if __name__ == "__main__": - main() + output_path = Path("outputs/run_search_results.xlsx") + output_path.parent.mkdir(parents=True, exist_ok=True) + + main(output_file=str(output_path)) diff --git a/examples/search_by_companies.py b/examples/search_by_companies.py index 9a0e960..7b503d8 100644 --- a/examples/search_by_companies.py +++ b/examples/search_by_companies.py @@ -11,6 +11,7 @@ """ import logging +from pathlib import Path from bigdata_client.models.search import DocumentType from dotenv import load_dotenv @@ -23,7 +24,9 @@ logger = logging.getLogger(__name__) -def main(): +def main( + output_file: str = "search_by_companies_results.xlsx", +): """Basic example of search_by_companies usage.""" # Load environment variables @@ -89,7 +92,6 @@ def main(): logger.info(f" - {headline}") # Export to Excel (fix timezone issues) - output_file = "search_by_companies_results.xlsx" # Create a copy for Excel export with timezone-naive timestamps excel_df = results_df.copy() @@ -108,4 +110,7 @@ def main(): if __name__ == "__main__": - main() + output_path = Path("outputs/run_search_results.xlsx") + output_path.parent.mkdir(parents=True, exist_ok=True) + + main(output_file=str(output_path)) diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 9ac06d0..6d87248 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -1,3 +1,5 @@ +from pathlib import Path + from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection @@ -50,7 +52,10 @@ def update(self, message: OberserverNotification): logging.basicConfig() logging.getLogger("bigdata_research_tools").setLevel(logging.INFO) - x = thematic_screener_example("Chip Manufacturers") + output_path = Path("outputs/thematic_screener_results.xlsx") + output_path.parent.mkdir(parents=True, exist_ok=True) + + x = thematic_screener_example("Chip Manufacturers", export_path=str(output_path)) custom_config = { "company_column": "Company", "heatmap_colorscale": "Plasma", From 28861cd021cb96aaee6f2c772246417c7c093064 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 14 Oct 2025 10:21:43 +0200 Subject: [PATCH 30/82] Implement custom motivations prompts for different use cases --- CHANGELOG.md | 1 + .../portfolio/motivation.py | 18 +++++-- .../prompts/motivation.py | 53 ++++++++++++++++++- .../workflows/risk_analyzer.py | 2 + .../workflows/thematic_screener.py | 6 ++- 5 files changed, 74 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e59deb0..26aaa70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Preparation for a first stable release. - Renamed `settings` to `utils` to better reflect its purpose as a utility module and free the name for future use. - Renamed all `freq` parameters to `frequency` for better clarity and consistency accross workflows. +- Implement custom motivation prompts for different use cases, including thematic screening and risk analysis. ### Fixed - Changed build system to use `uv_build` instead of `setuptools` to avoid issues with package data inclusion. diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index 1be5e69..e983cfe 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -5,7 +5,10 @@ from tqdm import tqdm from bigdata_research_tools.llm.base import LLMEngine -from bigdata_research_tools.prompts.motivation import get_motivation_prompt +from bigdata_research_tools.prompts.motivation import ( + MotivationType, + get_motivation_prompt, +) class Motivation: @@ -122,7 +125,11 @@ def query_llm_for_motivation(self, prompt: str) -> str: return motivation.strip() def generate_company_motivations( - self, df: pd.DataFrame, theme_name: str, word_range: tuple[int, int] + self, + df: pd.DataFrame, + theme_name: str, + word_range: tuple[int, int], + use_case: MotivationType = MotivationType.THEMATIC_SCREENER, ) -> pd.DataFrame: """ Generates motivation statement with specified verbosity for companies in a thematic watchlist. @@ -148,7 +155,12 @@ def generate_company_motivations( ): # Create prompt for this word range prompt = get_motivation_prompt( - company, data, theme_name, word_range[0], word_range[1] + company, + data, + theme_name, + word_range[0], + word_range[1], + use_case=use_case, ) # Generate motivation with this word range diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index 9792c8c..9680506 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -1,6 +1,13 @@ +from enum import Enum + import pandas as pd +class MotivationType(str, Enum): + THEMATIC_SCREENER = "thematic_screener" + RISK_ANALYZER = "risk_analyzer" + + def generate_prompt_template() -> str: """ Returns the base prompt template with placeholders for formatting. @@ -30,8 +37,44 @@ def generate_prompt_template() -> str: """ +def generate_prompt_template_risk() -> str: + """ + Returns the base prompt template with placeholders for formatting. + """ + return """ + You are an expert financial analyst with specialized knowledge in corporate risk assessment. + Your task is to generate a concise risk statement explaining the key risks this company is exposed to based on the provided data. + + Inputs: + Theme: {theme} + Company: {company} + + This company has {total_quotes} quotes related to the theme, indicating exposure to the following risk categories: + {label_summary} + + Here are the quotes with their corresponding labels: + {quotes_and_labels} + + Your task: + Generate a concise risk statement (2-4 sentences) that: + 1. ALWAYS begins with the company name + 2. Summarizes the key risks the company faces within the specified theme + 3. References the specific risk categories where exposure is most significant + 4. For any numerical figures, make sure to quote the exact metric correctly + 5. Uses objective, evidence-based language referring to the company's actual activities + 6. Maintains a neutral, analytical tone without subjective judgments + 7. Focuses on facts rather than predictions or recommendations + 8. Keeps the statement concise ({min_words}-{max_words} words) + """ + + def get_motivation_prompt( - company: str, data: pd.DataFrame, theme_name: str, min_words: int, max_words: int + company: str, + data: pd.DataFrame, + theme_name: str, + min_words: int, + max_words: int, + use_case: MotivationType = MotivationType.THEMATIC_SCREENER, ) -> str: """ Formats the motivation prompt using company data and the prompt template. @@ -54,7 +97,13 @@ def get_motivation_prompt( for i, item in enumerate(data["quotes_and_labels"]): quotes_text += f'{i + 1}. "{item["quote"]}" [Label: {item["label"]}]\n' - prompt_template = generate_prompt_template() + match use_case: + case MotivationType.RISK_ANALYZER: + prompt_template = generate_prompt_template_risk() + case MotivationType.THEMATIC_SCREENER: + prompt_template = generate_prompt_template() + case _: + raise ValueError(f"Unsupported use_case: {use_case}") return prompt_template.format( theme=theme_name, diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index fd43005..73d1d15 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -8,6 +8,7 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category from bigdata_research_tools.portfolio.motivation import Motivation +from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace from bigdata_research_tools.tree import SemanticTree, generate_risk_tree @@ -243,6 +244,7 @@ def generate_results( df=df_labeled.rename(columns={"Sub-Scenario": "Theme"}), theme_name=self.main_theme, word_range=word_range, + use_case=MotivationType.RISK_ANALYZER, ) return df_company, df_industry, motivation_df diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 37f8916..74e7f61 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -8,6 +8,7 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.portfolio.motivation import Motivation +from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies from bigdata_research_tools.tracing import Trace, TraceEventNames, send_trace from bigdata_research_tools.tree import generate_theme_tree @@ -218,7 +219,10 @@ def screen_companies( ) motivation_generator = Motivation(model=self.llm_model) motivation_df = motivation_generator.generate_company_motivations( - df=df, theme_name=self.main_theme, word_range=word_range + df=df, + theme_name=self.main_theme, + word_range=word_range, + use_case=MotivationType.THEMATIC_SCREENER, ) self.notify_observers("Motivations generated") From 1ca24da8e189def1a9f0fade2ba8de0f1abca4aa Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 14 Oct 2025 12:23:04 +0200 Subject: [PATCH 31/82] Removed ipython dependency and unused code --- CHANGELOG.md | 1 + pyproject.toml | 1 - src/bigdata_research_tools/workflows/utils.py | 28 --- uv.lock | 160 +----------------- 4 files changed, 2 insertions(+), 188 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26aaa70..0f4e1d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ Preparation for a first stable release. ### Removed - Removed support for Python 3.9 as it has reached its end of life. The minimum supported version is now Python 3.10. +- Removed `ipython` from main dependencies and removed unused function `bigdata_research_tools.workflows.utils.display_output_chunks_dataframe`. ## [0.20.1] - 2025-09-16 diff --git a/pyproject.toml b/pyproject.toml index 6afd273..6af73d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ dependencies = [ "pillow>=11.1.0,<12.0.0", "graphviz>=0.20.3,<0.21.0", "tqdm>=4.67.1", - "ipython>=8.0.0,<9.0.0", "json-repair>=0.44.1", "tabulate>=0.9.0,<1.0.0", "plotly>=6.0.0,<7.0.0", diff --git a/src/bigdata_research_tools/workflows/utils.py b/src/bigdata_research_tools/workflows/utils.py index fbb1c81..3ad9ab6 100644 --- a/src/bigdata_research_tools/workflows/utils.py +++ b/src/bigdata_research_tools/workflows/utils.py @@ -2,37 +2,9 @@ Script with any common helper functions used across the workflows. """ -from IPython.display import HTML, display from pandas import DataFrame -def display_output_chunks_dataframe(final_df): - """ - Display selected document chunks in a formatted HTML view for better readability. - - Args: - final_df: DataFrame containing semantic labels with document chunks - """ - output_lines = [] - - for row, element in final_df.iterrows(): - # Add lines to the output list with the company in bold - output_lines.append(f"Company: {element.Company}
") - output_lines.append(f"Sector: {element.Sector}
") - output_lines.append(f"Industry: {element.Industry}
") - output_lines.append(f"Date: {element.Date}
") - output_lines.append(f"Headline: {element.Headline}
") - output_lines.append( - f"Sentence Identifier: {element['Document ID']}
" - ) - output_lines.append(f"Quote: {element.Quote}
") - output_lines.append(f"Sub-Theme Label: {element.Theme}
") - output_lines.append("--------------------
") - - # Join all lines into a single string and display it - display(HTML("".join(output_lines))) - - def get_scored_df( df: DataFrame, index_columns: list[str], pivot_column: str ) -> DataFrame: diff --git a/uv.lock b/uv.lock index 605fffd..07927f9 100644 --- a/uv.lock +++ b/uv.lock @@ -139,15 +139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/12/e5e0282d673bb9746bacfb6e2dba8719989d3660cdb2ea79aee9a9651afb/anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1", size = 107213, upload-time = "2025-08-04T08:54:24.882Z" }, ] -[[package]] -name = "asttokens" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978, upload-time = "2024-11-30T04:30:14.439Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, -] - [[package]] name = "async-timeout" version = "5.0.1" @@ -230,7 +221,6 @@ source = { editable = "." } dependencies = [ { name = "bigdata-client" }, { name = "graphviz" }, - { name = "ipython" }, { name = "json-repair" }, { name = "openpyxl" }, { name = "pandas" }, @@ -266,7 +256,6 @@ requires-dist = [ { name = "bigdata-client", specifier = ">=2.15.0" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.24.0,<2.0.0" }, { name = "graphviz", specifier = ">=0.20.3,<0.21.0" }, - { name = "ipython", specifier = ">=8.0.0,<9.0.0" }, { name = "json-repair", specifier = ">=0.44.1" }, { name = "openai", marker = "extra == 'azure'", specifier = ">=1.61.1,<2.0.0" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.61.1,<2.0.0" }, @@ -614,15 +603,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/87/7ce86f3fa14bc11a5a48c30d8103c26e09b6465f8d8e9d74cf7a0714f043/cryptography-45.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f3d56f73595376f4244646dd5c5870c14c196949807be39e79e7bd9bac3da63", size = 3332908, upload-time = "2025-09-01T11:14:58.78Z" }, ] -[[package]] -name = "decorator" -version = "5.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, -] - [[package]] name = "distlib" version = "0.4.0" @@ -655,22 +635,13 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, ] -[[package]] -name = "executing" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cc/28/c14e053b6762b1044f34a13aab6859bbf40456d37d23aa286ac24cfd9a5d/executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4", size = 1129488, upload-time = "2025-09-01T09:48:10.866Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, -] - [[package]] name = "filelock" version = "3.19.1" @@ -847,40 +818,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] -[[package]] -name = "ipython" -version = "8.37.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "decorator" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, - { name = "jedi" }, - { name = "matplotlib-inline" }, - { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit" }, - { name = "pygments" }, - { name = "stack-data" }, - { name = "traitlets" }, - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/31/10ac88f3357fc276dc8a64e8880c82e80e7459326ae1d0a211b40abf6665/ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216", size = 5606088, upload-time = "2025-05-31T16:39:09.613Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/d0/274fbf7b0b12643cbbc001ce13e6a5b1607ac4929d1b11c72460152c9fc3/ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2", size = 831864, upload-time = "2025-05-31T16:39:06.38Z" }, -] - -[[package]] -name = "jedi" -version = "0.19.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "parso" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, -] - [[package]] name = "jiter" version = "0.10.0" @@ -971,18 +908,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/c2/93368d4c9355e8ad1f6d62b804de241939d0796b2a3a73737f665b802808/json_repair-0.50.0-py3-none-any.whl", hash = "sha256:b15da2c42deb43419b182d97dcfde6cd86d0b18ccd18ed1a887104ce85e7a364", size = 25985, upload-time = "2025-08-20T15:01:56.567Z" }, ] -[[package]] -name = "matplotlib-inline" -version = "0.1.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159, upload-time = "2024-04-15T13:44:44.803Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" }, -] - [[package]] name = "msal" version = "1.33.0" @@ -1377,27 +1302,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/d7/612123674d7b17cf345aad0a10289b2a384bff404e0463a83c4a3a59d205/pandas-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d2c3554bd31b731cd6490d94a28f3abb8dd770634a9e06eb6d2911b9827db370", size = 13186141, upload-time = "2025-08-21T10:28:05.377Z" }, ] -[[package]] -name = "parso" -version = "0.8.5" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/de/53e0bcf53d13e005bd8c92e7855142494f41171b34c2536b86187474184d/parso-0.8.5.tar.gz", hash = "sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a", size = 401205, upload-time = "2025-08-23T15:15:28.028Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl", hash = "sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887", size = 106668, upload-time = "2025-08-23T15:15:25.663Z" }, -] - -[[package]] -name = "pexpect" -version = "4.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ptyprocess" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, -] - [[package]] name = "pillow" version = "11.3.0" @@ -1547,18 +1451,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" }, ] -[[package]] -name = "prompt-toolkit" -version = "3.0.52" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, -] - [[package]] name = "propcache" version = "0.3.2" @@ -1648,24 +1540,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/35/cc0aaecf278bb4575b8555f2b137de5ab821595ddae9da9d3cd1da4072c7/propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f", size = 12663, upload-time = "2025-06-09T22:56:04.484Z" }, ] -[[package]] -name = "ptyprocess" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, -] - -[[package]] -name = "pure-eval" -version = "0.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = "2024-07-21T12:58:21.801Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, -] - [[package]] name = "pycparser" version = "2.22" @@ -1978,20 +1852,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] -[[package]] -name = "stack-data" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "asttokens" }, - { name = "executing" }, - { name = "pure-eval" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, -] - [[package]] name = "tabulate" version = "0.9.0" @@ -2052,15 +1912,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] -[[package]] -name = "traitlets" -version = "5.14.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" @@ -2115,15 +1966,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" }, ] -[[package]] -name = "wcwidth" -version = "0.2.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, -] - [[package]] name = "websockets" version = "15.0.1" From 02b9cf7fcf8f8882000279a48c0ddd00581ffc74 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 16 Oct 2025 09:25:34 +0200 Subject: [PATCH 32/82] Change year start string representation after deprecation in pandas --- src/bigdata_research_tools/search/query_builder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 6e1ac43..01b69a8 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -470,7 +470,7 @@ def create_date_intervals( Operation: 1. Converts the `start_date` and `end_date` strings to `pd.Timestamp` objects. 2. Adjusts the frequency for yearly ('Y') and monthly ('M') intervals to align with period starts: - - 'Y' → 'AS' (Year Start). + - 'Y' → 'YS' (Year Start). - 'M' → 'MS' (Month Start). 3. Uses `pd.date_range` to generate a range of dates based on the frequency. 4. Creates tuples representing start and end times for each interval: @@ -495,8 +495,8 @@ def create_date_intervals( raise ValueError("start_date must be earlier than or equal to end_date.") # Adjust frequency for yearly and monthly to use appropriate start markers - # 'AS' for year start, 'MS' for month start - adjusted_freq = frequency.replace("Y", "AS").replace("M", "MS") + # 'YS' for year start, 'MS' for month start + adjusted_freq = frequency.replace("Y", "YS").replace("M", "MS") # Generate date range based on the adjusted frequency try: From 74ccf0c875fe12866f00591029e640b701b48970 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 22 Oct 2025 16:54:48 +0200 Subject: [PATCH 33/82] Fix type errors after merge with master and linting and formatting new code --- .../search/query_builder.py | 20 +- .../search/screener_search.py | 14 +- src/bigdata_research_tools/search/search.py | 63 ++-- src/bigdata_research_tools/tracing.py | 12 +- .../workflows/narrative_miner.py | 24 +- .../workflows/risk_analyzer.py | 26 +- .../workflows/thematic_screener.py | 26 +- tests/test_search/__init__.py | 1 - tests/test_search/test_query_builder.py | 293 +++++++++--------- 9 files changed, 270 insertions(+), 209 deletions(-) diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 2cdcc08..cf58ffd 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -508,22 +508,26 @@ def create_date_intervals( # Create intervals intervals = [] - + # If no dates were generated (range shorter than frequency), return single interval if len(date_range) == 0: - return [( - start_date.replace(hour=0, minute=0, second=0), - end_date.replace(hour=23, minute=59, second=59) - )] + return [ + ( + start_date_pd.replace(hour=0, minute=0, second=0), + end_date_pd.replace(hour=23, minute=59, second=59), + ) + ] # Check if we need a partial first interval (if first generated date is after start_date) - if date_range[0].replace(hour=0, minute=0, second=0) > start_date.replace(hour=0, minute=0, second=0): + if date_range[0].replace(hour=0, minute=0, second=0) > start_date_pd.replace( + hour=0, minute=0, second=0 + ): intervals.append( ( - start_date.replace(hour=0, minute=0, second=0), + start_date_pd.replace(hour=0, minute=0, second=0), date_range[0] - pd.Timedelta(seconds=1), ) ) - + for i in range(len(date_range) - 1): intervals.append( ( diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index 6fcaadb..bdf0be9 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -19,12 +19,17 @@ ) from bigdata_research_tools.search.search import run_search from bigdata_research_tools.search.search_utils import filter_search_results -from bigdata_research_tools.tracing import WorkflowTraceEvent, WorkflowStatus, send_trace +from bigdata_research_tools.tracing import ( + WorkflowStatus, + WorkflowTraceEvent, + send_trace, +) logger: Logger = getLogger(__name__) SEARCH_BY_COMPANIES_NAME: str = "SearchByCompanies" + def search_by_companies( companies: list[Company], sentences: list[str], @@ -40,7 +45,7 @@ def search_by_companies( rerank_threshold: float | None = None, document_limit: int = 50, batch_size: int = 10, - workflow_name: str | None = SEARCH_BY_COMPANIES_NAME, + workflow_name: str = SEARCH_BY_COMPANIES_NAME, **kwargs, ) -> DataFrame: """ @@ -172,13 +177,14 @@ def search_by_companies( finally: if workflow_name == SEARCH_BY_COMPANIES_NAME: send_trace( - bigdata_connection(), WorkflowTraceEvent( + bigdata_connection(), + WorkflowTraceEvent( name=workflow_name, start_date=workflow_start, end_date=datetime.now(), llm_model=None, status=workflow_status, - ) + ), ) return df_sentences diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 3787da5..39b10cb 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -7,7 +7,6 @@ of the Bigdata API. """ -from datetime import datetime import itertools import logging import threading @@ -24,7 +23,12 @@ from tqdm import tqdm from bigdata_research_tools.client import bigdata_connection, init_bigdata_client -from bigdata_research_tools.tracing import ReportSearchUsageTraceEvent, WorkflowTraceEvent, send_trace, WorkflowStatus +from bigdata_research_tools.tracing import ( + ReportSearchUsageTraceEvent, + WorkflowStatus, + WorkflowTraceEvent, + send_trace, +) INPUT_DATE_RANGE = Union[ tuple[datetime, datetime], @@ -258,6 +262,7 @@ def get_quota_consumed(self) -> float: with self._quota_lock: return self.quota_consumed + def normalize_date_range( date_ranges: INPUT_DATE_RANGE, ) -> list[tuple[datetime, datetime] | RollingDateRange]: @@ -266,8 +271,10 @@ def normalize_date_range( return date_ranges + RUN_SEARCH_NAME: str = "RunSearch" + @overload def run_search( queries: list[QueryComponent], @@ -332,10 +339,8 @@ def run_search( workflow_start = datetime.now() workflow_status = WorkflowStatus.UNKNOWN - start_date = date_ranges[0][0] if date_ranges else None - end_date = date_ranges[-1][1] if date_ranges else None manager = None - try: + try: manager = SearchManager(**kwargs) query_results = manager.concurrent_search( queries=queries, @@ -352,24 +357,40 @@ def run_search( workflow_status = WorkflowStatus.FAILED raise finally: - if manager: - send_trace( - bigdata_connection(), ReportSearchUsageTraceEvent( - workflow_name=workflow_name, - document_type=scope.value, - start_date=start_date, - end_date=end_date, - query_units=manager.get_quota_consumed(), + start_date = "Unknown" + end_date = "Unknown" + try: + # We only know the exact start and end date if date_ranges is a list of tuples + # With rolling date ranges we cannot determine the exact dates or if empty list + if date_ranges and all(isinstance(dr, tuple) for dr in date_ranges): + start_date = date_ranges[0][0] + end_date = date_ranges[-1][1] + + if manager: + send_trace( + bigdata_connection(), + ReportSearchUsageTraceEvent( + workflow_name=workflow_name, + document_type=scope.value, + start_date=start_date, + end_date=end_date, + query_units=manager.get_quota_consumed(), + ), ) - ) + except Exception: + # Failed to send trace event, however in a try - finally block we should not raise exceptions + pass if workflow_name == RUN_SEARCH_NAME: - send_trace(bigdata_connection(), WorkflowTraceEvent( - name=workflow_name, - start_date=workflow_start, - end_date=datetime.now(), - llm_model=None, - status=workflow_status, - )) + send_trace( + bigdata_connection(), + WorkflowTraceEvent( + name=workflow_name, + start_date=workflow_start, + end_date=datetime.now(), + llm_model=None, + status=workflow_status, + ), + ) if only_results: return list(query_results.values()) diff --git a/src/bigdata_research_tools/tracing.py b/src/bigdata_research_tools/tracing.py index 8837944..5627ae6 100644 --- a/src/bigdata_research_tools/tracing.py +++ b/src/bigdata_research_tools/tracing.py @@ -22,10 +22,11 @@ class TraceEventNames(Enum): WORKFLOW_EXECUTION = "BigdataResearchToolsWorkflowExecution" QUERY_UNITS_CONSUMPTION = "BigdataResearchToolsQueryUnitsConsumption" + class TraceEventABC(ABC): @abstractmethod - def to_trace_event(self) -> tracking_services.TraceEvent: - ... + def to_trace_event(self) -> tracking_services.TraceEvent: ... + class WorkflowTraceEvent(BaseModel, TraceEventABC): start_date: datetime @@ -50,9 +51,9 @@ def to_trace_event(self) -> tracking_services.TraceEvent: "workflow_duration_seconds": self.duration, "bigdata_research_tools_version": __version__, "bigdata_client_version": version("bigdata-client"), - } + }, ) - + class ReportSearchUsageTraceEvent(BaseModel, TraceEventABC): workflow_name: str @@ -72,8 +73,9 @@ def to_trace_event(self) -> tracking_services.TraceEvent: "query_units": self.query_units, "bigdata_research_tools_version": __version__, "bigdata_client_version": version("bigdata-client"), - } + }, ) + def send_trace(bigdata: Bigdata, trace: TraceEventABC): tracking_services.send_trace(bigdata, trace.to_trace_event()) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 22f70fa..08b45fd 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -8,7 +8,11 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.narrative_labeler import NarrativeLabeler from bigdata_research_tools.search import search_narratives -from bigdata_research_tools.tracing import WorkflowTraceEvent, WorkflowStatus, send_trace +from bigdata_research_tools.tracing import ( + WorkflowStatus, + WorkflowTraceEvent, + send_trace, +) from bigdata_research_tools.workflows.base import Workflow logger: Logger = getLogger(__name__) @@ -16,6 +20,7 @@ class NarrativeMiner(Workflow): name: str = "NarrativeMiner" + def __init__( self, narrative_sentences: list[str], @@ -139,12 +144,15 @@ def mine_narratives( workflow_status = WorkflowStatus.FAILED raise finally: - send_trace(bigdata_client, WorkflowTraceEvent( - name=NarrativeMiner.name, - start_date=workflow_start, - end_date=datetime.now(), - llm_model=self.llm_model, - status=workflow_status, - )) + send_trace( + bigdata_client, + WorkflowTraceEvent( + name=NarrativeMiner.name, + start_date=workflow_start, + end_date=datetime.now(), + llm_model=self.llm_model, + status=workflow_status, + ), + ) return {"df_labeled": df_labeled} diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index dd5d858..bab33fe 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -11,7 +11,11 @@ from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies -from bigdata_research_tools.tracing import WorkflowTraceEvent, WorkflowStatus, send_trace +from bigdata_research_tools.tracing import ( + WorkflowStatus, + WorkflowTraceEvent, + send_trace, +) from bigdata_research_tools.tree import SemanticTree, generate_risk_tree from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df @@ -21,6 +25,7 @@ class RiskAnalyzer(Workflow): name: str = "RiskAnalyzer" + def __init__( self, llm_model: str, @@ -394,19 +399,22 @@ def screen_companies( risk_tree, export_path=export_path, ) - self.notify_observers(f"Results exported") + self.notify_observers("Results exported") workflow_status = WorkflowStatus.SUCCESS except BaseException: workflow_status = WorkflowStatus.FAILED raise finally: - send_trace(bigdata_client, WorkflowTraceEvent( - name=RiskAnalyzer.name, - start_date=workflow_start, - end_date=datetime.now(), - llm_model=self.llm_model, - status=workflow_status, - )) + send_trace( + bigdata_client, + WorkflowTraceEvent( + name=RiskAnalyzer.name, + start_date=workflow_start, + end_date=datetime.now(), + llm_model=self.llm_model, + status=workflow_status, + ), + ) return { "df_labeled": df_labeled, "df_company": df_company, diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index b6d9b63..06b5b63 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -11,7 +11,11 @@ from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies -from bigdata_research_tools.tracing import WorkflowTraceEvent, WorkflowStatus, send_trace +from bigdata_research_tools.tracing import ( + WorkflowStatus, + WorkflowTraceEvent, + send_trace, +) from bigdata_research_tools.tree import generate_theme_tree from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df @@ -21,6 +25,7 @@ class ThematicScreener(Workflow): name: str = "ThematicScreener" + def __init__( self, llm_model: str, @@ -232,19 +237,22 @@ def screen_companies( "Motivations": (motivation_df, (0, 0)), }, ) - self.notify_observers(f"Results exported.") + self.notify_observers("Results exported.") workflow_status = WorkflowStatus.SUCCESS except BaseException: workflow_status = WorkflowStatus.FAILED raise finally: - send_trace(bigdata_client, WorkflowTraceEvent( - name=ThematicScreener.name, - start_date=workflow_start, - end_date=datetime.now(), - llm_model=self.llm_model, - status=workflow_status, - )) + send_trace( + bigdata_client, + WorkflowTraceEvent( + name=ThematicScreener.name, + start_date=workflow_start, + end_date=datetime.now(), + llm_model=self.llm_model, + status=workflow_status, + ), + ) return { "df_labeled": df, diff --git a/tests/test_search/__init__.py b/tests/test_search/__init__.py index 60a9144..a7843e7 100644 --- a/tests/test_search/__init__.py +++ b/tests/test_search/__init__.py @@ -1,2 +1 @@ """Tests for search module.""" - diff --git a/tests/test_search/test_query_builder.py b/tests/test_search/test_query_builder.py index f2a7799..f529e72 100644 --- a/tests/test_search/test_query_builder.py +++ b/tests/test_search/test_query_builder.py @@ -1,231 +1,236 @@ """ Tests for query_builder date interval functions. """ -import pytest + import pandas as pd -from bigdata_research_tools.search.query_builder import create_date_intervals, create_date_ranges +import pytest + +from bigdata_research_tools.search.query_builder import ( + create_date_intervals, + create_date_ranges, +) class TestCreateDateIntervals: """Test create_date_intervals function with various scenarios.""" - + def test_weekly_with_partial_first_interval(self): """Test weekly frequency when start date is mid-week (loses initial days without fix).""" start_date = "2025-10-01" # Wednesday - end_date = "2025-10-11" # Saturday (10 days later) + end_date = "2025-10-11" # Saturday (10 days later) freq = "W" # Weekly (defaults to Sunday) - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 2 intervals: # 1. Partial: 2025-10-01 to 2025-10-04 (Wed-Sat, before first Sunday) # 2. Full week: 2025-10-05 to 2025-10-11 (Sun-Sat) assert len(intervals) == 2 - + # Check first partial interval assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-10-04 23:59:59") - + # Check second interval assert intervals[1][0] == pd.Timestamp("2025-10-05 00:00:00") assert intervals[1][1] == pd.Timestamp("2025-10-11 23:59:59") - + def test_monthly_aligned_start(self): """Test monthly frequency when start date aligns with month start.""" start_date = "2025-01-01" end_date = "2025-03-31" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 3 intervals (Jan, Feb, Mar) assert len(intervals) == 3 - + # Check January assert intervals[0][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-01-31 23:59:59") - + # Check February assert intervals[1][0] == pd.Timestamp("2025-02-01 00:00:00") assert intervals[1][1] == pd.Timestamp("2025-02-28 23:59:59") - + # Check March assert intervals[2][0] == pd.Timestamp("2025-03-01 00:00:00") assert intervals[2][1] == pd.Timestamp("2025-03-31 23:59:59") - + def test_monthly_mid_month_start(self): """Test monthly frequency when start date is mid-month.""" start_date = "2025-01-15" end_date = "2025-03-31" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 3 intervals: # 1. Partial: 2025-01-15 to 2025-01-31 # 2. Full: 2025-02-01 to 2025-02-28 # 3. Full: 2025-03-01 to 2025-03-31 assert len(intervals) == 3 - + # Check partial first interval assert intervals[0][0] == pd.Timestamp("2025-01-15 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-01-31 23:59:59") - + def test_quarterly_frequency(self): """Test quarterly frequency (3M).""" start_date = "2025-01-01" end_date = "2025-12-31" freq = "3M" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 4 quarters assert len(intervals) == 4 - + # Q1 assert intervals[0][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-03-31 23:59:59") - + # Q4 assert intervals[3][0] == pd.Timestamp("2025-10-01 00:00:00") assert intervals[3][1] == pd.Timestamp("2025-12-31 23:59:59") - + def test_daily_frequency(self): """Test daily frequency.""" start_date = "2025-10-01" end_date = "2025-10-05" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 5 daily intervals assert len(intervals) == 5 - + # Check first day assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-10-01 23:59:59") - + # Check last day assert intervals[4][0] == pd.Timestamp("2025-10-05 00:00:00") assert intervals[4][1] == pd.Timestamp("2025-10-05 23:59:59") - + def test_yearly_frequency(self): """Test yearly frequency.""" start_date = "2023-01-01" end_date = "2025-12-31" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 3 years assert len(intervals) == 3 - + # Check 2023 assert intervals[0][0] == pd.Timestamp("2023-01-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2023-12-31 23:59:59") - + # Check 2025 assert intervals[2][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[2][1] == pd.Timestamp("2025-12-31 23:59:59") - + def test_single_day_range(self): """Test when start and end date are the same.""" start_date = "2025-10-01" end_date = "2025-10-01" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 1 interval for single day assert len(intervals) == 1 assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-10-01 23:59:59") - + def test_short_range_weekly(self): """Test weekly frequency with range shorter than a week.""" start_date = "2025-10-01" # Wednesday - end_date = "2025-10-03" # Friday + end_date = "2025-10-03" # Friday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate 1 interval (no Sunday in range) # Interval should cover the entire range assert len(intervals) == 1 assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") assert intervals[0][1] == pd.Timestamp("2025-10-03 23:59:59") - + def test_biweekly_frequency(self): """Test bi-weekly frequency (2W).""" start_date = "2025-10-01" end_date = "2025-10-31" freq = "2W" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should generate multiple intervals assert len(intervals) > 1 - + # First interval should start at start_date assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") - + # Last interval should end at end_date assert intervals[-1][1] == pd.Timestamp("2025-10-31 23:59:59") - + def test_no_gaps_in_intervals(self): """Test that there are no gaps between intervals.""" start_date = "2025-01-15" end_date = "2025-04-30" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Check no gaps between consecutive intervals for i in range(len(intervals) - 1): end_of_current = intervals[i][1] start_of_next = intervals[i + 1][0] - + # Next interval should start 1 second after current ends expected_next_start = end_of_current + pd.Timedelta(seconds=1) assert start_of_next == expected_next_start - + def test_coverage_of_entire_range(self): """Test that intervals cover the entire date range.""" start_date = "2025-10-01" end_date = "2025-10-11" freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + # First interval should start at start_date assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") - + # Last interval should end at end_date assert intervals[-1][1] == pd.Timestamp("2025-10-11 23:59:59") - + def test_invalid_frequency_raises_error(self): """Test that invalid frequency raises ValueError.""" start_date = "2025-10-01" end_date = "2025-10-11" freq = "X" # Invalid frequency - + with pytest.raises(ValueError, match="Invalid frequency"): create_date_intervals(start_date, end_date, freq) class TestCreateDateIntervalsExtreme: """Extreme edge case tests focusing on first and last intervals.""" - + # ==================== DAILY FREQUENCY TESTS ==================== - + def test_daily_single_day(self): """Daily: Single day - first and last should be same.""" start_date = "2025-06-15" end_date = "2025-06-15" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 1 # First interval assert intervals[0][0] == pd.Timestamp("2025-06-15 00:00:00") @@ -233,15 +238,15 @@ def test_daily_single_day(self): # Last interval (same as first) assert intervals[-1][0] == pd.Timestamp("2025-06-15 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-06-15 23:59:59") - + def test_daily_two_days(self): """Daily: Two days - verify first and last.""" start_date = "2025-12-30" end_date = "2025-12-31" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 2 # First interval assert intervals[0][0] == pd.Timestamp("2025-12-30 00:00:00") @@ -249,15 +254,15 @@ def test_daily_two_days(self): # Last interval assert intervals[-1][0] == pd.Timestamp("2025-12-31 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-12-31 23:59:59") - + def test_daily_across_year_boundary(self): """Daily: Across year boundary - verify first and last.""" start_date = "2024-12-30" end_date = "2025-01-02" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 4 # First interval (2024) assert intervals[0][0] == pd.Timestamp("2024-12-30 00:00:00") @@ -265,15 +270,15 @@ def test_daily_across_year_boundary(self): # Last interval (2025) assert intervals[-1][0] == pd.Timestamp("2025-01-02 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-01-02 23:59:59") - + def test_daily_leap_year_feb_29(self): """Daily: Including Feb 29 in leap year.""" start_date = "2024-02-28" end_date = "2024-03-01" freq = "D" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # 28, 29, Mar 1 # First interval assert intervals[0][0] == pd.Timestamp("2024-02-28 00:00:00") @@ -281,17 +286,17 @@ def test_daily_leap_year_feb_29(self): # Last interval assert intervals[-1][0] == pd.Timestamp("2024-03-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2024-03-01 23:59:59") - + # ==================== WEEKLY FREQUENCY TESTS ==================== - + def test_weekly_starts_on_sunday(self): """Weekly: Start on Sunday - no partial first interval.""" start_date = "2025-10-05" # Sunday - end_date = "2025-10-25" # Saturday + end_date = "2025-10-25" # Saturday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (should start on Sunday, no partial) assert intervals[0][0] == pd.Timestamp("2025-10-05 00:00:00") @@ -299,15 +304,15 @@ def test_weekly_starts_on_sunday(self): # Last interval assert intervals[-1][0] == pd.Timestamp("2025-10-19 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-10-25 23:59:59") - + def test_weekly_starts_on_saturday(self): """Weekly: Start on Saturday - partial first interval (1 day).""" start_date = "2025-10-04" # Saturday - end_date = "2025-10-18" # Saturday + end_date = "2025-10-18" # Saturday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: just Saturday) assert intervals[0][0] == pd.Timestamp("2025-10-04 00:00:00") @@ -315,15 +320,15 @@ def test_weekly_starts_on_saturday(self): # Last interval assert intervals[-1][0] == pd.Timestamp("2025-10-12 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-10-18 23:59:59") - + def test_weekly_starts_on_monday(self): """Weekly: Start on Monday - partial first interval.""" start_date = "2025-09-29" # Monday - end_date = "2025-10-12" # Sunday + end_date = "2025-10-12" # Sunday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: Mon-Sat, 6 days) assert intervals[0][0] == pd.Timestamp("2025-09-29 00:00:00") @@ -334,15 +339,15 @@ def test_weekly_starts_on_monday(self): # Last interval (partial: just Sunday, 1 day) assert intervals[-1][0] == pd.Timestamp("2025-10-12 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-10-12 23:59:59") - + def test_weekly_ends_mid_week(self): """Weekly: End on Wednesday - last interval should end on Wednesday.""" start_date = "2025-10-01" # Wednesday - end_date = "2025-10-15" # Wednesday + end_date = "2025-10-15" # Wednesday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: Wed-Sat) assert intervals[0][0] == pd.Timestamp("2025-10-01 00:00:00") @@ -350,30 +355,30 @@ def test_weekly_ends_mid_week(self): # Last interval (partial: Sun-Wed) assert intervals[-1][0] == pd.Timestamp("2025-10-12 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-10-15 23:59:59") - + def test_weekly_exactly_one_week(self): """Weekly: Exactly one week (Sun-Sat).""" start_date = "2025-10-05" # Sunday - end_date = "2025-10-11" # Saturday + end_date = "2025-10-11" # Saturday freq = "W" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 1 # First and last (same interval) assert intervals[0][0] == pd.Timestamp("2025-10-05 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-10-11 23:59:59") - + # ==================== MONTHLY FREQUENCY TESTS ==================== - + def test_monthly_starts_first_day(self): """Monthly: Start on 1st - no partial first interval.""" start_date = "2025-01-01" end_date = "2025-03-31" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (full month) assert intervals[0][0] == pd.Timestamp("2025-01-01 00:00:00") @@ -381,15 +386,15 @@ def test_monthly_starts_first_day(self): # Last interval assert intervals[-1][0] == pd.Timestamp("2025-03-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-03-31 23:59:59") - + def test_monthly_starts_last_day(self): """Monthly: Start on 31st - partial first interval (1 day).""" start_date = "2025-01-31" end_date = "2025-03-15" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: just Jan 31) assert intervals[0][0] == pd.Timestamp("2025-01-31 00:00:00") @@ -397,15 +402,15 @@ def test_monthly_starts_last_day(self): # Last interval (partial: Mar 1-15) assert intervals[-1][0] == pd.Timestamp("2025-03-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-03-15 23:59:59") - + def test_monthly_starts_mid_month(self): """Monthly: Start on 15th - partial first interval.""" start_date = "2025-05-15" end_date = "2025-08-31" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 4 # First interval (partial: May 15-31) assert intervals[0][0] == pd.Timestamp("2025-05-15 00:00:00") @@ -413,15 +418,15 @@ def test_monthly_starts_mid_month(self): # Last interval (full month) assert intervals[-1][0] == pd.Timestamp("2025-08-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-08-31 23:59:59") - + def test_monthly_ends_mid_month(self): """Monthly: End on 10th - last interval should end on 10th.""" start_date = "2025-01-01" end_date = "2025-03-10" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (full month) assert intervals[0][0] == pd.Timestamp("2025-01-01 00:00:00") @@ -429,54 +434,54 @@ def test_monthly_ends_mid_month(self): # Last interval (partial: Mar 1-10) assert intervals[-1][0] == pd.Timestamp("2025-03-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-03-10 23:59:59") - + def test_monthly_february_non_leap_year(self): """Monthly: February in non-leap year.""" start_date = "2025-01-15" end_date = "2025-03-15" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # Check February (28 days) assert intervals[1][0] == pd.Timestamp("2025-02-01 00:00:00") assert intervals[1][1] == pd.Timestamp("2025-02-28 23:59:59") - + def test_monthly_february_leap_year(self): """Monthly: February in leap year.""" start_date = "2024-01-15" end_date = "2024-03-15" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # Check February (29 days) assert intervals[1][0] == pd.Timestamp("2024-02-01 00:00:00") assert intervals[1][1] == pd.Timestamp("2024-02-29 23:59:59") - + def test_monthly_exactly_one_month(self): """Monthly: Exactly one month.""" start_date = "2025-06-01" end_date = "2025-06-30" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 1 # First and last (same interval) assert intervals[0][0] == pd.Timestamp("2025-06-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-06-30 23:59:59") - + def test_monthly_across_year_boundary(self): """Monthly: Across year boundary.""" start_date = "2024-11-15" end_date = "2025-02-20" freq = "M" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 4 # First interval (partial Nov 2024) assert intervals[0][0] == pd.Timestamp("2024-11-15 00:00:00") @@ -484,17 +489,17 @@ def test_monthly_across_year_boundary(self): # Last interval (partial Feb 2025) assert intervals[-1][0] == pd.Timestamp("2025-02-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-02-20 23:59:59") - + # ==================== YEARLY FREQUENCY TESTS ==================== - + def test_yearly_starts_jan_1(self): """Yearly: Start on Jan 1 - no partial first interval.""" start_date = "2023-01-01" end_date = "2025-12-31" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (full year 2023) assert intervals[0][0] == pd.Timestamp("2023-01-01 00:00:00") @@ -502,15 +507,15 @@ def test_yearly_starts_jan_1(self): # Last interval (full year 2025) assert intervals[-1][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-12-31 23:59:59") - + def test_yearly_starts_mid_year(self): """Yearly: Start on July 1 - partial first interval.""" start_date = "2023-07-01" end_date = "2025-06-30" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: Jul-Dec 2023) assert intervals[0][0] == pd.Timestamp("2023-07-01 00:00:00") @@ -518,15 +523,15 @@ def test_yearly_starts_mid_year(self): # Last interval (partial: Jan-Jun 2025) assert intervals[-1][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-06-30 23:59:59") - + def test_yearly_starts_dec_31(self): """Yearly: Start on Dec 31 - partial first interval (1 day).""" start_date = "2023-12-31" end_date = "2025-01-15" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (partial: just Dec 31, 2023) assert intervals[0][0] == pd.Timestamp("2023-12-31 00:00:00") @@ -534,15 +539,15 @@ def test_yearly_starts_dec_31(self): # Last interval (partial: Jan 1-15, 2025) assert intervals[-1][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-01-15 23:59:59") - + def test_yearly_ends_mid_year(self): """Yearly: End on June 30 - last interval should end on June 30.""" start_date = "2023-01-01" end_date = "2025-06-30" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval (full year) assert intervals[0][0] == pd.Timestamp("2023-01-01 00:00:00") @@ -550,41 +555,41 @@ def test_yearly_ends_mid_year(self): # Last interval (partial: Jan-Jun 2025) assert intervals[-1][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-06-30 23:59:59") - + def test_yearly_exactly_one_year(self): """Yearly: Exactly one year.""" start_date = "2025-01-01" end_date = "2025-12-31" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 1 # First and last (same interval) assert intervals[0][0] == pd.Timestamp("2025-01-01 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-12-31 23:59:59") - + def test_yearly_less_than_one_year(self): """Yearly: Less than one year - should return single interval.""" start_date = "2025-03-15" end_date = "2025-09-20" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + # Should return 1 interval covering entire range assert len(intervals) == 1 assert intervals[0][0] == pd.Timestamp("2025-03-15 00:00:00") assert intervals[-1][1] == pd.Timestamp("2025-09-20 23:59:59") - + def test_yearly_includes_leap_year(self): """Yearly: Including leap year 2024.""" start_date = "2023-06-01" end_date = "2025-06-01" freq = "Y" - + intervals = create_date_intervals(start_date, end_date, freq) - + assert len(intervals) == 3 # First interval assert intervals[0][0] == pd.Timestamp("2023-06-01 00:00:00") @@ -599,32 +604,33 @@ def test_yearly_includes_leap_year(self): class TestCreateDateRanges: """Test create_date_ranges function.""" - + def test_returns_absolute_date_ranges(self): """Test that function returns list of AbsoluteDateRange objects.""" start_date = "2025-01-01" end_date = "2025-03-31" freq = "M" - + date_ranges = create_date_ranges(start_date, end_date, freq) - + # Should return list assert isinstance(date_ranges, list) assert len(date_ranges) == 3 - + # Each element should be an AbsoluteDateRange from bigdata_client.daterange import AbsoluteDateRange + for date_range in date_ranges: assert isinstance(date_range, AbsoluteDateRange) - + def test_weekly_date_ranges(self): """Test weekly date ranges creation.""" start_date = "2025-10-01" end_date = "2025-10-11" freq = "W" - + date_ranges = create_date_ranges(start_date, end_date, freq) - + # Should create 2 date ranges (partial week + full week) assert len(date_ranges) == 2 @@ -632,4 +638,3 @@ def test_weekly_date_ranges(self): if __name__ == "__main__": # Run tests with: python -m pytest tests/test_search/test_query_builder.py -v pytest.main([__file__, "-v"]) - From 7e33ffaf667e41716e22c0f2577ec019ce8677c3 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 23 Oct 2025 11:27:59 +0200 Subject: [PATCH 34/82] Bump version --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a13ce7d..209308f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bigdata-research-tools" -version = "1.0.0-beta-0" +version = "1.0.0-beta-1" description = "Bigdata.com API High-Efficiency Tools at Scale" readme = "README.md" authors = [{ name = "Bigdata.com", email = "support@ravenpack.com" }] diff --git a/uv.lock b/uv.lock index 2be7d3b..0a200bb 100644 --- a/uv.lock +++ b/uv.lock @@ -216,7 +216,7 @@ wheels = [ [[package]] name = "bigdata-research-tools" -version = "1.0.0b0" +version = "1.0.0b1" source = { editable = "." } dependencies = [ { name = "bigdata-client" }, From 2e9324f8c1253a9032779a5bee18fd9f250f7990 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 23 Oct 2025 15:52:01 +0200 Subject: [PATCH 35/82] Fix tracing on v1 beta --- src/bigdata_research_tools/search/search.py | 5 +++++ src/bigdata_research_tools/workflows/risk_analyzer.py | 1 + 2 files changed, 6 insertions(+) diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 39b10cb..b2a9873 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -365,6 +365,11 @@ def run_search( if date_ranges and all(isinstance(dr, tuple) for dr in date_ranges): start_date = date_ranges[0][0] end_date = date_ranges[-1][1] + if isinstance(start_date, datetime): + start_date = start_date.isoformat() + if isinstance(end_date, datetime): + end_date = end_date.isoformat() + if manager: send_trace( diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index bab33fe..19b26e6 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -134,6 +134,7 @@ def retrieve_results( frequency=frequency, document_limit=document_limit, batch_size=batch_size, + workflow_name=RiskAnalyzer.name, ) return df_sentences From 4d1bb23e0dae25f10e0afc537532da5cab2f7359 Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 24 Oct 2025 16:54:52 +0200 Subject: [PATCH 36/82] Fix very long delays on a very small number of OpenAI request --- Makefile | 2 +- src/bigdata_research_tools/labeler/labeler.py | 14 +++++++++++-- .../labeler/narrative_labeler.py | 4 +++- .../labeler/risk_labeler.py | 4 +++- .../labeler/screener_labeler.py | 4 +++- src/bigdata_research_tools/llm/utils.py | 20 +++++++++++++++++-- src/bigdata_research_tools/search/search.py | 1 - src/bigdata_research_tools/tree.py | 4 ++-- tests/test_llm/test_utils.py | 2 +- tests/test_utils/test_observer.py | 8 ++++---- 10 files changed, 47 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 13a5cfb..b7baa14 100644 --- a/Makefile +++ b/Makefile @@ -13,4 +13,4 @@ format: @uvx ruff format src/bigdata_research_tools/ examples/ tutorial/ tests/ type-check: - @uvx ty check src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Ignore tutorials, the issues come from this open issuehttps://github.com/astral-sh/ty/issues/1297 \ No newline at end of file + @uvx ty check --python-version 3.13 src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Fix version to 3.13 due to this issue https://github.com/astral-sh/ty/issues/1355 # Ignore tutorials, the issues come from this open issue https://github.com/astral-sh/ty/issues/1297 \ No newline at end of file diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 9e170c2..cadb1c0 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -88,7 +88,11 @@ def _deserialize_label_responses( return df_labels def _run_labeling_prompts( - self, prompts: list[str], system_prompt: str, max_workers: int = 100 + self, + prompts: list[str], + system_prompt: str, + timeout: int | None, + max_workers: int = 100, ) -> list: """ Get the labels from the prompts. @@ -96,6 +100,7 @@ def _run_labeling_prompts( Args: prompts: List of prompts to process system_prompt: System prompt for the LLM + timeout: Timeout for each LLM request for concurrent calls max_workers: Maximum number of concurrent workers Returns: @@ -120,7 +125,12 @@ def _run_labeling_prompts( else: llm = AsyncLLMEngine(model=self.llm_model) return run_concurrent_prompts( - llm, prompts, system_prompt, max_workers, **llm_kwargs + llm, + prompts, + system_prompt, + timeout, + max_workers=max_workers, + **llm_kwargs, ) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 8db6cdc..7b38e91 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -40,6 +40,7 @@ def get_labels( theme_labels: list[str], texts: list[str], max_workers: int = 50, + timeout: int | None = 20, ) -> DataFrame: """ Process thematic labels for texts. @@ -47,6 +48,7 @@ def get_labels( Args: theme_labels: The main theme to analyze. texts: List of texts to label. + timeout: Timeout for each LLM request. max_workers: Maximum number of concurrent workers. Returns: @@ -64,7 +66,7 @@ def get_labels( prompts = get_prompts_for_labeler(texts) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers + prompts, system_prompt, max_workers=max_workers, timeout=timeout ) responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index ab5b6d6..19fb930 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -47,6 +47,7 @@ def get_labels( labels: list[str], texts: list[str], max_workers: int = 50, + timeout: int | None = 20, textsconfig: list[dict[str, Any]] | None = None, ) -> DataFrame: """ @@ -56,6 +57,7 @@ def get_labels( main_theme: The main theme to analyze. labels: Labels for labelling the chunks. texts: List of chunks to label. + timeout: Timeout for each LLM request. max_workers: Maximum number of concurrent workers. Returns: @@ -74,7 +76,7 @@ def get_labels( prompts = get_prompts_for_labeler(texts, textsconfig) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers + prompts, system_prompt, max_workers=max_workers, timeout=timeout ) responses = [parse_labeling_response(response) for response in responses] diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 7e06b7c..ba86b1c 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -43,6 +43,7 @@ def get_labels( main_theme: str, labels: list[str], texts: list[str], + timeout: int | None = 20, max_workers: int = 50, ) -> DataFrame: """ @@ -52,6 +53,7 @@ def get_labels( main_theme: The main theme to analyze. labels: Labels for labelling the chunks. texts: List of chunks to label. + timeout: Timeout for each LLM request. max_workers: Maximum number of concurrent workers. Returns: @@ -67,7 +69,7 @@ def get_labels( prompts = get_prompts_for_labeler(texts) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers + prompts, system_prompt, max_workers=max_workers, timeout=timeout ) responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index dc79f02..f65dedf 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -16,6 +16,7 @@ def run_concurrent_prompts( llm_engine: AsyncLLMEngine, prompts: list[str], system_prompt: str, + timeout: int | None, max_workers: int = 30, **kwargs, ) -> list[str]: @@ -26,6 +27,7 @@ def run_concurrent_prompts( llm_engine (AsyncLLMEngine): The LLM engine to use. prompts (list[str]): List of prompts to run concurrently. system_prompt (str): The system prompt. + timeout (int | None): Timeout for each LLM request. max_workers (int): The maximum number of workers to run concurrently. kwargs (dict): Additional arguments to pass to the `get_response` method of the LLMEngine. @@ -36,7 +38,7 @@ def run_concurrent_prompts( logger.info(f"Running {len(prompts)} prompts concurrently") tasks = [ _fetch_with_semaphore( - idx, llm_engine, semaphore, system_prompt, prompt, **kwargs + idx, llm_engine, semaphore, system_prompt, prompt, timeout=timeout, **kwargs ) for idx, prompt in enumerate(prompts) ] @@ -49,6 +51,7 @@ async def _fetch_with_semaphore( semaphore: asyncio.Semaphore, system_prompt: str, prompt: str, + timeout: int | None, **kwargs, ) -> tuple[int, str]: """ @@ -61,6 +64,7 @@ async def _fetch_with_semaphore( number of concurrent requests. system_prompt (str): The system prompt. prompt (str): The prompt to run. + timeout (int | None): Timeout for the LLM request. kwargs (dict): Additional arguments to pass to the `get_response` method of the LLMEngine. Returns: @@ -76,9 +80,21 @@ async def _fetch_with_semaphore( last_exception = None for attempt in range(max_retries): try: - response = await llm_engine.get_response(chat_history, **kwargs) + # Sometimes, the LLM (often OpenAI) can take up to ten minutes to respond without throwing an error, + # retrying after a prudential timeout avoids this situation. + # A first analysis show that: + # from 5k requests + # ~20 took longer than 10 seconds + # ~10 took longer than 30 seconds + # ~3 took longer than 60 seconds, with up to 600 seconds + async with asyncio.timeout(timeout): + response = await llm_engine.get_response(chat_history, **kwargs) return idx, response except Exception as e: + if isinstance(e, asyncio.TimeoutError) and attempt == 0: + logger.warning( + f"Timeout occurred for prompt during LLM call, current timeout configured {timeout} seconds. If this keeps happening (> 1% of your requests), consider increasing the timeout. Retrying..." + ) last_exception = e await asyncio.sleep(retry_delay) # Exponential backoff diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index b2a9873..541fa50 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -370,7 +370,6 @@ def run_search( if isinstance(end_date, datetime): end_date = end_date.isoformat() - if manager: send_trace( bigdata_connection(), diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index d912c53..e18ac77 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -185,9 +185,9 @@ def visualize(self, engine: str = "graphviz") -> None: - 'plotly': A Plotly figure object for rendering the mindmap. """ if engine == "graphviz": - return self._visualize_graphviz() + self._visualize_graphviz() elif engine == "plotly": - return self._visualize_plotly() + self._visualize_plotly() else: raise ValueError( f"Unsupported engine '{engine}'. " diff --git a/tests/test_llm/test_utils.py b/tests/test_llm/test_utils.py index f6ae839..80bd8f7 100644 --- a/tests/test_llm/test_utils.py +++ b/tests/test_llm/test_utils.py @@ -11,5 +11,5 @@ def test_run_concurrent_prompts(monkeypatch): engine = DummyAsyncLLMEngine() prompts = ["prompt1", "prompt2"] system_prompt = "system" - responses = run_concurrent_prompts(engine, prompts, system_prompt) + responses = run_concurrent_prompts(engine, prompts, system_prompt, timeout=10) assert responses == ["dummy response", "dummy response"] diff --git a/tests/test_utils/test_observer.py b/tests/test_utils/test_observer.py index 18d660d..a08b004 100644 --- a/tests/test_utils/test_observer.py +++ b/tests/test_utils/test_observer.py @@ -7,7 +7,7 @@ ) -class TestObserver(Observer): +class ExampleObserver(Observer): def __init__(self): self.notifications = [] @@ -25,8 +25,8 @@ def test_observer_notification_model(): def test_observable_register_and_notify(): observable = Observable() - observer1 = TestObserver() - observer2 = TestObserver() + observer1 = ExampleObserver() + observer2 = ExampleObserver() observable.register_observer(observer1) observable.register_observer(observer2) message = "test message" @@ -40,7 +40,7 @@ def test_observable_register_and_notify(): def test_observable_unregister(): observable = Observable() - observer = TestObserver() + observer = ExampleObserver() observable.register_observer(observer) observable.notify_observers("first message") assert len(observer.notifications) == 1 From b3009b14bfcaaea80b090423bc17a9f196c31b0a Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 5 Nov 2025 14:10:08 +0000 Subject: [PATCH 37/82] adding llmconfig to support reasoning models --- examples/narrative_miner.py | 7 +- examples/risk_analyzer.py | 9 ++- examples/thematic_screener.py | 10 +-- src/bigdata_research_tools/labeler/labeler.py | 44 ++++++++--- .../labeler/narrative_labeler.py | 8 +- .../labeler/risk_labeler.py | 9 +-- .../labeler/screener_labeler.py | 10 ++- src/bigdata_research_tools/llm/base.py | 75 +++++++++++++++++++ .../portfolio/motivation.py | 44 ++++++----- src/bigdata_research_tools/tree.py | 55 +++++++++++--- .../workflows/narrative_miner.py | 18 ++++- .../workflows/risk_analyzer.py | 24 ++++-- .../workflows/thematic_screener.py | 21 ++++-- 13 files changed, 259 insertions(+), 75 deletions(-) diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index f3c4a4f..fdb383a 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -2,11 +2,12 @@ from bigdata_client.models.search import DocumentType +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.workflows import NarrativeMiner -def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> dict: +def narrative_miner_example(llm_model_config: dict | str | LLMConfig = "openai::gpt-4o-mini", export_path: str = "narrative_miner_sample.xlsx") -> dict: narrative_miner = NarrativeMiner( narrative_sentences=[ "Supervised Learning Techniques", @@ -26,7 +27,7 @@ def narrative_miner_example(export_path: str = "narrative_miner_sample.xlsx") -> "Data Privacy Solutions", ], sources=None, - llm_model="openai::gpt-4o-mini", + llm_model_config=llm_model_config, start_date="2024-11-01", end_date="2024-11-15", rerank_threshold=None, @@ -58,4 +59,4 @@ def update(self, message: OberserverNotification): output_path = Path("outputs/narrative_miner_sample.xlsx") output_path.parent.mkdir(parents=True, exist_ok=True) - narrative_miner_example(export_path=str(output_path)) + narrative_miner_example(export_path=str(output_path), llm_model_config={'model': "openai::gpt-5-mini", 'temperature':0}) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 26dad14..c65883e 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -3,13 +3,14 @@ from bigdata_client.models.search import DocumentType from bigdata_research_tools.client import bigdata_connection +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.workflows.risk_analyzer import RiskAnalyzer def risk_analyzer_example( risk_scenario: str, - llm_model: str = "openai::gpt-4o-mini", + llm_model_config: dict | LLMConfig | None = None, keywords: list = ["Tariffs"], control_entities: dict = {"place": ["Canada", "Mexico"]}, focus: str = "", @@ -24,7 +25,6 @@ def risk_analyzer_example( companies = bigdata.knowledge_graph.get_entities(watchlist_grid.items) analyzer = RiskAnalyzer( - llm_model=llm_model, main_theme=risk_scenario, companies=companies, start_date="2025-01-01", @@ -33,6 +33,7 @@ def risk_analyzer_example( document_type=DocumentType.NEWS, control_entities=control_entities, focus=focus, # Optional focus to narrow the theme, + llm_model_config=llm_model_config, ) class PrintObserver(Observer): @@ -63,6 +64,10 @@ def update(self, message: OberserverNotification): "US Import Tariffs against Canada and Mexico", focus="Provide a detailed taxonomy of risks describing how new American import tariffs against Canada and Mexico will impact US companies, their operations and strategy. Cover trade-relations risks, foreign market access risks, supply chain risks, US market sales and revenue risks (including price impacts), and intellectual property risks, provide at least 4 sub-scenarios for each risk factor.", export_path=str(output_path), + llm_model_config=LLMConfig( + model="openai::gpt-5-mini", + reasoning_effort="high", + ), ) # custom_config = { # 'company_column': 'Company', diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 6d87248..42c7365 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -1,7 +1,7 @@ from pathlib import Path from bigdata_client.models.search import DocumentType - +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.client import bigdata_connection from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.visuals import create_thematic_exposure_dashboard @@ -10,7 +10,7 @@ def thematic_screener_example( theme_name: str, - llm_model: str = "openai::gpt-4o-mini", + llm_model_config: str | dict | LLMConfig = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", ) -> dict: GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" @@ -22,11 +22,11 @@ def thematic_screener_example( companies = bigdata.knowledge_graph.get_entities(watchlist_grid.items) thematic_screener = ThematicScreener( - llm_model=llm_model, + llm_model_config=llm_model_config, main_theme=theme_name, companies=companies, start_date="2024-01-01", - end_date="2024-11-15", + end_date="2024-02-28", document_type=DocumentType.TRANSCRIPTS, fiscal_year=2024, ) @@ -55,7 +55,7 @@ def update(self, message: OberserverNotification): output_path = Path("outputs/thematic_screener_results.xlsx") output_path.parent.mkdir(parents=True, exist_ok=True) - x = thematic_screener_example("Chip Manufacturers", export_path=str(output_path)) + x = thematic_screener_example("Chip Manufacturers", export_path=str(output_path), llm_model_config="openai::gpt-5-mini") custom_config = { "company_column": "Company", "heatmap_colorscale": "Plasma", diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 9e170c2..0e5e6eb 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -2,12 +2,12 @@ from itertools import zip_longest from json import JSONDecodeError, dumps, loads from logging import Logger, getLogger -from typing import Any +from typing import Any, Optional from json_repair import repair_json from pandas import DataFrame -from bigdata_research_tools.llm.base import AsyncLLMEngine, LLMEngine +from bigdata_research_tools.llm.base import AsyncLLMEngine, LLMEngine, LLMConfig, REASONING_MODELS from bigdata_research_tools.llm.utils import ( run_concurrent_prompts, run_parallel_prompts, @@ -21,10 +21,11 @@ class Labeler: def __init__( self, - llm_model: str, + llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', + #llm_model: str, ##included in the config? # Note that his value is also used in the prompts. unknown_label: str = "unclear", - temperature: float = 0, + ): """Initialize base Labeler. @@ -32,12 +33,35 @@ def __init__( llm_model: Name of the LLM model to use. Expected format: ::, e.g. "openai::gpt-4o-mini" unknown_label: Label for unclear classifications - temperature: Temperature to use in the LLM model. + """ - self.llm_model = llm_model - self.temperature = temperature + if isinstance(llm_model_config, dict): + self.llm_model_config = LLMConfig(**llm_model_config) + elif isinstance(llm_model_config, str): + self.llm_model = llm_model_config + self.llm_model_config = self.get_default_labeler_config(llm_model_config) + else: + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config.model + + print(llm_model_config) + self.unknown_label = unknown_label + def get_default_labeler_config(self, model) -> LLMConfig: + """Get default LLM model configuration for labeling.""" + if any(rm in model for rm in REASONING_MODELS): + return LLMConfig(model=model, reasoning_effort='high', seed=42, response_format={"type": "json_object"}) + else: + return LLMConfig(model=model, + temperature=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + seed=42, + response_format={"type": "json_object"}, + ) + def _deserialize_label_responses( self, responses: list[dict[str, Any]] ) -> DataFrame: @@ -101,10 +125,6 @@ def _run_labeling_prompts( Returns: List of responses from the LLM """ - llm_kwargs = { - "temperature": self.temperature, - "response_format": {"type": "json_object"}, - } # ADS-140 # Currently, Bedrock does not support async calls. Its implementation uses synchronous calls. @@ -112,6 +132,8 @@ def _run_labeling_prompts( # We execute parallel calls using ThreadPoolExecutor for Bedrock and async calls for other providers. provider, _ = self.llm_model.split("::") + llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True) + if provider == "bedrock": llm = LLMEngine(model=self.llm_model) return run_parallel_prompts( diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 8db6cdc..2ec1d4c 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -7,7 +7,9 @@ get_prompts_for_labeler, parse_labeling_response, ) +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import get_narrative_system_prompt +from typing import Optional logger: Logger = getLogger(__name__) @@ -17,10 +19,9 @@ class NarrativeLabeler(Labeler): def __init__( self, - llm_model: str, label_prompt: str | None = None, unknown_label: str = "unclear", - temperature: float = 0, + llm_model_config: LLMConfig | dict | str = None, ): """Initialize narrative labeler. @@ -30,9 +31,8 @@ def __init__( label_prompt: Prompt provided by user to label the search result chunks. If not provided, then our default labelling prompt is used. unknown_label: Label for unclear classifications - temperature: Temperature to use in the LLM model. """ - super().__init__(llm_model, unknown_label, temperature) + super().__init__(llm_model_config, unknown_label) self.label_prompt = label_prompt def get_labels( diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index ab5b6d6..94a066d 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -1,5 +1,5 @@ from logging import Logger, getLogger -from typing import Any +from typing import Any, Optional from pandas import DataFrame, Series @@ -13,6 +13,7 @@ get_risk_system_prompt, get_target_entity_placeholder, ) +from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS logger: Logger = getLogger(__name__) @@ -22,12 +23,11 @@ class RiskLabeler(Labeler): def __init__( self, - llm_model: str, + llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', label_prompt: str | None = None, # TODO (cpinto, 2025.02.07) This value is also in the prompt used. # Changing it here would break the process. unknown_label: str = "unclear", - temperature: float = 0, ): """ Args: @@ -36,9 +36,8 @@ def __init__( label_prompt: Prompt provided by user to label the search result chunks. If not provided, then our default labelling prompt is used. unknown_label: Label for unclear classifications - temperature: Temperature to use in the LLM model. """ - super().__init__(llm_model, unknown_label, temperature) + super().__init__(llm_model_config, unknown_label) self.label_prompt = label_prompt def get_labels( diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 7e06b7c..17dd319 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -13,6 +13,10 @@ get_target_entity_placeholder, ) +from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS + +from typing import Optional + logger: Logger = getLogger(__name__) @@ -21,10 +25,9 @@ class ScreenerLabeler(Labeler): def __init__( self, - llm_model: str, + llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', label_prompt: str | None = None, unknown_label: str = "unclear", - temperature: float = 0, ): """ Args: @@ -33,9 +36,8 @@ def __init__( label_prompt: Prompt provided by user to label the search result chunks. If not provided, then our default labelling prompt is used. unknown_label: Label for unclear classifications. - temperature: Temperature to use in the LLM model. """ - super().__init__(llm_model, unknown_label, temperature) + super().__init__(llm_model_config, unknown_label) self.label_prompt = label_prompt def get_labels( diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index b5ff511..e3feb50 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -1,12 +1,87 @@ from __future__ import annotations import os +import warnings from abc import ABC, abstractmethod from logging import Logger, getLogger from typing import AsyncGenerator, Generator +from pydantic import BaseModel, model_validator logger: Logger = getLogger(__name__) +REASONING_MODELS = ['gpt-5', 'o1','o2','o3','o4'] + +class LLMConfig(BaseModel): + """Configuration for LLM models.""" + + model: str + response_format: dict = {"type": "json_object"} + temperature: float | None = None + reasoning_effort: str | None = None + top_p: float | None = 1 + frequency_penalty: int | None = 0 + presence_penalty: int | None = 0 + seed: int | None = 42 + max_completion_tokens: int | None = 300 + + @model_validator(mode='after') + def check_temperature_and_reasoning_effort(self): + ## Only one of temperature or reasoning_effort should be set. + if self.temperature is not None and self.reasoning_effort is not None: + raise ValueError( + "Only one of temperature or reasoning_effort should be set." + ) + if self.temperature is None and self.reasoning_effort is None: + warnings.warn( + "For the best experience, one of temperature or reasoning_effort should be set. " + "The LLM Config will not assign any value to either parameter and the calls will be " + "performed with the default model settings.", + UserWarning, + stacklevel=2 + ) + return self + + @model_validator(mode='after') + def validate_reasoning_config(self): + if any(rm in self.model for rm in REASONING_MODELS): + self.top_p = None + self.frequency_penalty = None + self.presence_penalty = None + self.reasoning_effort = self.reasoning_effort if self.reasoning_effort is not None else 'high' + if self.temperature is not None: + warnings.warn( + "The selected model does not support temperature settings. " + "The LLM Config will set temperature to None and reasoning_effort to its current value (if specified, defaults to 'high')",) + self.temperature = None + else: + self.temperature = self.temperature if self.temperature is not None else 0 + if self.reasoning_effort is not None: + warnings.warn( + "The selected model does not support reasoning modes. " + "The LLM Config will set reasoning_effort to None and temperature to its current value (if specified, defaults to 0)", + ) + self.reasoning_effort = None + #issue here is that we were not even returning a warning if the config is wrong (i.e. asking for 4o mini with reasoning_effort). If we drop the wrong parameter, we should either setting the right one to its best value or warn that we will fallback to default model settings. + return self + + # @classmethod + # def tree_configuration(cls): + # cls().pop('max_completion_tokens', None) # This removes it from kwargs + # return cls + + # @classmethod + # def labeler_configuration(cls): + # cls.pop('max_completion_tokens', None) # This removes it from kwargs + # return cls + + def get_llm_kwargs(self, remove_max_tokens: bool = False, remove_json_formatting: bool = False) -> dict: + config_dict = self.model_dump() + if remove_max_tokens: + config_dict.pop('max_completion_tokens', None) + if remove_json_formatting: + config_dict.pop('response_format', None) + # Remove None values and model key + return {k: v for k, v in config_dict.items() if v is not None and k != 'model'} class AsyncLLMProvider(ABC): def __init__(self, model: str | None = None): diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index e983cfe..3cf71b5 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -4,7 +4,7 @@ import pandas as pd from tqdm import tqdm -from bigdata_research_tools.llm.base import LLMEngine +from bigdata_research_tools.llm.base import LLMConfig, LLMEngine, REASONING_MODELS from bigdata_research_tools.prompts.motivation import ( MotivationType, get_motivation_prompt, @@ -17,7 +17,7 @@ class Motivation: """ def __init__( - self, model: str | None = None, model_config: dict[str, Any] | None = None + self, llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', ): """ Initialize the Motivation class. @@ -26,20 +26,30 @@ def __init__( - model: Model string in format "provider::model" (e.g., "openai::gpt-4o-mini") - model_config: Configuration for the LLM model """ - self.model_config = model_config or self._get_default_model_config() - self.llm_engine = LLMEngine(model=model) - - @staticmethod - def _get_default_model_config() -> dict[str, Any]: + if isinstance(llm_model_config, dict): + self.llm_model_config = LLMConfig(**llm_model_config) + elif isinstance(llm_model_config, str): + self.llm_model_config = self._get_default_model_config(llm_model_config) + self.llm_model = llm_model_config + else: + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config.model + + self.llm_engine = LLMEngine(model=self.llm_model) + + def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" - return { - "temperature": 0, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - "max_tokens": 300, - "seed": 42, - } + if any(rm in model for rm in REASONING_MODELS): + return LLMConfig(model=model, reasoning_effort='high', seed=42, max_completion_tokens=300) + else: + return LLMConfig(model=model, + temperature=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + max_completion_tokens=300, + seed=42, + ) def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict: """ @@ -119,7 +129,7 @@ def query_llm_for_motivation(self, prompt: str) -> str: chat_history = [{"role": "user", "content": prompt}] motivation = self.llm_engine.get_response( - chat_history=chat_history, **self.model_config + chat_history=chat_history, **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True) ) return motivation.strip() @@ -183,4 +193,4 @@ def generate_company_motivations( def update_model_config(self, config: dict[str, Any]): """Update the model configuration.""" - self.model_config.update(config) + self.llm_model_config.update(config) diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index d912c53..7dc3072 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -3,6 +3,7 @@ from dataclasses import dataclass, field from typing import Any +from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS import graphviz from json_repair import repair_json from pandas import DataFrame @@ -330,7 +331,7 @@ def save_json(self, filepath: str, **kwargs) -> None: def generate_theme_tree( main_theme: str, focus: str = "", - llm_model_config: dict[str, Any] | None = None, + llm_model_config: LLMConfig | dict | str = "openai::gpt-4o-mini", ) -> SemanticTree: """ Generate a `SemanticTree` class from a main theme and focus. @@ -352,8 +353,15 @@ def generate_theme_tree( Returns: SemanticTree: The generated theme tree. """ - ll_model_config = llm_model_config or themes_default_llm_model_config - model_str = f"{ll_model_config['provider']}::{ll_model_config['model']}" + if isinstance(llm_model_config, dict): + llm_model_config = LLMConfig(**llm_model_config) + elif isinstance(llm_model_config, str): + llm_model_config = get_default_tree_config(llm_model_config) + + print(llm_model_config) + + model_str = llm_model_config.model + chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) llm = LLMEngine(model=model_str) system_prompt = compose_themes_system_prompt(main_theme, analyst_focus=focus) @@ -363,7 +371,7 @@ def generate_theme_tree( {"role": "user", "content": main_theme}, ] - tree_str = llm.get_response(chat_history, **ll_model_config.get("kwargs", {})) + tree_str = llm.get_response(chat_history, **chat_params) tree_str = repair_json(tree_str) tree_dict = ast.literal_eval(tree_str) @@ -406,7 +414,7 @@ def stringify_label_summaries(label_summaries: dict[str, str]) -> list[str]: def generate_risk_tree( main_theme: str, focus: str = "", - llm_model_config: dict[str, Any] | None = None, + llm_model_config: LLMConfig | dict | str = "openai::gpt-4o-mini", ) -> SemanticTree: """ Generate a `SemanticTree` class from a main theme and analyst focus. @@ -429,16 +437,21 @@ def generate_risk_tree( Returns: SemanticTree: The generated theme tree. """ - ll_model_config = llm_model_config or themes_default_llm_model_config - if "kwargs" not in ll_model_config: - ll_model_config["kwargs"] = {} - model_str = f"{ll_model_config['provider']}::{ll_model_config['model']}" + if isinstance(llm_model_config, dict): + llm_model_config = LLMConfig(**llm_model_config) + elif isinstance(llm_model_config, str): + llm_model_config = get_default_tree_config(llm_model_config) + + print(llm_model_config) + + model_str = llm_model_config.model + chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) llm = LLMEngine(model=model_str) system_prompt = compose_risk_system_prompt_focus(main_theme, focus) tree_str = llm.get_response( - [{"role": "system", "content": system_prompt}], **ll_model_config["kwargs"] + [{"role": "system", "content": system_prompt}], **chat_params ) tree_str = repair_json(tree_str) @@ -446,3 +459,25 @@ def generate_risk_tree( tree_dict = ast.literal_eval(tree_str) return SemanticTree.from_dict(tree_dict) + +def get_default_tree_config(llm_model: str) -> LLMConfig: + """Get default LLM model configuration for tree generation.""" + if any(rm in llm_model for rm in REASONING_MODELS): + return LLMConfig( + model=llm_model, + reasoning_effort='high', + seed=42, + max_completion_tokens=300, + response_format={"type": "json_object"}, + ) + else: + return LLMConfig( + model=llm_model, + temperature=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + max_completion_tokens=300, + seed=42, + response_format={"type": "json_object"}, + ) \ No newline at end of file diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 08b45fd..d6af569 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -14,6 +14,7 @@ send_trace, ) from bigdata_research_tools.workflows.base import Workflow +from bigdata_research_tools.llm.base import LLMConfig logger: Logger = getLogger(__name__) @@ -26,7 +27,7 @@ def __init__( narrative_sentences: list[str], start_date: str, end_date: str, - llm_model: str, + llm_model_config: str | dict | LLMConfig, document_type: DocumentType, fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, @@ -49,7 +50,6 @@ def __init__( rerank_threshold: Enable the cross-encoder by setting the value between [0, 1]. """ super().__init__() - self.llm_model = llm_model self.narrative_sentences = narrative_sentences self.sources = sources self.fiscal_year = fiscal_year @@ -58,6 +58,18 @@ def __init__( self.end_date = end_date self.rerank_threshold = rerank_threshold + if isinstance(llm_model_config, dict): + self.llm_model_config = LLMConfig(**llm_model_config) + self.llm_model = self.llm_model_config.model + elif isinstance(llm_model_config, str): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config + elif isinstance(llm_model_config, LLMConfig): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config.model + + print(self.llm_model_config) + def mine_narratives( self, document_limit: int = 10, @@ -111,7 +123,7 @@ def mine_narratives( ) self.notify_observers("Labelling search results") # Label the search results with our narrative sentences - labeler = NarrativeLabeler(llm_model=self.llm_model) + labeler = NarrativeLabeler(llm_model_config=self.llm_model_config) df_labels = labeler.get_labels( self.narrative_sentences, texts=df_sentences["text"].tolist(), diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 19b26e6..c896bc5 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -16,9 +16,10 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.tree import SemanticTree, generate_risk_tree +from bigdata_research_tools.tree import SemanticTree, generate_risk_tree, get_default_tree_config from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df +from bigdata_research_tools.llm.base import LLMConfig logger: Logger = getLogger(__name__) @@ -28,7 +29,7 @@ class RiskAnalyzer(Workflow): def __init__( self, - llm_model: str, + llm_model_config: str | dict | LLMConfig, main_theme: str, companies: list[Company], start_date: str, @@ -64,7 +65,6 @@ def __init__( If used, generated sub-themes will be based on this. """ super().__init__() - self.llm_model = llm_model self.main_theme = main_theme self.companies = companies self.start_date = start_date @@ -77,6 +77,18 @@ def __init__( self.rerank_threshold = rerank_threshold self.focus = focus + if isinstance(llm_model_config, dict): + self.llm_model_config = LLMConfig(**llm_model_config) + self.llm_model = self.llm_model_config.model + elif isinstance(llm_model_config, str): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config + elif isinstance(llm_model_config, LLMConfig): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config.model + + print(llm_model_config) + def create_taxonomy(self): """Create a risk taxonomy based on the main theme and focus. Returns: @@ -89,7 +101,7 @@ def create_taxonomy(self): risk_tree = generate_risk_tree( main_theme=self.main_theme, focus=self.focus, - llm_model_config={"provider": self.provider, "model": self.model}, + llm_model_config=self.llm_model_config, ) risk_summaries = risk_tree.get_terminal_summaries() @@ -185,7 +197,7 @@ def label_search_results( # Label the search results with our theme labels ## To Do: generalize the labeler or pass it as an argument # to allow for different labelers to be used. - labeler = RiskLabeler(llm_model=self.llm_model) + labeler = RiskLabeler(llm_model_config=self.llm_model_config) df_labels = labeler.get_labels( main_theme=self.main_theme, labels=terminal_labels, @@ -247,7 +259,7 @@ def generate_results( df_industry = get_scored_df( df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario" ) - motivation_generator = Motivation(model=self.llm_model) + motivation_generator = Motivation(llm_model_config=self.llm_model_config) motivation_df = motivation_generator.generate_company_motivations( df=df_labeled.rename(columns={"Sub-Scenario": "Theme"}), theme_name=self.main_theme, diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 06b5b63..365dc6f 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -8,6 +8,7 @@ from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -28,7 +29,7 @@ class ThematicScreener(Workflow): def __init__( self, - llm_model: str, + llm_model_config: str | dict | LLMConfig, main_theme: str, companies: list[Company], start_date: str, @@ -62,7 +63,6 @@ def __init__( If used, generated sub-themes will be based on this. """ super().__init__() - self.llm_model = llm_model self.main_theme = main_theme if not companies: raise ValueError( @@ -84,6 +84,17 @@ def __init__( self.sources = sources self.rerank_threshold = rerank_threshold self.focus = focus or "" + if isinstance(llm_model_config, dict): + self.llm_model_config = LLMConfig(**llm_model_config) + self.llm_model = self.llm_model_config.model + elif isinstance(llm_model_config, str): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config + elif isinstance(llm_model_config, LLMConfig): + self.llm_model_config = llm_model_config + self.llm_model = llm_model_config.model + + print(llm_model_config) def screen_companies( self, @@ -133,7 +144,7 @@ def screen_companies( theme_tree = generate_theme_tree( main_theme=self.main_theme, focus=self.focus, - llm_model_config={"provider": self.provider, "model": self.model}, + llm_model_config=self.llm_model_config, ) theme_summaries = theme_tree.get_terminal_summaries() @@ -176,7 +187,7 @@ def screen_companies( .to_markdown(index=False) ) # Label the search results with our theme labels - labeler = ScreenerLabeler(llm_model=self.llm_model) + labeler = ScreenerLabeler(llm_model_config=self.llm_model_config) self.notify_observers( f"Labelling {len(df_sentences)} chunks with {len(terminal_labels)} themes" ) @@ -216,7 +227,7 @@ def screen_companies( self.notify_observers( f"Generating motivations for {len(df_company)} companies" ) - motivation_generator = Motivation(model=self.llm_model) + motivation_generator = Motivation(llm_model_config=self.llm_model_config) motivation_df = motivation_generator.generate_company_motivations( df=df, theme_name=self.main_theme, From c8f397e025d75b37520bb814ea32ec513882c26e Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 6 Nov 2025 12:02:18 +0100 Subject: [PATCH 38/82] Update pre commit hook --- .pre-commit-config.yaml | 28 +++++++++++++++++++--------- Makefile | 6 ++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3a3b8fb..8e868fe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,22 @@ # See https://pre-commit.com for more information repos: - - repo: https://github.com/psf/black - rev: 24.1.1 + - repo: local hooks: - - id: black - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - name: isort (python) - args: [--profile, black] + - id: make-lint + name: Execute Python linters + entry: make lint-check + language: system + pass_filenames: false + stages: [pre-commit] + - id: make-format + name: Format Python code + entry: make format-check + language: system + pass_filenames: false + stages: [pre-commit] + - id: make-type-check + name: Type check Python code + entry: make type-check + language: system + pass_filenames: false + stages: [pre-commit] diff --git a/Makefile b/Makefile index b7baa14..4a9deae 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,8 @@ .PHONY: tests lint format +install-pre-commit: + @uvx pre-commit install + tests: @uv run -m pytest --cov --cov-report term --cov-report xml:./coverage-reports/coverage.xml -s tests/* @@ -12,5 +15,8 @@ lint-check: format: @uvx ruff format src/bigdata_research_tools/ examples/ tutorial/ tests/ +format-check: + @uvx ruff format --check bigdata_thematic_screener/ tests/ + type-check: @uvx ty check --python-version 3.13 src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Fix version to 3.13 due to this issue https://github.com/astral-sh/ty/issues/1355 # Ignore tutorials, the issues come from this open issue https://github.com/astral-sh/ty/issues/1297 \ No newline at end of file From e14b9cf804d3aea1cd63d725ee88f6ee9851d86c Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Thu, 6 Nov 2025 12:14:42 +0000 Subject: [PATCH 39/82] removed redundant code and fixed typing --- examples/narrative_miner.py | 2 +- examples/risk_analyzer.py | 2 +- examples/thematic_screener.py | 2 +- src/bigdata_research_tools/labeler/labeler.py | 13 ++++--------- .../labeler/narrative_labeler.py | 2 +- src/bigdata_research_tools/labeler/risk_labeler.py | 2 +- .../labeler/screener_labeler.py | 2 +- src/bigdata_research_tools/llm/base.py | 10 ---------- src/bigdata_research_tools/portfolio/motivation.py | 10 ++-------- .../workflows/narrative_miner.py | 9 ++------- .../workflows/risk_analyzer.py | 10 ++-------- .../workflows/thematic_screener.py | 12 +++--------- 12 files changed, 19 insertions(+), 57 deletions(-) diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index fdb383a..734303b 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -7,7 +7,7 @@ from bigdata_research_tools.workflows import NarrativeMiner -def narrative_miner_example(llm_model_config: dict | str | LLMConfig = "openai::gpt-4o-mini", export_path: str = "narrative_miner_sample.xlsx") -> dict: +def narrative_miner_example(llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", export_path: str = "narrative_miner_sample.xlsx") -> dict: narrative_miner = NarrativeMiner( narrative_sentences=[ "Supervised Learning Techniques", diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index c65883e..ee8a99c 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -10,7 +10,7 @@ def risk_analyzer_example( risk_scenario: str, - llm_model_config: dict | LLMConfig | None = None, + llm_model_config: str | LLMConfig | dict = None, keywords: list = ["Tariffs"], control_entities: dict = {"place": ["Canada", "Mexico"]}, focus: str = "", diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 42c7365..5adbf43 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -10,7 +10,7 @@ def thematic_screener_example( theme_name: str, - llm_model_config: str | dict | LLMConfig = "openai::gpt-4o-mini", + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", ) -> dict: GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 0e5e6eb..8ae1809 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -21,8 +21,7 @@ class Labeler: def __init__( self, - llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', - #llm_model: str, ##included in the config? + llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', # Note that his value is also used in the prompts. unknown_label: str = "unclear", @@ -38,13 +37,9 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): - self.llm_model = llm_model_config self.llm_model_config = self.get_default_labeler_config(llm_model_config) else: self.llm_model_config = llm_model_config - self.llm_model = llm_model_config.model - - print(llm_model_config) self.unknown_label = unknown_label @@ -130,17 +125,17 @@ def _run_labeling_prompts( # Currently, Bedrock does not support async calls. Its implementation uses synchronous calls. # In order to handle Bedrock as a provider we use a different function for running the prompts. # We execute parallel calls using ThreadPoolExecutor for Bedrock and async calls for other providers. - provider, _ = self.llm_model.split("::") + provider, _ = self.llm_model_config.model.split("::") llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True) if provider == "bedrock": - llm = LLMEngine(model=self.llm_model) + llm = LLMEngine(model=self.llm_model_config.model) return run_parallel_prompts( llm, prompts, system_prompt, max_workers, **llm_kwargs ) else: - llm = AsyncLLMEngine(model=self.llm_model) + llm = AsyncLLMEngine(model=self.llm_model_config.model) return run_concurrent_prompts( llm, prompts, system_prompt, max_workers, **llm_kwargs ) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 2ec1d4c..6cc8e9c 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -21,7 +21,7 @@ def __init__( self, label_prompt: str | None = None, unknown_label: str = "unclear", - llm_model_config: LLMConfig | dict | str = None, + llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', ): """Initialize narrative labeler. diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 94a066d..56f0244 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -23,7 +23,7 @@ class RiskLabeler(Labeler): def __init__( self, - llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', label_prompt: str | None = None, # TODO (cpinto, 2025.02.07) This value is also in the prompt used. # Changing it here would break the process. diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 17dd319..d8603b1 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -25,7 +25,7 @@ class ScreenerLabeler(Labeler): def __init__( self, - llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', label_prompt: str | None = None, unknown_label: str = "unclear", ): diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index e3feb50..e6c2306 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -64,16 +64,6 @@ def validate_reasoning_config(self): #issue here is that we were not even returning a warning if the config is wrong (i.e. asking for 4o mini with reasoning_effort). If we drop the wrong parameter, we should either setting the right one to its best value or warn that we will fallback to default model settings. return self - # @classmethod - # def tree_configuration(cls): - # cls().pop('max_completion_tokens', None) # This removes it from kwargs - # return cls - - # @classmethod - # def labeler_configuration(cls): - # cls.pop('max_completion_tokens', None) # This removes it from kwargs - # return cls - def get_llm_kwargs(self, remove_max_tokens: bool = False, remove_json_formatting: bool = False) -> dict: config_dict = self.model_dump() if remove_max_tokens: diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index 3cf71b5..e394658 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -17,7 +17,7 @@ class Motivation: """ def __init__( - self, llm_model_config: LLMConfig | dict | str = 'openai::gpt-4o-mini', + self, llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', ): """ Initialize the Motivation class. @@ -30,12 +30,10 @@ def __init__( self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): self.llm_model_config = self._get_default_model_config(llm_model_config) - self.llm_model = llm_model_config else: self.llm_model_config = llm_model_config - self.llm_model = llm_model_config.model - self.llm_engine = LLMEngine(model=self.llm_model) + self.llm_engine = LLMEngine(model=self.llm_model_config.model) def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" @@ -190,7 +188,3 @@ def generate_company_motivations( .sort_values("Composite Score", ascending=False) .reset_index(drop=True) ) - - def update_model_config(self, config: dict[str, Any]): - """Update the model configuration.""" - self.llm_model_config.update(config) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index d6af569..5825d95 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -27,7 +27,7 @@ def __init__( narrative_sentences: list[str], start_date: str, end_date: str, - llm_model_config: str | dict | LLMConfig, + llm_model_config: str | LLMConfig | dict, document_type: DocumentType, fiscal_year: int | list[int] | None = None, sources: list[str] | None = None, @@ -60,15 +60,10 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) - self.llm_model = self.llm_model_config.model elif isinstance(llm_model_config, str): self.llm_model_config = llm_model_config - self.llm_model = llm_model_config elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config - self.llm_model = llm_model_config.model - - print(self.llm_model_config) def mine_narratives( self, @@ -162,7 +157,7 @@ def mine_narratives( name=NarrativeMiner.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model, + llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), status=workflow_status, ), ) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index c896bc5..6b7acd9 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -29,7 +29,7 @@ class RiskAnalyzer(Workflow): def __init__( self, - llm_model_config: str | dict | LLMConfig, + llm_model_config: str | LLMConfig | dict, main_theme: str, companies: list[Company], start_date: str, @@ -79,15 +79,10 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) - self.llm_model = self.llm_model_config.model elif isinstance(llm_model_config, str): self.llm_model_config = llm_model_config - self.llm_model = llm_model_config elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config - self.llm_model = llm_model_config.model - - print(llm_model_config) def create_taxonomy(self): """Create a risk taxonomy based on the main theme and focus. @@ -97,7 +92,6 @@ def create_taxonomy(self): List[str]: A list of terminal labels for the risk categories. """ - self.provider, self.model = self.llm_model.split("::") risk_tree = generate_risk_tree( main_theme=self.main_theme, focus=self.focus, @@ -424,7 +418,7 @@ def screen_companies( name=RiskAnalyzer.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model, + llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), status=workflow_status, ), ) diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 365dc6f..1e1d063 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -29,7 +29,7 @@ class ThematicScreener(Workflow): def __init__( self, - llm_model_config: str | dict | LLMConfig, + llm_model_config: str | LLMConfig | dict, main_theme: str, companies: list[Company], start_date: str, @@ -86,15 +86,10 @@ def __init__( self.focus = focus or "" if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) - self.llm_model = self.llm_model_config.model elif isinstance(llm_model_config, str): - self.llm_model_config = llm_model_config - self.llm_model = llm_model_config + self.llm_model_config = llm_model_config ##resolve it to config or add string check in the trace. elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config - self.llm_model = llm_model_config.model - - print(llm_model_config) def screen_companies( self, @@ -139,7 +134,6 @@ def screen_companies( workflow_status = WorkflowStatus.UNKNOWN try: - self.provider, self.model = self.llm_model.split("::") self.notify_observers("Generating thematic tree") theme_tree = generate_theme_tree( main_theme=self.main_theme, @@ -260,7 +254,7 @@ def screen_companies( name=ThematicScreener.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model, + llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), status=workflow_status, ), ) From a96a6e9ff3842d35e08d947f560df92240865738 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Fri, 7 Nov 2025 13:58:29 +0000 Subject: [PATCH 40/82] fixed docstring --- src/bigdata_research_tools/labeler/narrative_labeler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 26fcc3e..d26de08 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -96,6 +96,8 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: - Motivation - Label - Entity + - Entity ID + - Entity Ticker - Country Code - Entity Type """ From f590bc8227fdbc88cbc9d7e954400600ef17ea56 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 10 Nov 2025 11:18:58 +0000 Subject: [PATCH 41/82] added support for source names --- examples/query_builder.py | 20 +++++++++---------- .../search/query_builder.py | 14 ++++++++----- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/examples/query_builder.py b/examples/query_builder.py index e92ab47..55e42d0 100644 --- a/examples/query_builder.py +++ b/examples/query_builder.py @@ -59,7 +59,7 @@ def test_basic_entity_config(): scope=DocumentType.TRANSCRIPTS, ).run(limit=2 ) - logger.info("Sample results: %s", results) + logger.info("Sample results: %s", results[0]) logger.info("") @@ -101,7 +101,7 @@ def test_control_entities(): scope=DocumentType.TRANSCRIPTS, ).run(limit=2 ) - logger.info("Sample results: %s", results) + logger.info("Sample results: %s", results[0]) logger.info("") @@ -152,7 +152,7 @@ def test_custom_batches(): scope=DocumentType.FILINGS, ).run(limit=2 ) - logger.info("Sample results: %s", results) + logger.info("Sample results: %s", results[0]) logger.info("") @@ -183,7 +183,7 @@ def test_mixed_configuration(): keywords=["streaming", "content", "subscription", "audience"], entities=entities, control_entities=control_entities, - sources=["Bloomberg", "Variety", "Hollywood Reporter"], + sources=["D904DE", "9D69F1", "AA60D4"], #["MT Newswires", "Associated Press", "Hollywood Reporter", "not a source"] batch_size=2, scope=DocumentType.NEWS, fiscal_year=None, @@ -198,7 +198,7 @@ def test_mixed_configuration(): scope=DocumentType.NEWS, ).run(limit=2 ) - logger.info("Sample results: %s", results) + logger.info("Sample results: %s", results[0]) logger.info("") @@ -226,7 +226,7 @@ def test_edge_cases(): scope=DocumentType.ALL, ).run(limit=2 ) - logger.info("Sample results: %s", results1) + logger.info("Sample results: %s", results1[0]) # Test 2: Only keywords queries2 = build_batched_query( @@ -246,7 +246,7 @@ def test_edge_cases(): scope=DocumentType.ALL, ).run(limit=2 ) - logger.info("Sample results: %s", results2) + logger.info("Sample results: %s", results2[0]) # Test 3: Empty EntityConfig empty_entities = EntitiesToSearch() @@ -267,7 +267,7 @@ def test_edge_cases(): scope=DocumentType.ALL, ).run(limit=2 ) - logger.info("Sample results: %s", results3) + logger.info("Sample results: %s", results3[0]) # Test 4: Single entity type single_type = EntitiesToSearch(companies=["Apple Inc"]) @@ -288,7 +288,7 @@ def test_edge_cases(): scope=DocumentType.ALL, ).run(limit=2 ) - logger.info("Sample results: %s", results4) + logger.info("Sample results: %s", results4[0]) logger.info("") @@ -328,7 +328,7 @@ def test_reporting_entities(): scope=DocumentType.TRANSCRIPTS, ).run(limit=2 ) - logger.info("Sample results: %s", results) + logger.info("Sample results: %s", results[0]) logger.info("") def main(): diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 5bf3be2..2bedf8f 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -181,8 +181,10 @@ def _build_base_queries( keyword_query = Any([Keyword(word) for word in keywords]) if keywords else None # Create source query - source_query = Any([Source(source) for source in sources]) if sources else None - + sources_ids = _get_entity_ids(sources, Source) if sources else [] + + source_query = Any(sources_ids) if sources_ids else None + return queries, keyword_query, source_query def _get_entity_ids( @@ -201,6 +203,7 @@ def _get_entity_ids( Concept: bigdata.knowledge_graph.find_concepts, Entity: bigdata.knowledge_graph.find_companies, ReportingEntity: bigdata.knowledge_graph.find_companies, + Source: bigdata.knowledge_graph.find_sources, } lookup_func = lookup_map.get(entity_type) @@ -213,6 +216,8 @@ def _get_entity_ids( if entity_type in (Entity, ReportingEntity): entity = entity_type(entity.id) entity_ids.append(entity) + elif entity_type == Source: + entity_ids.append(Source(entity.id)) else: entity_ids.append(Entity(entity.id)) @@ -237,9 +242,8 @@ def _build_control_entity_query( entity_ids.extend(prod_ids) if control_entities.companies: - entity_type = _get_entity_type(scope) - comp_ids = _get_entity_ids(control_entities.companies,entity_type) - if comp_ids: + comp_ids = _get_entity_ids(control_entities.companies, Entity) + if comp_ids: entity_ids.extend(comp_ids) if control_entities.place: From 694012ee9c1cc3edd558275d9ba117367e4a65c7 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 10 Nov 2025 14:40:51 +0000 Subject: [PATCH 42/82] returning topics and source details cols --- examples/thematic_screener.py | 3 +- .../labeler/narrative_labeler.py | 36 +++++++++++----- .../labeler/risk_labeler.py | 38 ++++++++++------- .../labeler/screener_labeler.py | 42 ++++++++++++------- .../search/narrative_search.py | 3 ++ 5 files changed, 82 insertions(+), 40 deletions(-) diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 5adbf43..ef060fe 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -13,7 +13,8 @@ def thematic_screener_example( llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", ) -> dict: - GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" + + GRID_watchlist_ID = "814d0944-a2c1-44f6-8b42-a70c0795428e" bigdata = bigdata_connection() # Retrieve the watchlist object diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index b54fc39..3c71681 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -71,7 +71,7 @@ def get_labels( responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[List[str]] = None) -> DataFrame: """ Post-process the labeled DataFrame. @@ -116,13 +116,7 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: sort_columns = ["timestamp_utc", "label"] df = df.sort_values(by=sort_columns).reset_index(drop=True) - # Add formatted columns - df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y") - df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d") - - df = df.rename( - columns={ - "document_id": "Document ID", + columns_map = {"document_id": "Document ID", "sentence_id": "Sentence ID", "headline": "Headline", "text": "Chunk Text", @@ -134,9 +128,14 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "entity_id": "Entity ID", "entity_ticker": "Entity Ticker", } - ) - df = df.explode(["Entity", "Entity Type", "Country Code", "Entity ID", "Entity Ticker"], ignore_index=True) + optional_fields = ['topics','source_name', 'source_rank', 'url'] + for field in optional_fields: + if field in df.columns: + columns_map[field] = field.replace('_', ' ').title() + + if extra_fields: + columns_map.update(extra_fields) # Select and order columns export_columns = [ @@ -155,6 +154,23 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "Entity Type", ] + if extra_columns: + export_columns += extra_columns + + for field in optional_fields: + if field in df.columns: + export_columns += [field.replace('_', ' ').title()] + + df = df.rename( + columns=columns_map + ) + + # Add formatted columns + df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y") + df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d") + + df = df.explode(["Entity", "Entity Type", "Country Code"], ignore_index=True) + sort_columns = ["Date", "Time Period", "Document ID", "Headline", "Chunk Text"] df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 046a09c..975e658 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -81,9 +81,7 @@ def get_labels( return self._deserialize_label_responses(responses) - def post_process_dataframe( - self, df: DataFrame, extra_fields: dict, extra_columns: list[str] - ) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict], extra_columns: Optional[List[str]]) -> DataFrame: """ Post-process the labeled DataFrame. @@ -155,16 +153,20 @@ def post_process_dataframe( ) columns_map = { - "entity_name": "Company", - "entity_sector": "Sector", - "entity_industry": "Industry", - "entity_country": "Country", - "entity_ticker": "Ticker", - "headline": "Headline", - "text": "Quote", - "motivation": "Motivation", - "label": "Sub-Scenario", - } + "entity_name": "Company", + "entity_sector": "Sector", + "entity_industry": "Industry", + "entity_country": "Country", + "entity_ticker": "Ticker", + "headline": "Headline", + "text": "Quote", + "motivation": "Motivation", + "label": "Sub-Scenario" + } + optional_fields = ['topics','source_name', 'source_rank', 'url'] + for field in optional_fields: + if field in df.columns: + columns_map[field] = field.replace('_', ' ').title() if extra_fields: columns_map.update(extra_fields) @@ -176,8 +178,6 @@ def post_process_dataframe( else: print("quotes column not in df") - df = df.rename(columns=columns_map) - # Select and order columns export_columns = [ "Time Period", @@ -196,6 +196,14 @@ def post_process_dataframe( if extra_columns: export_columns += extra_columns + + for field in optional_fields: + if field in df.columns: + export_columns += [field.replace('_', ' ').title()] + + df = df.rename( + columns=columns_map + ) return df[export_columns] diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 46b5c3c..fff7dc4 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -76,7 +76,7 @@ def get_labels( responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[List[str]] = None) -> DataFrame: """ Post-process the labeled DataFrame. @@ -143,9 +143,7 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y") df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d") - df = df.rename( - columns={ - "document_id": "Document ID", + columns_map = {"document_id": "Document ID", "entity_name": "Company", "entity_sector": "Sector", "entity_industry": "Industry", @@ -156,7 +154,19 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "motivation": "Motivation", "label": "Theme", } - ) + + optional_fields = ['topics','source_name', 'source_rank', 'url'] + for field in optional_fields: + if field in df.columns: + columns_map[field] = field.replace('_', ' ').title() + + if extra_fields: + columns_map.update(extra_fields) + if "quotes" in extra_fields.keys(): + if "quotes" in df.columns: + df["quotes"] = df.apply(replace_company_placeholders, axis=1, col_name = 'quotes') + else: + print("quotes column not in df") # Select and order columns export_columns = [ @@ -174,16 +184,20 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: "Theme", ] - sort_columns = [ - "Date", - "Time Period", - "Company", - "Document ID", - "Headline", - "Quote", - ] - df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) + if extra_columns: + export_columns += extra_columns + + for field in optional_fields: + if field in df.columns: + export_columns += [field.replace('_', ' ').title()] + + df = df.rename( + columns=columns_map + ) + sort_columns = ["Date", "Time Period", "Company", "Document ID", "Headline", "Quote"] + df = df[export_columns].sort_values(sort_columns).reset_index(drop=True) + return df diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index 6d5948d..a71d7f6 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -163,6 +163,9 @@ def _process_narrative_search( "entity_type": [entity["entity_type"] for entity in chunk_entities], "entity_id": [entity["key"] for entity in chunk_entities], "entity_ticker": [entity.get("ticker", '') for entity in chunk_entities], + 'source_name': result.source.name or None, + 'source_rank': result.source.rank or None, + 'url': result.url or None } ) From fc28a06598d9f74ee3253d253e944b743b616589 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 10 Nov 2025 14:47:19 +0000 Subject: [PATCH 43/82] improved typing and source handling --- src/bigdata_research_tools/labeler/narrative_labeler.py | 2 +- src/bigdata_research_tools/labeler/risk_labeler.py | 2 +- src/bigdata_research_tools/labeler/screener_labeler.py | 2 +- src/bigdata_research_tools/search/narrative_search.py | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 3c71681..b62e043 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -71,7 +71,7 @@ def get_labels( responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[List[str]] = None) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[list[str]] = None) -> DataFrame: """ Post-process the labeled DataFrame. diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 975e658..7ffd77b 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -81,7 +81,7 @@ def get_labels( return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict], extra_columns: Optional[List[str]]) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict], extra_columns: Optional[list[str]]) -> DataFrame: """ Post-process the labeled DataFrame. diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index fff7dc4..c833f4f 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -76,7 +76,7 @@ def get_labels( responses = [parse_labeling_response(response) for response in responses] return self._deserialize_label_responses(responses) - def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[List[str]] = None) -> DataFrame: + def post_process_dataframe(self, df: DataFrame, extra_fields: Optional[dict] = None, extra_columns: Optional[list[str]] = None) -> DataFrame: """ Post-process the labeled DataFrame. diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index a71d7f6..40240f2 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -163,9 +163,9 @@ def _process_narrative_search( "entity_type": [entity["entity_type"] for entity in chunk_entities], "entity_id": [entity["key"] for entity in chunk_entities], "entity_ticker": [entity.get("ticker", '') for entity in chunk_entities], - 'source_name': result.source.name or None, - 'source_rank': result.source.rank or None, - 'url': result.url or None + 'source_name': result.source.name, + 'source_rank': result.source.rank, + 'url': result.url } ) From fb4c8efc4aba67c582869b298e3b9b2815e1f266 Mon Sep 17 00:00:00 2001 From: jaldana Date: Mon, 10 Nov 2025 16:16:09 +0100 Subject: [PATCH 44/82] Big refactoring of query building logic --- examples/query_builder.py | 2 +- .../search/query_builder.py | 201 +++++++----------- .../search/screener_search.py | 54 +++-- 3 files changed, 115 insertions(+), 142 deletions(-) diff --git a/examples/query_builder.py b/examples/query_builder.py index e4aafb1..c3bc7f1 100644 --- a/examples/query_builder.py +++ b/examples/query_builder.py @@ -313,7 +313,7 @@ def test_reporting_entities(): keywords=["streaming", "content", "subscription", "audience"], entities=entities, control_entities=None, - batch_size=2, + batch_size=4, fiscal_year=2024, sources=None, scope=DocumentType.TRANSCRIPTS, diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index a792a74..558b82f 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -1,7 +1,6 @@ from dataclasses import dataclass from datetime import datetime -from itertools import chain, zip_longest -from typing import Type +from typing import Callable, Type, Iterator, TypeVar import pandas as pd from bigdata_client.daterange import AbsoluteDateRange @@ -21,6 +20,8 @@ from bigdata_research_tools.client import bigdata_connection +T = TypeVar("T") # Type var for generic type + @dataclass class EntitiesToSearch: @@ -123,11 +124,7 @@ def build_batched_query( ) # Step 2: Build control entity query - control_query = ( - _build_control_entity_query(control_entities, scope=scope) - if control_entities - else None - ) + control_query = _build_entity_query(control_entities) if control_entities else None # Step 3: Build entity batch queries entity_batch_queries = _build_entity_batch_queries( @@ -180,6 +177,7 @@ def _build_base_queries( sources: list[str] | None, ) -> tuple[list[QueryComponent] | None, QueryComponent | None, QueryComponent | None]: """Build the base queries from sentences, keywords, and sources.""" + bigdata = bigdata_connection() # Create similarity queries from sentences queries = build_similarity_queries(sentences) if sentences else None @@ -187,93 +185,92 @@ def _build_base_queries( keyword_query = Any([Keyword(word) for word in keywords]) if keywords else None # Create source query - sources_ids = _get_entity_ids(sources, Source) if sources else [] + sources_ids = ( + _find_first_for_each(bigdata.knowledge_graph.find_sources, sources) + if sources + else [] + ) + sources_ids = [Source(source.id) for source in sources_ids] source_query = Any(sources_ids) if sources_ids else None return queries, keyword_query, source_query -def _get_entity_ids( - entity_names: list[str], - entity_type: Type, -) -> list[QueryComponent]: - bigdata = bigdata_connection() - entity_ids = [] - - lookup_map = { - Place: bigdata.knowledge_graph.find_places, - Product: bigdata.knowledge_graph.find_products, - Person: bigdata.knowledge_graph.find_people, - Organization: bigdata.knowledge_graph.find_organizations, - Topic: bigdata.knowledge_graph.find_topics, - Concept: bigdata.knowledge_graph.find_concepts, - Entity: bigdata.knowledge_graph.find_companies, - ReportingEntity: bigdata.knowledge_graph.find_companies, - Source: bigdata.knowledge_graph.find_sources, - } - - lookup_func = lookup_map.get(entity_type) - if not lookup_func: - return [] - - for name in entity_names: - entity = next(iter(lookup_func(name)), None) - if entity is not None: - if entity_type in (Entity, ReportingEntity): - entity = entity_type(entity.id) - entity_ids.append(entity) - elif entity_type == Source: - entity_ids.append(Source(entity.id)) - else: - entity_ids.append(Entity(entity.id)) - - return entity_ids +def _find_first_for_each( + func: Callable[..., Iterator[T]], values: list[str] +) -> list[T]: + """Helper function to get only the first item from a generator.""" + responses = [] + for value in values: + gen = func(value) + # If next value exists, append to responses + if (response := next(iter(gen), None)) is not None: + responses.append(response) + return responses -def _build_control_entity_query( +def _resolve_entities( control_entities: EntitiesToSearch, - scope: DocumentType = DocumentType.ALL, -) -> QueryComponent: + is_reporting_entity: bool = False, +) -> list[QueryComponent]: """Build a query for control entities.""" - + bigdata = bigdata_connection() entity_ids = [] comp_ids = [] if control_entities.people: - people_ids = _get_entity_ids(control_entities.people, Person) - if people_ids: - entity_ids.extend(people_ids) + people_ids = _find_first_for_each( + bigdata.knowledge_graph.find_people, control_entities.people + ) + entity_ids.extend([Entity(person.id) for person in people_ids]) if control_entities.product: - prod_ids = _get_entity_ids(control_entities.product, Product) - if prod_ids: - entity_ids.extend(prod_ids) + prod_ids = _find_first_for_each( + bigdata.knowledge_graph.find_products, control_entities.product + ) + entity_ids.extend([Entity(prod.id) for prod in prod_ids]) if control_entities.companies: - comp_ids = _get_entity_ids(control_entities.companies, Entity) - if comp_ids: - entity_ids.extend(comp_ids) + comp_ids = _find_first_for_each( + bigdata.knowledge_graph.find_companies, control_entities.companies + ) + if is_reporting_entity: + entity_ids.extend([(ReportingEntity(comp.id)) for comp in comp_ids]) + else: + entity_ids.extend([(Entity(comp.id)) for comp in comp_ids]) if control_entities.place: - place_ids = _get_entity_ids(control_entities.place, Place) - if place_ids: - entity_ids.extend(place_ids) + place_ids = _find_first_for_each( + bigdata.knowledge_graph.find_places, control_entities.place + ) + entity_ids.extend([Entity(place.id) for place in place_ids]) if control_entities.org: - orga_ids = _get_entity_ids(control_entities.org, Organization) - if orga_ids: - entity_ids.extend(orga_ids) + orga_ids = _find_first_for_each( + bigdata.knowledge_graph.find_organizations, control_entities.org + ) + entity_ids.extend([Entity(org.id) for org in orga_ids]) if control_entities.topic: - topic_ids = _get_entity_ids(control_entities.topic, Topic) - if topic_ids: - entity_ids.extend(topic_ids) + topic_ids = _find_first_for_each( + bigdata.knowledge_graph.find_topics, control_entities.topic + ) + entity_ids.extend([Entity(topic.id) for topic in topic_ids]) if control_entities.concepts: - concept_ids = _get_entity_ids(control_entities.concepts, Concept) - if concept_ids: - entity_ids.extend(concept_ids) + concept_ids = _find_first_for_each( + bigdata.knowledge_graph.find_concepts, control_entities.concepts + ) + entity_ids.extend([Entity(concept.id) for concept in concept_ids]) + + return entity_ids + +def _build_entity_query( + control_entities: EntitiesToSearch, + is_reporting_entity: bool = False, +) -> QueryComponent: + entity_ids = _resolve_entities(control_entities, is_reporting_entity) control_query = Any(entity_ids) return control_query @@ -306,36 +303,15 @@ def _get_entity_type(scope: DocumentType) -> Type[Entity | ReportingEntity]: def _build_custom_batch_queries( - custom_batches: list[EntitiesToSearch], scope: DocumentType + custom_batches: list[EntitiesToSearch], + scope: DocumentType, ) -> list[QueryComponent] | None: """Build entity queries from a list of EntitiesToSearch objects.""" - entity_type_map = EntitiesToSearch.get_entity_type_map() - - def get_entity_ids_for_attr( - entity_config: EntitiesToSearch, attr_name: str, entity_class - ) -> list[QueryComponent]: - """Get entity IDs for a specific attribute.""" - entity_names = getattr(entity_config, attr_name, None) - if not entity_names: - return [] - - entity_type = ( - _get_entity_type(scope) if entity_class == Entity else entity_class - ) - return _get_entity_ids(entity_names, entity_type) - + is_reporting_entity = _get_entity_type(scope) == ReportingEntity batch_queries = [] for entity_config in custom_batches: - # Use chain to flatten all entity IDs from all attributes - all_entities = list( - chain.from_iterable( - get_entity_ids_for_attr(entity_config, attr_name, entity_class) - for attr_name, entity_class in entity_type_map.items() - ) - ) - - if all_entities: - batch_queries.append(Any(all_entities)) + all_entities = _build_entity_query(entity_config, is_reporting_entity) + batch_queries.append(all_entities) return batch_queries if batch_queries else None @@ -345,39 +321,22 @@ def _auto_batch_entities( batch_size: int, scope: DocumentType = DocumentType.ALL, ) -> list[QueryComponent]: - """Auto-batch entities by type using the specified batch size.""" + """Auto-batch entities using the specified batch size.""" - # Create batches for each entity type - all_entity_batches = [] + batches = [] - for attr_name, entity_class in EntitiesToSearch.get_entity_type_map().items(): - entity_names = getattr(entities, attr_name, None) - if not entity_names: - continue - - # Get valid entity IDs - entity_type = ( - _get_entity_type(scope) if entity_class == Entity else entity_class - ) - entity_ids = _get_entity_ids(entity_names, entity_type) + entity_ids = _resolve_entities( + entities, is_reporting_entity=_get_entity_type(scope) == ReportingEntity + ) - # Split into batches and add to collection - if entity_ids: - batches = [ - entity_ids[i : i + batch_size] - for i in range(0, len(entity_ids), batch_size) - ] - all_entity_batches.append(batches) + batches = [ + entity_ids[i : i + batch_size] for i in range(0, len(entity_ids), batch_size) + ] - if not all_entity_batches: + if not batches: return [] - # Combine batches across entity types using zip_longest - return [ - Any([entity for batch in batch_group for entity in batch]) - for batch_group in zip_longest(*all_entity_batches, fillvalue=[]) - if any(batch for batch in batch_group) # Skip empty batch groups - ] + return [Any([entity for entity in batch]) for batch in batches] def _expand_queries( diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index eca84e1..a0d7c1b 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -154,7 +154,7 @@ def search_by_companies( results, entities = filter_search_results(results) # Filter entities to only include COMPANY entities entities, topics = filter_company_entities(entities) - + # Determine whether to filter by companies based on document type # For filings and transcripts, we don't need to filter as we use reporting entities # For news, we need to check against our original universe of companies as a news article @@ -193,7 +193,7 @@ def search_by_companies( def filter_company_entities( entities: list[Concept], -) -> list[Concept]: +) -> tuple[list[Concept], list[Concept]]: """ Filter only COMPANY entities from the list of entities. @@ -206,17 +206,19 @@ def filter_company_entities( entity for entity in entities if hasattr(entity, "entity_type") and entity.entity_type == "COMP" - ], [entity for entity in entities - if hasattr(entity, 'entity_type') and - entity.entity_type != 'COMP'] + ], [ + entity + for entity in entities + if hasattr(entity, "entity_type") and entity.entity_type != "COMP" + ] def process_screener_search_results( results: list[Document], entities: list[Concept], + topics: list[Concept], companies: list[Company] | None = None, document_type: DocumentType = DocumentType.NEWS, - topics: list[Concept] | None = None, ) -> DataFrame: """ Build a unified DataFrame from search results for any document type. @@ -281,14 +283,26 @@ def process_screener_search_results( for entity in chunk.entities if entity.key in entity_key_map ] - chunk_topics = [{'key': entity.key, - 'name': (topic_key_map[entity.key].name if entity.key in topic_key_map else None), - 'entity_type': (topic_key_map[entity.key].entity_type if entity.key in topic_key_map else None), - #'country': (topic_key_map[entity.key].country if entity.key in topic_key_map else None), - 'start': entity.start, - 'end': entity.end} - for entity in chunk.entities - if entity.key in topic_key_map] + chunk_topics = [ + { + "key": entity.key, + "name": ( + topic_key_map[entity.key].name + if entity.key in topic_key_map + else None + ), + "entity_type": ( + topic_key_map[entity.key].entity_type + if entity.key in topic_key_map + else None + ), + #'country': (topic_key_map[entity.key].country if entity.key in topic_key_map else None), + "start": entity.start, + "end": entity.end, + } + for entity in chunk.entities + if entity.key in topic_key_map + ] if not chunk_entities: continue # Skip if no entities are mapped @@ -327,7 +341,7 @@ def process_screener_search_results( e["name"] for e in other_entities ), "entities": chunk_entities, - 'topics': chunk_topics, + "topics": chunk_topics, } ) else: @@ -367,11 +381,11 @@ def process_screener_search_results( e["name"] for e in other_entities ), "entities": chunk_entities, - 'topics': chunk_topics, - 'source_name': result.source.name, - 'source_rank': result.source.rank, - 'url': result.url - } + "topics": chunk_topics, + "source_name": result.source.name, + "source_rank": result.source.rank, + "url": result.url, + } ) if not rows: From 0d9dc416548f599b55950734c7723f8fe3d5b713 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 11 Nov 2025 09:39:16 +0100 Subject: [PATCH 45/82] Fix formatting and linting --- examples/narrative_miner.py | 10 ++++- examples/risk_analyzer.py | 2 +- examples/thematic_screener.py | 11 ++++-- src/bigdata_research_tools/labeler/labeler.py | 38 ++++++++++++------- .../labeler/narrative_labeler.py | 8 ++-- .../labeler/risk_labeler.py | 6 +-- .../labeler/screener_labeler.py | 7 +--- src/bigdata_research_tools/llm/base.py | 32 ++++++++++------ .../portfolio/motivation.py | 30 ++++++++------- .../search/narrative_search.py | 4 +- src/bigdata_research_tools/tree.py | 9 +++-- .../workflows/narrative_miner.py | 6 ++- .../workflows/risk_analyzer.py | 11 ++++-- .../workflows/thematic_screener.py | 6 ++- 14 files changed, 112 insertions(+), 68 deletions(-) diff --git a/examples/narrative_miner.py b/examples/narrative_miner.py index 734303b..fb52601 100644 --- a/examples/narrative_miner.py +++ b/examples/narrative_miner.py @@ -7,7 +7,10 @@ from bigdata_research_tools.workflows import NarrativeMiner -def narrative_miner_example(llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", export_path: str = "narrative_miner_sample.xlsx") -> dict: +def narrative_miner_example( + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", + export_path: str = "narrative_miner_sample.xlsx", +) -> dict: narrative_miner = NarrativeMiner( narrative_sentences=[ "Supervised Learning Techniques", @@ -59,4 +62,7 @@ def update(self, message: OberserverNotification): output_path = Path("outputs/narrative_miner_sample.xlsx") output_path.parent.mkdir(parents=True, exist_ok=True) - narrative_miner_example(export_path=str(output_path), llm_model_config={'model': "openai::gpt-5-mini", 'temperature':0}) + narrative_miner_example( + export_path=str(output_path), + llm_model_config={"model": "openai::gpt-5-mini", "temperature": 0}, + ) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index ee8a99c..13eb3d9 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -10,7 +10,7 @@ def risk_analyzer_example( risk_scenario: str, - llm_model_config: str | LLMConfig | dict = None, + llm_model_config: str | LLMConfig | dict = None, keywords: list = ["Tariffs"], control_entities: dict = {"place": ["Canada", "Mexico"]}, focus: str = "", diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 5adbf43..f1d8cb2 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -1,8 +1,9 @@ from pathlib import Path from bigdata_client.models.search import DocumentType -from bigdata_research_tools.llm.base import LLMConfig + from bigdata_research_tools.client import bigdata_connection +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.utils.observer import OberserverNotification, Observer from bigdata_research_tools.visuals import create_thematic_exposure_dashboard from bigdata_research_tools.workflows import ThematicScreener @@ -10,7 +11,7 @@ def thematic_screener_example( theme_name: str, - llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", export_path: str = "thematic_screener_results.xlsx", ) -> dict: GRID_watchlist_ID = "a60c351a-1822-4a88-8c45-a4e78abd979a" @@ -55,7 +56,11 @@ def update(self, message: OberserverNotification): output_path = Path("outputs/thematic_screener_results.xlsx") output_path.parent.mkdir(parents=True, exist_ok=True) - x = thematic_screener_example("Chip Manufacturers", export_path=str(output_path), llm_model_config="openai::gpt-5-mini") + x = thematic_screener_example( + "Chip Manufacturers", + export_path=str(output_path), + llm_model_config="openai::gpt-5-mini", + ) custom_config = { "company_column": "Company", "heatmap_colorscale": "Plasma", diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 54cbb02..7186bf2 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -2,12 +2,17 @@ from itertools import zip_longest from json import JSONDecodeError, dumps, loads from logging import Logger, getLogger -from typing import Any, Optional +from typing import Any from json_repair import repair_json from pandas import DataFrame -from bigdata_research_tools.llm.base import AsyncLLMEngine, LLMEngine, LLMConfig, REASONING_MODELS +from bigdata_research_tools.llm.base import ( + REASONING_MODELS, + AsyncLLMEngine, + LLMConfig, + LLMEngine, +) from bigdata_research_tools.llm.utils import ( run_concurrent_prompts, run_parallel_prompts, @@ -21,10 +26,9 @@ class Labeler: def __init__( self, - llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", # Note that his value is also used in the prompts. unknown_label: str = "unclear", - ): """Initialize base Labeler. @@ -40,22 +44,28 @@ def __init__( self.llm_model_config = self.get_default_labeler_config(llm_model_config) else: self.llm_model_config = llm_model_config - + self.unknown_label = unknown_label def get_default_labeler_config(self, model) -> LLMConfig: """Get default LLM model configuration for labeling.""" if any(rm in model for rm in REASONING_MODELS): - return LLMConfig(model=model, reasoning_effort='high', seed=42, response_format={"type": "json_object"}) + return LLMConfig( + model=model, + reasoning_effort="high", + seed=42, + response_format={"type": "json_object"}, + ) else: - return LLMConfig(model=model, - temperature=0, - top_p=1, - frequency_penalty=0, - presence_penalty=0, - seed=42, - response_format={"type": "json_object"}, - ) + return LLMConfig( + model=model, + temperature=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + seed=42, + response_format={"type": "json_object"}, + ) def _deserialize_label_responses( self, responses: list[dict[str, Any]] diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index b54fc39..436d11a 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -9,7 +9,6 @@ ) from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import get_narrative_system_prompt -from typing import Optional logger: Logger = getLogger(__name__) @@ -21,7 +20,7 @@ def __init__( self, label_prompt: str | None = None, unknown_label: str = "unclear", - llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", ): """Initialize narrative labeler. @@ -136,7 +135,10 @@ def post_process_dataframe(self, df: DataFrame) -> DataFrame: } ) - df = df.explode(["Entity", "Entity Type", "Country Code", "Entity ID", "Entity Ticker"], ignore_index=True) + df = df.explode( + ["Entity", "Entity Type", "Country Code", "Entity ID", "Entity Ticker"], + ignore_index=True, + ) # Select and order columns export_columns = [ diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 046a09c..0d165b9 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -1,5 +1,5 @@ from logging import Logger, getLogger -from typing import Any, Optional +from typing import Any from pandas import DataFrame, Series @@ -8,12 +8,12 @@ get_prompts_for_labeler, parse_labeling_response, ) +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import ( get_other_entity_placeholder, get_risk_system_prompt, get_target_entity_placeholder, ) -from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS logger: Logger = getLogger(__name__) @@ -23,7 +23,7 @@ class RiskLabeler(Labeler): def __init__( self, - llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", label_prompt: str | None = None, # TODO (cpinto, 2025.02.07) This value is also in the prompt used. # Changing it here would break the process. diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 46b5c3c..357382d 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -7,16 +7,13 @@ get_prompts_for_labeler, parse_labeling_response, ) +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import ( get_other_entity_placeholder, get_screener_system_prompt, get_target_entity_placeholder, ) -from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS - -from typing import Optional - logger: Logger = getLogger(__name__) @@ -25,7 +22,7 @@ class ScreenerLabeler(Labeler): def __init__( self, - llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", label_prompt: str | None = None, unknown_label: str = "unclear", ): diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index e6c2306..7a0eec8 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -5,11 +5,13 @@ from abc import ABC, abstractmethod from logging import Logger, getLogger from typing import AsyncGenerator, Generator + from pydantic import BaseModel, model_validator logger: Logger = getLogger(__name__) -REASONING_MODELS = ['gpt-5', 'o1','o2','o3','o4'] +REASONING_MODELS = ["gpt-5", "o1", "o2", "o3", "o4"] + class LLMConfig(BaseModel): """Configuration for LLM models.""" @@ -24,7 +26,7 @@ class LLMConfig(BaseModel): seed: int | None = 42 max_completion_tokens: int | None = 300 - @model_validator(mode='after') + @model_validator(mode="after") def check_temperature_and_reasoning_effort(self): ## Only one of temperature or reasoning_effort should be set. if self.temperature is not None and self.reasoning_effort is not None: @@ -37,21 +39,24 @@ def check_temperature_and_reasoning_effort(self): "The LLM Config will not assign any value to either parameter and the calls will be " "performed with the default model settings.", UserWarning, - stacklevel=2 + stacklevel=2, ) return self - @model_validator(mode='after') + @model_validator(mode="after") def validate_reasoning_config(self): if any(rm in self.model for rm in REASONING_MODELS): self.top_p = None self.frequency_penalty = None self.presence_penalty = None - self.reasoning_effort = self.reasoning_effort if self.reasoning_effort is not None else 'high' + self.reasoning_effort = ( + self.reasoning_effort if self.reasoning_effort is not None else "high" + ) if self.temperature is not None: warnings.warn( - "The selected model does not support temperature settings. " - "The LLM Config will set temperature to None and reasoning_effort to its current value (if specified, defaults to 'high')",) + "The selected model does not support temperature settings. " + "The LLM Config will set temperature to None and reasoning_effort to its current value (if specified, defaults to 'high')", + ) self.temperature = None else: self.temperature = self.temperature if self.temperature is not None else 0 @@ -61,17 +66,20 @@ def validate_reasoning_config(self): "The LLM Config will set reasoning_effort to None and temperature to its current value (if specified, defaults to 0)", ) self.reasoning_effort = None - #issue here is that we were not even returning a warning if the config is wrong (i.e. asking for 4o mini with reasoning_effort). If we drop the wrong parameter, we should either setting the right one to its best value or warn that we will fallback to default model settings. + # issue here is that we were not even returning a warning if the config is wrong (i.e. asking for 4o mini with reasoning_effort). If we drop the wrong parameter, we should either setting the right one to its best value or warn that we will fallback to default model settings. return self - def get_llm_kwargs(self, remove_max_tokens: bool = False, remove_json_formatting: bool = False) -> dict: + def get_llm_kwargs( + self, remove_max_tokens: bool = False, remove_json_formatting: bool = False + ) -> dict: config_dict = self.model_dump() if remove_max_tokens: - config_dict.pop('max_completion_tokens', None) + config_dict.pop("max_completion_tokens", None) if remove_json_formatting: - config_dict.pop('response_format', None) + config_dict.pop("response_format", None) # Remove None values and model key - return {k: v for k, v in config_dict.items() if v is not None and k != 'model'} + return {k: v for k, v in config_dict.items() if v is not None and k != "model"} + class AsyncLLMProvider(ABC): def __init__(self, model: str | None = None): diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index e394658..e9c24d0 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -1,10 +1,9 @@ from collections import defaultdict -from typing import Any import pandas as pd from tqdm import tqdm -from bigdata_research_tools.llm.base import LLMConfig, LLMEngine, REASONING_MODELS +from bigdata_research_tools.llm.base import REASONING_MODELS, LLMConfig, LLMEngine from bigdata_research_tools.prompts.motivation import ( MotivationType, get_motivation_prompt, @@ -17,7 +16,8 @@ class Motivation: """ def __init__( - self, llm_model_config: str | LLMConfig | dict = 'openai::gpt-4o-mini', + self, + llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini", ): """ Initialize the Motivation class. @@ -38,16 +38,19 @@ def __init__( def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" if any(rm in model for rm in REASONING_MODELS): - return LLMConfig(model=model, reasoning_effort='high', seed=42, max_completion_tokens=300) + return LLMConfig( + model=model, reasoning_effort="high", seed=42, max_completion_tokens=300 + ) else: - return LLMConfig(model=model, - temperature=0, - top_p=1, - frequency_penalty=0, - presence_penalty=0, - max_completion_tokens=300, - seed=42, - ) + return LLMConfig( + model=model, + temperature=0, + top_p=1, + frequency_penalty=0, + presence_penalty=0, + max_completion_tokens=300, + seed=42, + ) def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict: """ @@ -127,7 +130,8 @@ def query_llm_for_motivation(self, prompt: str) -> str: chat_history = [{"role": "user", "content": prompt}] motivation = self.llm_engine.get_response( - chat_history=chat_history, **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True) + chat_history=chat_history, + **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True), ) return motivation.strip() diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index 6d5948d..c676e6d 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -162,7 +162,9 @@ def _process_narrative_search( "country_code": [entity["country"] for entity in chunk_entities], "entity_type": [entity["entity_type"] for entity in chunk_entities], "entity_id": [entity["key"] for entity in chunk_entities], - "entity_ticker": [entity.get("ticker", '') for entity in chunk_entities], + "entity_ticker": [ + entity.get("ticker", "") for entity in chunk_entities + ], } ) diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index 282ae01..e1fd499 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -3,12 +3,12 @@ from dataclasses import dataclass, field from typing import Any -from bigdata_research_tools.llm.base import LLMConfig, REASONING_MODELS import graphviz from json_repair import repair_json from pandas import DataFrame from bigdata_research_tools.llm import LLMEngine +from bigdata_research_tools.llm.base import REASONING_MODELS, LLMConfig from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus from bigdata_research_tools.prompts.themes import compose_themes_system_prompt @@ -359,7 +359,7 @@ def generate_theme_tree( llm_model_config = get_default_tree_config(llm_model_config) print(llm_model_config) - + model_str = llm_model_config.model chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) llm = LLMEngine(model=model_str) @@ -460,12 +460,13 @@ def generate_risk_tree( return SemanticTree.from_dict(tree_dict) + def get_default_tree_config(llm_model: str) -> LLMConfig: """Get default LLM model configuration for tree generation.""" if any(rm in llm_model for rm in REASONING_MODELS): return LLMConfig( model=llm_model, - reasoning_effort='high', + reasoning_effort="high", seed=42, max_completion_tokens=300, response_format={"type": "json_object"}, @@ -480,4 +481,4 @@ def get_default_tree_config(llm_model: str) -> LLMConfig: max_completion_tokens=300, seed=42, response_format={"type": "json_object"}, - ) \ No newline at end of file + ) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 5825d95..2c9dafb 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -7,6 +7,7 @@ from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.narrative_labeler import NarrativeLabeler +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.search import search_narratives from bigdata_research_tools.tracing import ( WorkflowStatus, @@ -14,7 +15,6 @@ send_trace, ) from bigdata_research_tools.workflows.base import Workflow -from bigdata_research_tools.llm.base import LLMConfig logger: Logger = getLogger(__name__) @@ -157,7 +157,9 @@ def mine_narratives( name=NarrativeMiner.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), + llm_model=self.llm_model_config.model + if isinstance(self.llm_model_config, LLMConfig) + else str(self.llm_model_config), status=workflow_status, ), ) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 6b7acd9..05b3ec4 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -8,6 +8,7 @@ from bigdata_research_tools.client import init_bigdata_client from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category +from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -16,10 +17,12 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.tree import SemanticTree, generate_risk_tree, get_default_tree_config +from bigdata_research_tools.tree import ( + SemanticTree, + generate_risk_tree, +) from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df -from bigdata_research_tools.llm.base import LLMConfig logger: Logger = getLogger(__name__) @@ -418,7 +421,9 @@ def screen_companies( name=RiskAnalyzer.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), + llm_model=self.llm_model_config.model + if isinstance(self.llm_model_config, LLMConfig) + else str(self.llm_model_config), status=workflow_status, ), ) diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 1e1d063..53b7827 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -87,7 +87,7 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): - self.llm_model_config = llm_model_config ##resolve it to config or add string check in the trace. + self.llm_model_config = llm_model_config ##resolve it to config or add string check in the trace. elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config @@ -254,7 +254,9 @@ def screen_companies( name=ThematicScreener.name, start_date=workflow_start, end_date=datetime.now(), - llm_model=self.llm_model_config.model if isinstance(self.llm_model_config, LLMConfig) else str(self.llm_model_config), + llm_model=self.llm_model_config.model + if isinstance(self.llm_model_config, LLMConfig) + else str(self.llm_model_config), status=workflow_status, ), ) From 10076b8bafca0cda61715b1eecf503ad638b1b75 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 11 Nov 2025 11:31:58 +0000 Subject: [PATCH 46/82] increasing llm tiemouts --- src/bigdata_research_tools/labeler/narrative_labeler.py | 2 +- src/bigdata_research_tools/labeler/risk_labeler.py | 2 +- src/bigdata_research_tools/labeler/screener_labeler.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 2ccc6fc..9c1d333 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -39,7 +39,7 @@ def get_labels( theme_labels: list[str], texts: list[str], max_workers: int = 50, - timeout: int | None = 20, + timeout: int | None = 55, ) -> DataFrame: """ Process thematic labels for texts. diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 786d901..0ec0f1a 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -46,7 +46,7 @@ def get_labels( labels: list[str], texts: list[str], max_workers: int = 50, - timeout: int | None = 20, + timeout: int | None = 55, textsconfig: list[dict[str, Any]] | None = None, ) -> DataFrame: """ diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 5a9ce49..ce50d78 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -43,7 +43,7 @@ def get_labels( labels: list[str], texts: list[str], timeout: int | None = 20, - max_workers: int = 50, + max_workers: int = 55, ) -> DataFrame: """ Process thematic labels for texts. From 1f9c051c25b0a031305a4f17642699194ab7f455 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 11 Nov 2025 10:06:01 +0100 Subject: [PATCH 47/82] Error control and fix type issues --- src/bigdata_research_tools/llm/azure.py | 19 ++++++++++++++++-- src/bigdata_research_tools/llm/base.py | 24 +++++++++++++++-------- src/bigdata_research_tools/llm/bedrock.py | 19 +++++++++++++++--- src/bigdata_research_tools/llm/openai.py | 18 ++++++++++++++++- 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/src/bigdata_research_tools/llm/azure.py b/src/bigdata_research_tools/llm/azure.py index 0d2148a..a3a429c 100644 --- a/src/bigdata_research_tools/llm/azure.py +++ b/src/bigdata_research_tools/llm/azure.py @@ -22,7 +22,11 @@ "please install `bigdata_research_tools[azure,openai]` to enable them." ) -from bigdata_research_tools.llm.base import AsyncLLMProvider, LLMProvider +from bigdata_research_tools.llm.base import ( + AsyncLLMProvider, + LLMProvider, + NotInitializedLLMProviderError, +) class AsyncAzureProvider(AsyncLLMProvider): @@ -67,6 +71,8 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st Reference examples of the format accepted: https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. kwargs (dict): Additional arguments to pass to the OpenAI API. """ + if not self._client: + raise NotInitializedLLMProviderError(self) max_retries = 5 delay = 1 + random.random() # initial delay in seconds last_exception = None @@ -110,6 +116,8 @@ async def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ + if not self._client: + raise NotInitializedLLMProviderError(self) response = await self._client.chat.completions.create( messages=chat_history, model=self.model, @@ -145,6 +153,8 @@ async def get_stream_response( Returns: Generator[str, None, None]: A generator that yields the response from the LLM model. """ + if not self._client: + raise NotInitializedLLMProviderError(self) async for delta in await self._client.chat.completions.create( model=self.model, messages=chat_history, stream=True, **kwargs ): @@ -196,7 +206,8 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: Reference examples of the format accepted: https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. kwargs (dict): Additional arguments to pass to the OpenAI API. """ - + if not self._client: + raise NotInitializedLLMProviderError(self) max_retries = 5 delay = 1 + random.random() # initial delay in seconds last_exception = None @@ -240,6 +251,8 @@ def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ + if not self._client: + raise NotInitializedLLMProviderError(self) response = self._client.chat.completions.create( messages=chat_history, model=self.model, @@ -275,6 +288,8 @@ def get_stream_response( Returns: Generator[str, None, None]: A generator that yields the response from the LLM model. """ + if not self._client: + raise NotInitializedLLMProviderError(self) for delta in self._client.chat.completions.create( model=self.model, messages=chat_history, stream=True, **kwargs ): diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 7a0eec8..7180358 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -138,7 +138,7 @@ def __init__(self, model: str | None = None): source = "Argument" try: - self.provider, self.model = model.split("::") + self.provider_name, self.model = model.split("::") except (ValueError, AttributeError): logger.error( f"Invalid model format. It should be `::`." @@ -149,10 +149,10 @@ def __init__(self, model: str | None = None): "Invalid model format. It should be `::`." ) - self.provider = self.load_provider() + self.provider = self.load_provider(provider_name=self.provider_name) - def load_provider(self) -> AsyncLLMProvider: - provider = self.provider.lower() + def load_provider(self, provider_name: str) -> AsyncLLMProvider: + provider = provider_name.lower() if provider == "openai": from bigdata_research_tools.llm.openai import AsyncOpenAIProvider @@ -265,7 +265,7 @@ def __init__(self, model: str | None = None): source = "Argument" try: - self.provider, self.model = model.split("::") + self.provider_name, self.model = model.split("::") except (ValueError, AttributeError): logger.error( f"Invalid model format. It should be `::`." @@ -276,10 +276,10 @@ def __init__(self, model: str | None = None): "Invalid model format. It should be `::`." ) - self.provider = self.load_provider() + self.provider = self.load_provider(provider_name=self.provider_name) - def load_provider(self) -> LLMProvider: - provider = self.provider.lower() + def load_provider(self, provider_name: str) -> LLMProvider: + provider = provider_name.lower() if provider == "openai": from bigdata_research_tools.llm.openai import OpenAIProvider @@ -332,3 +332,11 @@ def get_tools_response( return self.provider.get_tools_response( chat_history, tools, temperature, **kwargs ) + + +class NotInitializedLLMProviderError(Exception): + """Exception raised when an LLM provider is not initialized properly.""" + + def __init__(self, provider: LLMProvider | AsyncLLMProvider): + message = f"LLM Provider has been used, but it has not been properly initialized. Provider type: {type(provider).__name__}" + super().__init__(message) diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index f71c82e..f9bd6f9 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -9,7 +9,11 @@ "please install `bigdata_research_tools[bedrock]` to enable them." ) -from bigdata_research_tools.llm.base import AsyncLLMProvider, LLMProvider +from bigdata_research_tools.llm.base import ( + AsyncLLMProvider, + LLMProvider, + NotInitializedLLMProviderError, +) class AsyncBedrockProvider(AsyncLLMProvider): @@ -34,6 +38,8 @@ def configure_bedrock_client(self) -> None: ) def _get_bedrock_client(self) -> Session: + if not self._client: + raise NotInitializedLLMProviderError(self) return self._client.client("bedrock-runtime") def _get_bedrock_input( @@ -195,6 +201,11 @@ def configure_bedrock_client(self) -> None: region_name=self.region or environ.get("AWS_DEFAULT_REGION") ) + def _get_bedrock_client(self) -> Session: + if not self._client: + raise NotInitializedLLMProviderError(self) + return self._client.client("bedrock-runtime") + def _get_bedrock_input( self, chat_history: list[dict[str, str]], **kwargs ) -> tuple[dict[str, Any], str]: @@ -256,7 +267,8 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: Only implemented for a few models. See https://docs.aws.amazon.com/bedrock/latest/userguide/latency-optimized-inference.html """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + bedrock_client = self._get_bedrock_client() + model_kwargs, output_prefix = self._get_bedrock_input( chat_history, **kwargs ) response = bedrock_client.converse(**model_kwargs) @@ -291,7 +303,8 @@ def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ - bedrock_client, model_kwargs, output_prefix = self._get_bedrock_input( + bedrock_client = self._get_bedrock_client() + model_kwargs, output_prefix = self._get_bedrock_input( chat_history, **kwargs ) if tools: diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 4b8519c..4cc002b 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -11,7 +11,11 @@ "please install `bigdata_research_tools[openai]` to enable them." ) -from bigdata_research_tools.llm.base import AsyncLLMProvider, LLMProvider +from bigdata_research_tools.llm.base import ( + AsyncLLMProvider, + LLMProvider, + NotInitializedLLMProviderError, +) class AsyncOpenAIProvider(AsyncLLMProvider): @@ -46,6 +50,8 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st Reference examples of the format accepted: https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. kwargs (dict): Additional arguments to pass to the OpenAI API. """ + if not self._client: + raise NotInitializedLLMProviderError(self) chat_completion = await self._client.chat.completions.create( messages=chat_history, model=self.model, **kwargs ) @@ -76,6 +82,8 @@ async def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ + if not self._client: + raise NotInitializedLLMProviderError(self) response = await self._client.chat.completions.create( messages=chat_history, model=self.model, @@ -111,6 +119,8 @@ async def get_stream_response( Returns: Generator[str, None, None]: A generator that yields the response from the LLM model. """ + if not self._client: + raise NotInitializedLLMProviderError(self) async for delta in await self._client.chat.completions.create( model=self.model, messages=chat_history, stream=True, **kwargs ): @@ -150,6 +160,8 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: Reference examples of the format accepted: https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models. kwargs (dict): Additional arguments to pass to the OpenAI API. """ + if not self._client: + raise NotInitializedLLMProviderError(self) chat_completion = self._client.chat.completions.create( messages=chat_history, model=self.model, **kwargs ) @@ -180,6 +192,8 @@ def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ + if not self._client: + raise NotInitializedLLMProviderError(self) response = self._client.chat.completions.create( messages=chat_history, model=self.model, @@ -215,6 +229,8 @@ def get_stream_response( Returns: Generator[str, None, None]: A generator that yields the response from the LLM model. """ + if not self._client: + raise NotInitializedLLMProviderError(self) for delta in self._client.chat.completions.create( model=self.model, messages=chat_history, stream=True, **kwargs ): From 937cee4b367bac835ca067a79ccf75e8e9fefca8 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 11 Nov 2025 12:43:21 +0100 Subject: [PATCH 48/82] Add connection configuration to LLM providers --- examples/risk_analyzer.py | 2 +- src/bigdata_research_tools/llm/azure.py | 12 +++++--- src/bigdata_research_tools/llm/base.py | 36 ++++++++++++++--------- src/bigdata_research_tools/llm/bedrock.py | 23 +++++---------- src/bigdata_research_tools/llm/openai.py | 14 ++++----- 5 files changed, 45 insertions(+), 42 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 13eb3d9..d3272eb 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -10,7 +10,7 @@ def risk_analyzer_example( risk_scenario: str, - llm_model_config: str | LLMConfig | dict = None, + llm_model_config: str | LLMConfig | dict, keywords: list = ["Tariffs"], control_entities: dict = {"place": ["Canada", "Mexico"]}, focus: str = "", diff --git a/src/bigdata_research_tools/llm/azure.py b/src/bigdata_research_tools/llm/azure.py index a3a429c..7929568 100644 --- a/src/bigdata_research_tools/llm/azure.py +++ b/src/bigdata_research_tools/llm/azure.py @@ -33,8 +33,9 @@ class AsyncAzureProvider(AsyncLLMProvider): def __init__( self, model: str, + **connection_config, ): - super().__init__(model) + super().__init__(model, **connection_config) self._client = None self.configure_azure_client() @@ -49,7 +50,7 @@ def configure_azure_client(self) -> None: """ if not self._client: try: - self._client = AsyncAzureOpenAI() + self._client = AsyncAzureOpenAI(**self.connection_config) except OpenAIError: token_provider = get_bearer_token_provider( DefaultAzureCredential(), @@ -58,6 +59,7 @@ def configure_azure_client(self) -> None: self._client = AsyncAzureOpenAI( azure_ad_token_provider=token_provider, + **self.connection_config, ) async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: @@ -168,8 +170,9 @@ class AzureProvider(LLMProvider): def __init__( self, model: str, + **connection_config, ): - super().__init__(model) + super().__init__(model, **connection_config) self._client = None self.configure_azure_client() @@ -184,7 +187,7 @@ def configure_azure_client(self) -> None: """ if not self._client: try: - self._client = AzureOpenAI() + self._client = AzureOpenAI(**self.connection_config) except OpenAIError: token_provider = get_bearer_token_provider( DefaultAzureCredential(), @@ -193,6 +196,7 @@ def configure_azure_client(self) -> None: self._client = AzureOpenAI( azure_ad_token_provider=token_provider, + **self.connection_config, ) def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 7180358..5b1b974 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -82,8 +82,9 @@ def get_llm_kwargs( class AsyncLLMProvider(ABC): - def __init__(self, model: str | None = None): + def __init__(self, model: str | None = None, **connection_config): self.model = model + self.connection_config = connection_config @abstractmethod async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: @@ -130,7 +131,7 @@ async def get_stream_response( class AsyncLLMEngine: - def __init__(self, model: str | None = None): + def __init__(self, model: str | None = None, **connection_config): if model is None: model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM", "openai::gpt-4o-mini") source = "Environment" @@ -149,23 +150,27 @@ def __init__(self, model: str | None = None): "Invalid model format. It should be `::`." ) - self.provider = self.load_provider(provider_name=self.provider_name) + self.provider = self.load_provider( + provider_name=self.provider_name, **connection_config + ) - def load_provider(self, provider_name: str) -> AsyncLLMProvider: + def load_provider( + self, provider_name: str, **connection_config + ) -> AsyncLLMProvider: provider = provider_name.lower() if provider == "openai": from bigdata_research_tools.llm.openai import AsyncOpenAIProvider - return AsyncOpenAIProvider(model=self.model) + return AsyncOpenAIProvider(model=self.model, **connection_config) elif provider == "bedrock": from bigdata_research_tools.llm.bedrock import AsyncBedrockProvider - return AsyncBedrockProvider(model=self.model) + return AsyncBedrockProvider(model=self.model, **connection_config) elif provider == "azure": from bigdata_research_tools.llm.azure import AsyncAzureProvider - return AsyncAzureProvider(model=self.model) + return AsyncAzureProvider(model=self.model, **connection_config) else: logger.error(f"Invalid provider: `{self.provider}`") @@ -209,8 +214,9 @@ async def get_tools_response( class LLMProvider(ABC): - def __init__(self, model: str | None = None): + def __init__(self, model: str | None = None, **connection_config): self.model = model + self.connection_config = connection_config @abstractmethod def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: @@ -257,7 +263,7 @@ def get_stream_response( class LLMEngine: - def __init__(self, model: str | None = None): + def __init__(self, model: str | None = None, **connection_config): if model is None: model = os.getenv("BIGDATA_RESEARCH_DEFAULT_LLM", "openai::gpt-4o-mini") source = "Environment" @@ -276,22 +282,24 @@ def __init__(self, model: str | None = None): "Invalid model format. It should be `::`." ) - self.provider = self.load_provider(provider_name=self.provider_name) + self.provider = self.load_provider( + provider_name=self.provider_name, **connection_config + ) - def load_provider(self, provider_name: str) -> LLMProvider: + def load_provider(self, provider_name: str, **connection_config) -> LLMProvider: provider = provider_name.lower() if provider == "openai": from bigdata_research_tools.llm.openai import OpenAIProvider - return OpenAIProvider(model=self.model) + return OpenAIProvider(model=self.model, **connection_config) elif provider == "bedrock": from bigdata_research_tools.llm.bedrock import BedrockProvider - return BedrockProvider(model=self.model) + return BedrockProvider(model=self.model, **connection_config) elif provider == "azure": from bigdata_research_tools.llm.azure import AzureProvider - return AzureProvider(model=self.model) + return AzureProvider(model=self.model, **connection_config) else: logger.error(f"Invalid provider: `{self.provider}`") diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index f9bd6f9..761b171 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -1,4 +1,3 @@ -from os import environ from typing import Any, Generator try: @@ -19,9 +18,8 @@ class AsyncBedrockProvider(AsyncLLMProvider): # Asynchronous boto3 is tricky, for now use the synchronous client, this will not # provide the benefits from async, but will at least let our workflows run for now - def __init__(self, model: str, region: str | None = None): - super().__init__(model) - self.region: str | None = region + def __init__(self, model: str, **connection_config): + super().__init__(model, **connection_config) self._client: Session | None = None self.configure_bedrock_client() @@ -34,7 +32,7 @@ def configure_bedrock_client(self) -> None: """ if not self._client: self._client = Session( - region_name=self.region or environ.get("AWS_DEFAULT_REGION") + **self.connection_config, ) def _get_bedrock_client(self) -> Session: @@ -183,9 +181,8 @@ async def get_stream_response( class BedrockProvider(LLMProvider): - def __init__(self, model: str, region: str | None = None): - super().__init__(model) - self.region: str | None = region + def __init__(self, model: str, **connection_config): + super().__init__(model, **connection_config) self._client: Session | None = None self.configure_bedrock_client() @@ -198,7 +195,7 @@ def configure_bedrock_client(self) -> None: """ if not self._client: self._client = Session( - region_name=self.region or environ.get("AWS_DEFAULT_REGION") + **self.connection_config, ) def _get_bedrock_client(self) -> Session: @@ -268,9 +265,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: https://docs.aws.amazon.com/bedrock/latest/userguide/latency-optimized-inference.html """ bedrock_client = self._get_bedrock_client() - model_kwargs, output_prefix = self._get_bedrock_input( - chat_history, **kwargs - ) + model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) response = bedrock_client.converse(**model_kwargs) output_message = ( @@ -304,9 +299,7 @@ def get_tools_response( - text (str): The text content of the message, if any. """ bedrock_client = self._get_bedrock_client() - model_kwargs, output_prefix = self._get_bedrock_input( - chat_history, **kwargs - ) + model_kwargs, output_prefix = self._get_bedrock_input(chat_history, **kwargs) if tools: model_kwargs["toolConfig"] = {"tools": tools} response = bedrock_client.converse(**model_kwargs) diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 4cc002b..57152bd 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -19,11 +19,8 @@ class AsyncOpenAIProvider(AsyncLLMProvider): - def __init__( - self, - model: str, - ): - super().__init__(model) + def __init__(self, model: str, **connection_config): + super().__init__(model, **connection_config) self._client = None self.configure_openai_client() @@ -37,7 +34,7 @@ def configure_openai_client(self) -> None: OpenAI: The OpenAI client. """ if not self._client: - self._client = AsyncOpenAI() + self._client = AsyncOpenAI(**self.connection_config) async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: """ @@ -132,8 +129,9 @@ class OpenAIProvider(LLMProvider): def __init__( self, model: str, + **connection_config, ): - super().__init__(model) + super().__init__(model, **connection_config) self._client = None self.configure_openai_client() @@ -147,7 +145,7 @@ def configure_openai_client(self) -> None: OpenAI: The OpenAI client. """ if not self._client: - self._client = OpenAI() + self._client = OpenAI(**self.connection_config) def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: """ From 23a9378907942655a95e7046106c2bc9833b7c2a Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 11 Nov 2025 13:20:55 +0100 Subject: [PATCH 49/82] Expose llm connection config in llm config Remove prints in favor of loggers --- src/bigdata_research_tools/labeler/labeler.py | 10 ++++++++-- src/bigdata_research_tools/labeler/risk_labeler.py | 2 +- .../labeler/screener_labeler.py | 2 +- src/bigdata_research_tools/llm/base.py | 6 +++++- src/bigdata_research_tools/portfolio/motivation.py | 11 ++++++++--- src/bigdata_research_tools/tree.py | 11 +++++++---- 6 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 7186bf2..9813a5d 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -145,12 +145,18 @@ def _run_labeling_prompts( llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True) if provider == "bedrock": - llm = LLMEngine(model=self.llm_model_config.model) + llm = LLMEngine( + model=self.llm_model_config.model, + **self.llm_model_config.connection_config, + ) return run_parallel_prompts( llm, prompts, system_prompt, max_workers, **llm_kwargs ) else: - llm = AsyncLLMEngine(model=self.llm_model_config.model) + llm = AsyncLLMEngine( + model=self.llm_model_config.model, + **self.llm_model_config.connection_config, + ) return run_concurrent_prompts( llm, prompts, diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 0ec0f1a..b319ee8 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -181,7 +181,7 @@ def post_process_dataframe( replace_company_placeholders, axis=1, col_name="quotes" ) else: - print("quotes column not in df") + logger.warning("quotes column not in df") # Select and order columns export_columns = [ diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index ce50d78..b758346 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -171,7 +171,7 @@ def post_process_dataframe( replace_company_placeholders, axis=1, col_name="quotes" ) else: - print("quotes column not in df") + logger.warning("quotes column not in df") # Select and order columns export_columns = [ diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 5b1b974..8b93650 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -6,7 +6,7 @@ from logging import Logger, getLogger from typing import AsyncGenerator, Generator -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, Field, model_validator logger: Logger = getLogger(__name__) @@ -25,6 +25,10 @@ class LLMConfig(BaseModel): presence_penalty: int | None = 0 seed: int | None = 42 max_completion_tokens: int | None = 300 + connection_config: dict = Field( + default_factory=dict, + description="A pair of key-value connection configurations for the LLM provider, the contents will be passed as kwargs to the provider client.", + ) @model_validator(mode="after") def check_temperature_and_reasoning_effort(self): diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index e9c24d0..fa3a381 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -1,4 +1,5 @@ from collections import defaultdict +from logging import Logger, getLogger import pandas as pd from tqdm import tqdm @@ -9,6 +10,8 @@ get_motivation_prompt, ) +logger: Logger = getLogger(__name__) + class Motivation: """ @@ -33,7 +36,9 @@ def __init__( else: self.llm_model_config = llm_model_config - self.llm_engine = LLMEngine(model=self.llm_model_config.model) + self.llm_engine = LLMEngine( + model=self.llm_model_config.model, **self.llm_model_config.connection_config + ) def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" @@ -80,7 +85,7 @@ def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict: # Check if DataFrame is empty if filtered_df.empty: - print("Warning: DataFrame is empty. Returning empty dictionary.") + logger.warning("Warning: DataFrame is empty. Returning empty dictionary.") return {} company_data = defaultdict(lambda: {"quotes_and_labels": []}) @@ -99,7 +104,7 @@ def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict: {"quote": quote, "label": theme} ) - print(f"Found {len(company_data)} unique companies with quotes") + logger.info(f"Found {len(company_data)} unique companies with quotes") # Count label occurrences for each company for company, data in company_data.items(): diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index e1fd499..b237c6a 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -1,6 +1,7 @@ import ast import json from dataclasses import dataclass, field +from logging import Logger, getLogger from typing import Any import graphviz @@ -12,6 +13,8 @@ from bigdata_research_tools.prompts.risk import compose_risk_system_prompt_focus from bigdata_research_tools.prompts.themes import compose_themes_system_prompt +logger: Logger = getLogger(__name__) + themes_default_llm_model_config: dict[str, Any] = { "provider": "openai", "model": "gpt-4o-mini", @@ -358,11 +361,11 @@ def generate_theme_tree( elif isinstance(llm_model_config, str): llm_model_config = get_default_tree_config(llm_model_config) - print(llm_model_config) + logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) - llm = LLMEngine(model=model_str) + llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_themes_system_prompt(main_theme, analyst_focus=focus) @@ -442,11 +445,11 @@ def generate_risk_tree( elif isinstance(llm_model_config, str): llm_model_config = get_default_tree_config(llm_model_config) - print(llm_model_config) + logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) - llm = LLMEngine(model=model_str) + llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_risk_system_prompt_focus(main_theme, focus) From e29088c92ba18f123acc54745707c41d5f75dbbd Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 11 Nov 2025 13:23:48 +0100 Subject: [PATCH 50/82] Sample data before doing motivation to avoid overloading the LLM context --- src/bigdata_research_tools/prompts/motivation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index 9680506..59212e6 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -74,6 +74,7 @@ def get_motivation_prompt( theme_name: str, min_words: int, max_words: int, + max_data_points: int = 300, use_case: MotivationType = MotivationType.THEMATIC_SCREENER, ) -> str: """ @@ -89,6 +90,11 @@ def get_motivation_prompt( Returns: - str: Fully formatted motivation prompt """ + + # Sample only up to max_data_points quotes to avoid overly long prompts + if len(data["quotes_and_labels"]) > max_data_points: + data["quotes_and_labels"] = data["quotes_and_labels"].sample(max_data_points, random_state=42) + label_summary = "\n".join( [f"- {label}: {count} quotes" for label, count in data["label_counts"]] ) From 8c0a3c23b3b8a16d64ff601d1d4f2c15229ad3e9 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 12 Nov 2025 10:54:37 +0100 Subject: [PATCH 51/82] Add timeout to llm config --- src/bigdata_research_tools/llm/base.py | 3 +++ src/bigdata_research_tools/workflows/narrative_miner.py | 3 ++- src/bigdata_research_tools/workflows/risk_analyzer.py | 3 ++- src/bigdata_research_tools/workflows/thematic_screener.py | 3 ++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 8b93650..8b7ffcf 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -25,6 +25,9 @@ class LLMConfig(BaseModel): presence_penalty: int | None = 0 seed: int | None = 42 max_completion_tokens: int | None = 300 + timeout: int | None = Field( + default=60, description="Timeout for LLM requests in seconds.", ge=0 + ) connection_config: dict = Field( default_factory=dict, description="A pair of key-value connection configurations for the LLM provider, the contents will be passed as kwargs to the provider client.", diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index 2c9dafb..ee04c1f 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -61,7 +61,7 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): - self.llm_model_config = llm_model_config + self.llm_model_config = LLMConfig(model=llm_model_config) elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config @@ -122,6 +122,7 @@ def mine_narratives( df_labels = labeler.get_labels( self.narrative_sentences, texts=df_sentences["text"].tolist(), + timeout=self.llm_model_config.timeout ) self.notify_observers( f"Labelling completed. {len(df_labels)} labels generated." diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 05b3ec4..bb9873f 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -83,7 +83,7 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): - self.llm_model_config = llm_model_config + self.llm_model_config = LLMConfig(model=llm_model_config) elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config @@ -200,6 +200,7 @@ def label_search_results( labels=terminal_labels, texts=df_sentences["masked_text"].tolist(), textsconfig=prompt_fields, + timeout=self.llm_model_config.timeout, ) # Merge and process results diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 53b7827..9e83e7d 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -87,7 +87,7 @@ def __init__( if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): - self.llm_model_config = llm_model_config ##resolve it to config or add string check in the trace. + self.llm_model_config = LLMConfig(model=llm_model_config) elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config @@ -188,6 +188,7 @@ def screen_companies( df_labels = labeler.get_labels( main_theme=self.main_theme, labels=terminal_labels, + timeout=self.llm_model_config.timeout, texts=df_sentences["masked_text"].tolist(), ) self.notify_observers("Labelling completed") From ed69aad715e6c0c1e3fd42e8aa5cb06cf163a874 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 13:28:21 +0000 Subject: [PATCH 52/82] added single response validation for risk labeler --- examples/risk_analyzer.py | 2 +- src/bigdata_research_tools/labeler/labeler.py | 219 +++++++++++------- .../labeler/narrative_labeler.py | 6 +- .../labeler/risk_labeler.py | 15 +- .../labeler/screener_labeler.py | 6 +- src/bigdata_research_tools/llm/base.py | 6 +- src/bigdata_research_tools/llm/utils.py | 33 ++- .../portfolio/motivation.py | 2 +- src/bigdata_research_tools/tree.py | 4 +- .../workflows/risk_analyzer.py | 3 + 10 files changed, 178 insertions(+), 118 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index d3272eb..7edb2f3 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -66,7 +66,7 @@ def update(self, message: OberserverNotification): export_path=str(output_path), llm_model_config=LLMConfig( model="openai::gpt-5-mini", - reasoning_effort="high", + reasoning_effort="medium", ), ) # custom_config = { diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 9813a5d..82c25e0 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -85,36 +85,87 @@ def _deserialize_label_responses( """ response_mapping = {} for response in responses: - if not response or not isinstance(response, dict): - continue - - for k, v in response.items(): - try: - response_mapping[k] = { - "motivation": v.get("motivation", ""), - "label": v.get("label", self.unknown_label), - **{ - key: value - for key, value in v.items() - if key not in ["motivation", "label"] - }, - } - # Add any extra keys present in v - extra_keys = { - key: value - for key, value in v.items() - if key not in ["motivation", "label"] - } - response_mapping[k].update(extra_keys) - except (KeyError, AttributeError): - response_mapping[k] = { - "motivation": "", - "label": self.unknown_label, - } + # if not response or not isinstance(response, dict): + # continue + + # for k, v in response.items(): + # try: + # response_mapping[k] = { + # "motivation": v.get("motivation", ""), + # "label": v.get("label", self.unknown_label), + # **{ + # key: value + # for key, value in v.items() + # if key not in ["motivation", "label"] + # }, + # } + # # Add any extra keys present in v + # extra_keys = { + # key: value + # for key, value in v.items() + # if key not in ["motivation", "label"] + # } + # response_mapping[k].update(extra_keys) + # except (KeyError, AttributeError): + # response_mapping[k] = { + # "motivation": "", + # "label": self.unknown_label, + # } + response_mapping.update(self._deserialize_label_response(response)) + + df_labels = self._convert_to_label_df(response_mapping) + + return df_labels + + def _convert_to_label_df(self, response_mapping: dict[str, Any]) -> DataFrame: + """Convert a labeling response dictionary to a DataFrame. + + Args: + response: A response dictionary from the LLM collecting one or more parsed responses + Returns: + DataFrame with schema: + - index: sentence_id + - columns: + - motivation + - label + """ df_labels = DataFrame.from_dict(response_mapping, orient="index") df_labels.index = df_labels.index.astype(int) + df_labels.sort_index(inplace=True) return df_labels + + def _deserialize_label_response(self, response: dict[str, Any]) -> dict: + """mmm + """ + response_mapping = {} + if not response or not isinstance(response, dict): + return response_mapping + + for k, v in response.items(): + try: + response_mapping[int(k)] = { + "motivation": v.get("motivation", ""), + "label": v.get("label", self.unknown_label), + **{ + key: value + for key, value in v.items() + if key not in ["motivation", "label"] + }, + } + # Add any extra keys present in v + extra_keys = { + key: value + for key, value in v.items() + if key not in ["motivation", "label"] + } + response_mapping[int(k)].update(extra_keys) + except (KeyError, AttributeError): + response_mapping[int(k)] = { + "motivation": "", + "label": self.unknown_label, + } + return response_mapping def _run_labeling_prompts( self, @@ -122,7 +173,8 @@ def _run_labeling_prompts( system_prompt: str, timeout: int | None, max_workers: int = 100, - ) -> list: + callback: Any = None, + ) -> dict: """ Get the labels from the prompts. @@ -131,9 +183,9 @@ def _run_labeling_prompts( system_prompt: System prompt for the LLM timeout: Timeout for each LLM request for concurrent calls max_workers: Maximum number of concurrent workers - + callback: Callback function for handling responses Returns: - List of responses from the LLM + Dict of parsed responses from the LLM """ # ADS-140 @@ -142,7 +194,7 @@ def _run_labeling_prompts( # We execute parallel calls using ThreadPoolExecutor for Bedrock and async calls for other providers. provider, _ = self.llm_model_config.model.split("::") - llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True) + llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) if provider == "bedrock": llm = LLMEngine( @@ -163,62 +215,61 @@ def _run_labeling_prompts( system_prompt, timeout, max_workers=max_workers, + callback=callback, **llm_kwargs, ) + + def parse_labeling_response(self, response: str) -> dict: + """ + Parse the response from the LLM model used for labeling. + Args: + response: The response from the LLM model used for labeling, + as a raw string. + Returns: + Parsed dictionary. Will be empty if the parsing fails. Keys: + - motivation + - label + """ + try: + # Improve json retrieval robustness by using a regex as first attempt + # to extract the json object from the response. + # If that fails, we use the json_repair library to try to fix common + # json formatting issues. + match = re.search(r'\{\s*"\d*":.*?\}\s*\}', response, re.DOTALL) -def get_prompts_for_labeler( - texts: list[str], - textsconfig: list[dict[str, Any]] | None = None, -) -> list[str]: - """ - Generate a list of user messages for each text to be labelled by the labeling system. - - Example of generated prompts: [{"sentence_id": 0, "text": "Chunk 0 text here"}, - {"sentence_id": 1, "text": "Chunk 1 text here"}, ...] - - Args: - texts: texts to get the labels from. - textsconfig: Optional fields for the prompts in addition to the text. - - Returns: - A list of prompts for the labeling system. - """ - textsconfig = textsconfig or [] - return [ - dumps({"sentence_id": i, **config, "text": text}) - for i, (config, text) in enumerate( - zip_longest(textsconfig, texts, fillvalue={}) - ) - ] - - -def parse_labeling_response(response: str) -> dict: - """ - Parse the response from the LLM model used for labeling. - - Args: - response: The response from the LLM model used for labeling, - as a raw string. - Returns: - Parsed dictionary. Will be empty if the parsing fails. Keys: - - motivation - - label - """ - try: - # Improve json retrieval robustness by using a regex as first attempt - # to extract the json object from the response. - # If that fails, we use the json_repair library to try to fix common - # json formatting issues. - match = re.search(r'\{\s*"\d*":.*?\}\s*\}', response, re.DOTALL) - - if match: - response = match.group(0) - else: - response = repair_json(response, return_objects=False) - deserialized_response = loads(response) - except JSONDecodeError: - logger.error(f"Error deserializing response: {response}") - return {} + if match: + response = match.group(0) + else: + response = repair_json(response, return_objects=False) + deserialized_response = loads(response) + except JSONDecodeError: + logger.error(f"Error deserializing response: {response}") + return {} - return deserialized_response + return deserialized_response + + def get_prompts_for_labeler(self, + texts: list[str], + textsconfig: list[dict[str, Any]] | None = None, + ) -> list[str]: + """ + Generate a list of user messages for each text to be labelled by the labeling system. + + Example of generated prompts: [{"sentence_id": 0, "text": "Chunk 0 text here"}, + {"sentence_id": 1, "text": "Chunk 1 text here"}, ...] + + Args: + texts: texts to get the labels from. + textsconfig: Optional fields for the prompts in addition to the text. + + Returns: + A list of prompts for the labeling system. + """ + textsconfig = textsconfig or [] + return [ + dumps({"sentence_id": i, **config, "text": text}) + for i, (config, text) in enumerate( + zip_longest(textsconfig, texts, fillvalue={}) + ) + ] diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 9c1d333..d7a3bd3 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -2,11 +2,7 @@ from pandas import DataFrame -from bigdata_research_tools.labeler.labeler import ( - Labeler, - get_prompts_for_labeler, - parse_labeling_response, -) +from bigdata_research_tools.labeler.labeler import Labeler from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import get_narrative_system_prompt diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index b319ee8..22bacb3 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -3,11 +3,7 @@ from pandas import DataFrame, Series -from bigdata_research_tools.labeler.labeler import ( - Labeler, - get_prompts_for_labeler, - parse_labeling_response, -) +from bigdata_research_tools.labeler.labeler import Labeler from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import ( get_other_entity_placeholder, @@ -72,14 +68,15 @@ def get_labels( else self.label_prompt ) - prompts = get_prompts_for_labeler(texts, textsconfig) + prompts = self.get_prompts_for_labeler(texts, textsconfig) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout + prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] ) - responses = [parse_labeling_response(response) for response in responses] + #responses = [self.parse_labeling_response(response) for response in responses] + # return self._deserialize_label_responses(responses) - return self._deserialize_label_responses(responses) + return self._convert_to_label_df(responses) def post_process_dataframe( self, diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index b758346..3b55d76 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -2,11 +2,7 @@ from pandas import DataFrame, Series -from bigdata_research_tools.labeler.labeler import ( - Labeler, - get_prompts_for_labeler, - parse_labeling_response, -) +from bigdata_research_tools.labeler.labeler import Labeler from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.prompts.labeler import ( get_other_entity_placeholder, diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 8b7ffcf..711967c 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -77,13 +77,17 @@ def validate_reasoning_config(self): return self def get_llm_kwargs( - self, remove_max_tokens: bool = False, remove_json_formatting: bool = False + self, remove_max_tokens: bool = False, remove_json_formatting: bool = False, remove_connection_config: bool = True, remove_timeout: bool = False ) -> dict: config_dict = self.model_dump() if remove_max_tokens: config_dict.pop("max_completion_tokens", None) if remove_json_formatting: config_dict.pop("response_format", None) + if remove_connection_config: + config_dict.pop("connection_config", None) + if remove_timeout: + config_dict.pop("timeout", None) # Remove None values and model key return {k: v for k, v in config_dict.items() if v is not None and k != "model"} diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index f65dedf..e9be362 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -18,8 +18,9 @@ def run_concurrent_prompts( system_prompt: str, timeout: int | None, max_workers: int = 30, + callback: Any = None, **kwargs, -) -> list[str]: +) -> dict: """ Run the LLM on the received prompts, concurrently. @@ -32,13 +33,13 @@ def run_concurrent_prompts( kwargs (dict): Additional arguments to pass to the `get_response` method of the LLMEngine. Returns: - list[str]: The list of responses from the LLM model, each in the same order as the prompts. + dict: The dictionary of parsed responses from the LLM model, each keyed by the prompt index. """ semaphore = asyncio.Semaphore(max_workers) logger.info(f"Running {len(prompts)} prompts concurrently") tasks = [ _fetch_with_semaphore( - idx, llm_engine, semaphore, system_prompt, prompt, timeout=timeout, **kwargs + idx, llm_engine, semaphore, system_prompt, prompt, timeout=timeout, callback=callback, **kwargs ) for idx, prompt in enumerate(prompts) ] @@ -52,8 +53,9 @@ async def _fetch_with_semaphore( system_prompt: str, prompt: str, timeout: int | None, + callback: Any = None, **kwargs, -) -> tuple[int, str]: +) -> tuple[int, dict]: """ Fetch the response from the LLM engine with a semaphore. @@ -65,6 +67,7 @@ async def _fetch_with_semaphore( system_prompt (str): The system prompt. prompt (str): The prompt to run. timeout (int | None): Timeout for the LLM request. + callback (Any): Optional callback function to be called with the index and response for each prompt. kwargs (dict): Additional arguments to pass to the `get_response` method of the LLMEngine. Returns: @@ -89,12 +92,19 @@ async def _fetch_with_semaphore( # ~3 took longer than 60 seconds, with up to 600 seconds async with asyncio.timeout(timeout): response = await llm_engine.get_response(chat_history, **kwargs) + if callback is not None: + for func in callback: + response = func(response) return idx, response except Exception as e: if isinstance(e, asyncio.TimeoutError) and attempt == 0: logger.warning( f"Timeout occurred for prompt during LLM call, current timeout configured {timeout} seconds. If this keeps happening (> 1% of your requests), consider increasing the timeout. Retrying..." ) + elif isinstance(e, ValueError): + logger.warning( + f"Error occurred for response validation during LLM call. Retrying..." + ) last_exception = e await asyncio.sleep(retry_delay) # Exponential backoff @@ -102,23 +112,24 @@ async def _fetch_with_semaphore( logger.error( f"Failed to get response for prompt: {prompt} Error: {last_exception}" ) - return idx, "" + return idx, {} async def _run_with_progress_bar( - tasks: list[Coroutine[Any, Any, tuple[int, str]]], -) -> list[str]: + tasks: list[Coroutine[Any, Any, tuple[int, dict]]], +) -> dict: """Run asyncio tasks with a tqdm progress bar.""" # Pre-allocate a list for results to preserve order - results = [""] * len(tasks) + results = {} #""] * len(tasks) with tqdm(total=len(tasks), desc="Querying an LLM...") as pbar: for coro in asyncio.as_completed(tasks): idx, result = await coro - results[idx] = result + #results[idx] = result + results.update(result) # Update the progress bar pbar.update(1) - return results + return results # ADS-140 # Added function to run synchronous LLM calls in parallel using threads. @@ -127,6 +138,7 @@ def run_parallel_prompts( prompts: list[str], system_prompt: str, max_workers: int = 30, + callback: Any = None, **kwargs, ) -> list[str]: """ @@ -137,6 +149,7 @@ def run_parallel_prompts( prompts (list[str]): List of prompts to run concurrently. system_prompt (str): The system prompt. max_workers (int): The maximum number of threads. + callback (Any): Optional callback function to be called with the index and response for each prompt. kwargs (dict): Additional arguments for get_response. Returns: diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index fa3a381..023fda4 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -136,7 +136,7 @@ def query_llm_for_motivation(self, prompt: str) -> str: motivation = self.llm_engine.get_response( chat_history=chat_history, - **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True), + **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True, remove_timeout=True), ) return motivation.strip() diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index b237c6a..1d54637 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -364,7 +364,7 @@ def generate_theme_tree( logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model - chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) + chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_themes_system_prompt(main_theme, analyst_focus=focus) @@ -448,7 +448,7 @@ def generate_risk_tree( logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model - chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True) + chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_risk_system_prompt_focus(main_theme, focus) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index bb9873f..00c0d5d 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -203,6 +203,9 @@ def label_search_results( timeout=self.llm_model_config.timeout, ) + print(f"Labeling completed. {len(df_labels)} labels generated.") + print(df_labels.head()) + # Merge and process results df = merge(df_sentences, df_labels, left_index=True, right_index=True) From 28f8719ee1bce1c3e82ffa947110b665d120ba5a Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 13:53:29 +0000 Subject: [PATCH 53/82] updated screener and narrative labeler --- .../labeler/narrative_labeler.py | 11 +++++++---- .../labeler/screener_labeler.py | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index d7a3bd3..e9005fb 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -58,13 +58,16 @@ def get_labels( if self.label_prompt is None else self.label_prompt ) - prompts = get_prompts_for_labeler(texts) + prompts = self.get_prompts_for_labeler(texts) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout + prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] ) - responses = [parse_labeling_response(response) for response in responses] - return self._deserialize_label_responses(responses) + #responses = [self.parse_labeling_response(response) for response in responses] + # return self._deserialize_label_responses(responses) + + return self._convert_to_label_df(responses) + def post_process_dataframe( self, diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 3b55d76..f675a38 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -61,13 +61,15 @@ def get_labels( system_prompt = self.label_prompt or get_screener_system_prompt( main_theme, labels, unknown_label=self.unknown_label ) - prompts = get_prompts_for_labeler(texts) + prompts = self.get_prompts_for_labeler(texts, textsconfig) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout + prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] ) - responses = [parse_labeling_response(response) for response in responses] - return self._deserialize_label_responses(responses) + #responses = [self.parse_labeling_response(response) for response in responses] + # return self._deserialize_label_responses(responses) + + return self._convert_to_label_df(responses) def post_process_dataframe( self, From 60a417e9951a04dbeae1463f1c8189aae656b9da Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 16:10:38 +0000 Subject: [PATCH 54/82] handling and logging azure jailbreak errors --- src/bigdata_research_tools/llm/utils.py | 46 +++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index e9be362..3144929 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -1,15 +1,50 @@ import asyncio +import json import time from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime from logging import Logger, getLogger +from pathlib import Path from typing import Any, Coroutine +import threading from tqdm import tqdm from bigdata_research_tools.llm.base import AsyncLLMEngine +from openai import BadRequestError logger: Logger = getLogger(__name__) +def initialize_llm_error_log(): + # Ensure output directory exists + output_dir = Path("outputs") + output_dir.mkdir(exist_ok=True) + log_file = output_dir / "llm_error_logs.txt" + return log_file + +def _log_llm_error_to_file(lock: threading.Lock, log_file: Path, timestamp: str, chat_history: list, error: Exception, prompt_idx: int = None): + """ + Thread-safe logging of LLM errors to file. + + Args: + timestamp (str): ISO format timestamp of the error + chat_history (list): The chat history that caused the error + error (Exception): The exception that occurred + prompt_idx (int): Optional index of the prompt for tracking + """ + log_entry = { + "timestamp": timestamp, + "prompt_index": prompt_idx, + "chat_history": chat_history, + "error_type": type(error).__name__, + "error_details": getattr(error, 'body', None) if hasattr(error, 'body') else None + } + + # Thread-safe file writing + with lock: + with open(log_file, "a", encoding="utf-8") as f: + f.write(json.dumps(log_entry, indent=2, default=str) + "\n" + "="*80 + "\n") + # https://platform.openai.com/docs/guides/batch def run_concurrent_prompts( @@ -73,6 +108,8 @@ async def _fetch_with_semaphore( Returns: Tuple[int, str]: The index of the prompt and the response from the LLM model. """ + log_file = initialize_llm_error_log() + _error_lock = threading.Lock() chat_history = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, @@ -105,6 +142,15 @@ async def _fetch_with_semaphore( logger.warning( f"Error occurred for response validation during LLM call. Retrying..." ) + elif isinstance(e, BadRequestError): + if e.body['innererror']['code'] == 'ResponsibleAIPolicyViolation': + print( + f"LLM returned a ResponsibleAIPolicyViolation Error. Ignoring this response..." + ) + # Log the error to file + timestamp = datetime.now().isoformat() + _log_llm_error_to_file(_error_lock, log_file, timestamp, chat_history, e, idx) + return idx, {} # Return empty response for policy violations last_exception = e await asyncio.sleep(retry_delay) # Exponential backoff From 2f2386b0e562686ad06ccbb75ef4c33414575ab7 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 16:31:47 +0000 Subject: [PATCH 55/82] fixed sampling data before motivation --- src/bigdata_research_tools/prompts/motivation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index 59212e6..aa4e248 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -93,7 +93,7 @@ def get_motivation_prompt( # Sample only up to max_data_points quotes to avoid overly long prompts if len(data["quotes_and_labels"]) > max_data_points: - data["quotes_and_labels"] = data["quotes_and_labels"].sample(max_data_points, random_state=42) + data = data.sample(max_data_points, random_state=42).sort_index().reset_index(drop=True) label_summary = "\n".join( [f"- {label}: {count} quotes" for label, count in data["label_counts"]] From 4b635059b8fb86c4be7c4147f365f920ed6964f6 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 17:03:20 +0000 Subject: [PATCH 56/82] other sampling fixes --- src/bigdata_research_tools/prompts/motivation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index aa4e248..e74cb13 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -1,5 +1,5 @@ from enum import Enum - +import random import pandas as pd @@ -70,7 +70,7 @@ def generate_prompt_template_risk() -> str: def get_motivation_prompt( company: str, - data: pd.DataFrame, + data: dict, theme_name: str, min_words: int, max_words: int, @@ -93,7 +93,9 @@ def get_motivation_prompt( # Sample only up to max_data_points quotes to avoid overly long prompts if len(data["quotes_and_labels"]) > max_data_points: - data = data.sample(max_data_points, random_state=42).sort_index().reset_index(drop=True) + print(data["quotes_and_labels"]) + random.seed(42) + data["quotes_and_labels"] = random.sample(data["quotes_and_labels"].tolist(), max_data_points) label_summary = "\n".join( [f"- {label}: {count} quotes" for label, count in data["label_counts"]] From cc778d7aebbadf8b5380bd318f44dc2ac701c92c Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 12 Nov 2025 17:36:14 +0000 Subject: [PATCH 57/82] more fixes --- src/bigdata_research_tools/prompts/motivation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index e74cb13..831dcad 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -93,9 +93,8 @@ def get_motivation_prompt( # Sample only up to max_data_points quotes to avoid overly long prompts if len(data["quotes_and_labels"]) > max_data_points: - print(data["quotes_and_labels"]) random.seed(42) - data["quotes_and_labels"] = random.sample(data["quotes_and_labels"].tolist(), max_data_points) + data["quotes_and_labels"] = random.sample(data["quotes_and_labels"], max_data_points) label_summary = "\n".join( [f"- {label}: {count} quotes" for label, count in data["label_counts"]] From 3164024a349b8f01d22eb9f76e2bd432f9f53036 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 12:44:08 +0100 Subject: [PATCH 58/82] Formatting and linting --- src/bigdata_research_tools/labeler/labeler.py | 20 +++---- .../labeler/narrative_labeler.py | 9 ++-- .../labeler/risk_labeler.py | 8 ++- .../labeler/screener_labeler.py | 10 ++-- src/bigdata_research_tools/llm/base.py | 6 ++- src/bigdata_research_tools/llm/utils.py | 53 +++++++++++++------ .../portfolio/motivation.py | 4 +- .../prompts/motivation.py | 7 +-- src/bigdata_research_tools/tree.py | 8 ++- .../workflows/narrative_miner.py | 2 +- 10 files changed, 87 insertions(+), 40 deletions(-) diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 82c25e0..04a9e14 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -114,9 +114,9 @@ def _deserialize_label_responses( response_mapping.update(self._deserialize_label_response(response)) df_labels = self._convert_to_label_df(response_mapping) - + return df_labels - + def _convert_to_label_df(self, response_mapping: dict[str, Any]) -> DataFrame: """Convert a labeling response dictionary to a DataFrame. @@ -134,13 +134,12 @@ def _convert_to_label_df(self, response_mapping: dict[str, Any]) -> DataFrame: df_labels.index = df_labels.index.astype(int) df_labels.sort_index(inplace=True) return df_labels - + def _deserialize_label_response(self, response: dict[str, Any]) -> dict: - """mmm - """ + """mmm""" response_mapping = {} if not response or not isinstance(response, dict): - return response_mapping + return response_mapping for k, v in response.items(): try: @@ -194,7 +193,9 @@ def _run_labeling_prompts( # We execute parallel calls using ThreadPoolExecutor for Bedrock and async calls for other providers. provider, _ = self.llm_model_config.model.split("::") - llm_kwargs = self.llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + llm_kwargs = self.llm_model_config.get_llm_kwargs( + remove_max_tokens=True, remove_timeout=True + ) if provider == "bedrock": llm = LLMEngine( @@ -218,7 +219,7 @@ def _run_labeling_prompts( callback=callback, **llm_kwargs, ) - + def parse_labeling_response(self, response: str) -> dict: """ Parse the response from the LLM model used for labeling. @@ -249,7 +250,8 @@ def parse_labeling_response(self, response: str) -> dict: return deserialized_response - def get_prompts_for_labeler(self, + def get_prompts_for_labeler( + self, texts: list[str], textsconfig: list[dict[str, Any]] | None = None, ) -> list[str]: diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index e9005fb..7693da1 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -61,14 +61,17 @@ def get_labels( prompts = self.get_prompts_for_labeler(texts) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] + prompts, + system_prompt, + max_workers=max_workers, + timeout=timeout, + callback=[self.parse_labeling_response, self._deserialize_label_response], ) - #responses = [self.parse_labeling_response(response) for response in responses] + # responses = [self.parse_labeling_response(response) for response in responses] # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) - def post_process_dataframe( self, df: DataFrame, diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 22bacb3..0b736d0 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -71,9 +71,13 @@ def get_labels( prompts = self.get_prompts_for_labeler(texts, textsconfig) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] + prompts, + system_prompt, + max_workers=max_workers, + timeout=timeout, + callback=[self.parse_labeling_response, self._deserialize_label_response], ) - #responses = [self.parse_labeling_response(response) for response in responses] + # responses = [self.parse_labeling_response(response) for response in responses] # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index f675a38..791401f 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -61,12 +61,16 @@ def get_labels( system_prompt = self.label_prompt or get_screener_system_prompt( main_theme, labels, unknown_label=self.unknown_label ) - prompts = self.get_prompts_for_labeler(texts, textsconfig) + prompts = self.get_prompts_for_labeler(texts) responses = self._run_labeling_prompts( - prompts, system_prompt, max_workers=max_workers, timeout=timeout, callback=[self.parse_labeling_response, self._deserialize_label_response] + prompts, + system_prompt, + max_workers=max_workers, + timeout=timeout, + callback=[self.parse_labeling_response, self._deserialize_label_response], ) - #responses = [self.parse_labeling_response(response) for response in responses] + # responses = [self.parse_labeling_response(response) for response in responses] # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 711967c..8296733 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -77,7 +77,11 @@ def validate_reasoning_config(self): return self def get_llm_kwargs( - self, remove_max_tokens: bool = False, remove_json_formatting: bool = False, remove_connection_config: bool = True, remove_timeout: bool = False + self, + remove_max_tokens: bool = False, + remove_json_formatting: bool = False, + remove_connection_config: bool = True, + remove_timeout: bool = False, ) -> dict: config_dict = self.model_dump() if remove_max_tokens: diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 3144929..4a7d4e3 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -1,20 +1,21 @@ import asyncio import json +import threading import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from logging import Logger, getLogger from pathlib import Path from typing import Any, Coroutine -import threading +from openai import BadRequestError from tqdm import tqdm from bigdata_research_tools.llm.base import AsyncLLMEngine -from openai import BadRequestError logger: Logger = getLogger(__name__) + def initialize_llm_error_log(): # Ensure output directory exists output_dir = Path("outputs") @@ -22,10 +23,18 @@ def initialize_llm_error_log(): log_file = output_dir / "llm_error_logs.txt" return log_file -def _log_llm_error_to_file(lock: threading.Lock, log_file: Path, timestamp: str, chat_history: list, error: Exception, prompt_idx: int = None): + +def _log_llm_error_to_file( + lock: threading.Lock, + log_file: Path, + timestamp: str, + chat_history: list, + error: Exception, + prompt_idx: int = None, +): """ Thread-safe logging of LLM errors to file. - + Args: timestamp (str): ISO format timestamp of the error chat_history (list): The chat history that caused the error @@ -37,13 +46,17 @@ def _log_llm_error_to_file(lock: threading.Lock, log_file: Path, timestamp: str, "prompt_index": prompt_idx, "chat_history": chat_history, "error_type": type(error).__name__, - "error_details": getattr(error, 'body', None) if hasattr(error, 'body') else None + "error_details": getattr(error, "body", None) + if hasattr(error, "body") + else None, } - + # Thread-safe file writing with lock: with open(log_file, "a", encoding="utf-8") as f: - f.write(json.dumps(log_entry, indent=2, default=str) + "\n" + "="*80 + "\n") + f.write( + json.dumps(log_entry, indent=2, default=str) + "\n" + "=" * 80 + "\n" + ) # https://platform.openai.com/docs/guides/batch @@ -74,7 +87,14 @@ def run_concurrent_prompts( logger.info(f"Running {len(prompts)} prompts concurrently") tasks = [ _fetch_with_semaphore( - idx, llm_engine, semaphore, system_prompt, prompt, timeout=timeout, callback=callback, **kwargs + idx, + llm_engine, + semaphore, + system_prompt, + prompt, + timeout=timeout, + callback=callback, + **kwargs, ) for idx, prompt in enumerate(prompts) ] @@ -140,16 +160,18 @@ async def _fetch_with_semaphore( ) elif isinstance(e, ValueError): logger.warning( - f"Error occurred for response validation during LLM call. Retrying..." + "Error occurred for response validation during LLM call. Retrying..." ) elif isinstance(e, BadRequestError): - if e.body['innererror']['code'] == 'ResponsibleAIPolicyViolation': + if e.body["innererror"]["code"] == "ResponsibleAIPolicyViolation": print( - f"LLM returned a ResponsibleAIPolicyViolation Error. Ignoring this response..." - ) + "LLM returned a ResponsibleAIPolicyViolation Error. Ignoring this response..." + ) # Log the error to file timestamp = datetime.now().isoformat() - _log_llm_error_to_file(_error_lock, log_file, timestamp, chat_history, e, idx) + _log_llm_error_to_file( + _error_lock, log_file, timestamp, chat_history, e, idx + ) return idx, {} # Return empty response for policy violations last_exception = e await asyncio.sleep(retry_delay) @@ -166,17 +188,18 @@ async def _run_with_progress_bar( ) -> dict: """Run asyncio tasks with a tqdm progress bar.""" # Pre-allocate a list for results to preserve order - results = {} #""] * len(tasks) + results = {} # ""] * len(tasks) with tqdm(total=len(tasks), desc="Querying an LLM...") as pbar: for coro in asyncio.as_completed(tasks): idx, result = await coro - #results[idx] = result + # results[idx] = result results.update(result) # Update the progress bar pbar.update(1) return results + # ADS-140 # Added function to run synchronous LLM calls in parallel using threads. def run_parallel_prompts( diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index 023fda4..cd05b1f 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -136,7 +136,9 @@ def query_llm_for_motivation(self, prompt: str) -> str: motivation = self.llm_engine.get_response( chat_history=chat_history, - **self.llm_model_config.get_llm_kwargs(remove_json_formatting=True, remove_timeout=True), + **self.llm_model_config.get_llm_kwargs( + remove_json_formatting=True, remove_timeout=True + ), ) return motivation.strip() diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py index 831dcad..5c22488 100644 --- a/src/bigdata_research_tools/prompts/motivation.py +++ b/src/bigdata_research_tools/prompts/motivation.py @@ -1,6 +1,5 @@ -from enum import Enum import random -import pandas as pd +from enum import Enum class MotivationType(str, Enum): @@ -94,7 +93,9 @@ def get_motivation_prompt( # Sample only up to max_data_points quotes to avoid overly long prompts if len(data["quotes_and_labels"]) > max_data_points: random.seed(42) - data["quotes_and_labels"] = random.sample(data["quotes_and_labels"], max_data_points) + data["quotes_and_labels"] = random.sample( + data["quotes_and_labels"], max_data_points + ) label_summary = "\n".join( [f"- {label}: {count} quotes" for label, count in data["label_counts"]] diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/tree.py index 1d54637..018451e 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/tree.py @@ -364,7 +364,9 @@ def generate_theme_tree( logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model - chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + chat_params = llm_model_config.get_llm_kwargs( + remove_max_tokens=True, remove_timeout=True + ) llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_themes_system_prompt(main_theme, analyst_focus=focus) @@ -448,7 +450,9 @@ def generate_risk_tree( logger.debug(f"LLM Model Config: {llm_model_config}") model_str = llm_model_config.model - chat_params = llm_model_config.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + chat_params = llm_model_config.get_llm_kwargs( + remove_max_tokens=True, remove_timeout=True + ) llm = LLMEngine(model=model_str, **llm_model_config.connection_config) system_prompt = compose_risk_system_prompt_focus(main_theme, focus) diff --git a/src/bigdata_research_tools/workflows/narrative_miner.py b/src/bigdata_research_tools/workflows/narrative_miner.py index ee04c1f..e73b5d6 100644 --- a/src/bigdata_research_tools/workflows/narrative_miner.py +++ b/src/bigdata_research_tools/workflows/narrative_miner.py @@ -122,7 +122,7 @@ def mine_narratives( df_labels = labeler.get_labels( self.narrative_sentences, texts=df_sentences["text"].tolist(), - timeout=self.llm_model_config.timeout + timeout=self.llm_model_config.timeout, ) self.notify_observers( f"Labelling completed. {len(df_labels)} labels generated." From 9871bc455d2bcd10bfe92779aa139128cae1e940 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 12:44:18 +0100 Subject: [PATCH 59/82] Pin version of type checker --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4a9deae..73c358c 100644 --- a/Makefile +++ b/Makefile @@ -19,4 +19,4 @@ format-check: @uvx ruff format --check bigdata_thematic_screener/ tests/ type-check: - @uvx ty check --python-version 3.13 src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Fix version to 3.13 due to this issue https://github.com/astral-sh/ty/issues/1355 # Ignore tutorials, the issues come from this open issue https://github.com/astral-sh/ty/issues/1297 \ No newline at end of file + @uvx ty@0.0.1a26 check --python-version 3.13 src/bigdata_research_tools/ examples/ tests/ # tutorial/ # Fix version to 3.13 due to this issue https://github.com/astral-sh/ty/issues/1355 # Ignore tutorials, the issues come from this open issue https://github.com/astral-sh/ty/issues/1297 \ No newline at end of file From 68f47b75ef13609041cf132e034ea42bfce8621c Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 13:45:37 +0100 Subject: [PATCH 60/82] Remove print in favor of loggers --- src/bigdata_research_tools/llm/utils.py | 2 +- src/bigdata_research_tools/workflows/risk_analyzer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 4a7d4e3..3a12610 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -164,7 +164,7 @@ async def _fetch_with_semaphore( ) elif isinstance(e, BadRequestError): if e.body["innererror"]["code"] == "ResponsibleAIPolicyViolation": - print( + logger.error( "LLM returned a ResponsibleAIPolicyViolation Error. Ignoring this response..." ) # Log the error to file diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 00c0d5d..92588fa 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -203,8 +203,8 @@ def label_search_results( timeout=self.llm_model_config.timeout, ) - print(f"Labeling completed. {len(df_labels)} labels generated.") - print(df_labels.head()) + logger.info(f"Labeling completed. {len(df_labels)} labels generated.") + logger.info(df_labels.head()) # Merge and process results df = merge(df_sentences, df_labels, left_index=True, right_index=True) From b8f366aad9f2c7614ddcef3ac46b058061471dce Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 13:48:42 +0100 Subject: [PATCH 61/82] Remove unneded logs --- src/bigdata_research_tools/workflows/risk_analyzer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 92588fa..bb9873f 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -203,9 +203,6 @@ def label_search_results( timeout=self.llm_model_config.timeout, ) - logger.info(f"Labeling completed. {len(df_labels)} labels generated.") - logger.info(df_labels.head()) - # Merge and process results df = merge(df_sentences, df_labels, left_index=True, right_index=True) From 2f5b56e86302b6dbcd3b7f62715c6a3f88a7ebc6 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 17:04:35 +0100 Subject: [PATCH 62/82] Solve typing errors --- src/bigdata_research_tools/labeler/labeler.py | 82 ++++--------------- .../labeler/narrative_labeler.py | 7 +- .../labeler/risk_labeler.py | 7 +- .../labeler/screener_labeler.py | 7 +- src/bigdata_research_tools/llm/utils.py | 43 +++++----- src/bigdata_research_tools/search/models.py | 79 ++++++++++++++++++ .../search/narrative_search.py | 4 +- .../search/screener_search.py | 15 ++-- .../search/search_utils.py | 14 ++-- 9 files changed, 150 insertions(+), 108 deletions(-) create mode 100644 src/bigdata_research_tools/search/models.py diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 04a9e14..01e0021 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -1,8 +1,9 @@ +import json import re from itertools import zip_longest from json import JSONDecodeError, dumps, loads from logging import Logger, getLogger -from typing import Any +from typing import Any, Callable from json_repair import repair_json from pandas import DataFrame @@ -67,57 +68,7 @@ def get_default_labeler_config(self, model) -> LLMConfig: response_format={"type": "json_object"}, ) - def _deserialize_label_responses( - self, responses: list[dict[str, Any]] - ) -> DataFrame: - """ - Deserialize labeling responses into a DataFrame. - - Args: - responses: List of response dictionaries from the LLM. - - Returns: - DataFrame with schema: - - index: sentence_id - - columns: - - motivation - - label - """ - response_mapping = {} - for response in responses: - # if not response or not isinstance(response, dict): - # continue - - # for k, v in response.items(): - # try: - # response_mapping[k] = { - # "motivation": v.get("motivation", ""), - # "label": v.get("label", self.unknown_label), - # **{ - # key: value - # for key, value in v.items() - # if key not in ["motivation", "label"] - # }, - # } - # # Add any extra keys present in v - # extra_keys = { - # key: value - # for key, value in v.items() - # if key not in ["motivation", "label"] - # } - # response_mapping[k].update(extra_keys) - # except (KeyError, AttributeError): - # response_mapping[k] = { - # "motivation": "", - # "label": self.unknown_label, - # } - response_mapping.update(self._deserialize_label_response(response)) - - df_labels = self._convert_to_label_df(response_mapping) - - return df_labels - - def _convert_to_label_df(self, response_mapping: dict[str, Any]) -> DataFrame: + def _convert_to_label_df(self, response_mapping: list[str]) -> DataFrame: """Convert a labeling response dictionary to a DataFrame. Args: @@ -130,16 +81,19 @@ def _convert_to_label_df(self, response_mapping: dict[str, Any]) -> DataFrame: - motivation - label """ - df_labels = DataFrame.from_dict(response_mapping, orient="index") + responses_json = {} + for response in response_mapping: + responses_json.update(json.loads(response)) + df_labels = DataFrame.from_dict(responses_json, orient="index") df_labels.index = df_labels.index.astype(int) df_labels.sort_index(inplace=True) return df_labels - def _deserialize_label_response(self, response: dict[str, Any]) -> dict: - """mmm""" + def _deserialize_label_response(self, response: str) -> str: + response = json.loads(response) response_mapping = {} if not response or not isinstance(response, dict): - return response_mapping + raise ValueError("Response is empty or not a dictionary") for k, v in response.items(): try: @@ -164,7 +118,7 @@ def _deserialize_label_response(self, response: dict[str, Any]) -> dict: "motivation": "", "label": self.unknown_label, } - return response_mapping + return str(response_mapping) def _run_labeling_prompts( self, @@ -172,8 +126,8 @@ def _run_labeling_prompts( system_prompt: str, timeout: int | None, max_workers: int = 100, - callback: Any = None, - ) -> dict: + processing_callbacks: list[Callable[[str], str]] | None = None, + ) -> list[str]: """ Get the labels from the prompts. @@ -182,7 +136,7 @@ def _run_labeling_prompts( system_prompt: System prompt for the LLM timeout: Timeout for each LLM request for concurrent calls max_workers: Maximum number of concurrent workers - callback: Callback function for handling responses + processing_callbacks: Callback function for handling responses Returns: Dict of parsed responses from the LLM """ @@ -216,11 +170,11 @@ def _run_labeling_prompts( system_prompt, timeout, max_workers=max_workers, - callback=callback, + processing_callbacks=processing_callbacks, **llm_kwargs, ) - def parse_labeling_response(self, response: str) -> dict: + def parse_labeling_response(self, response: str) -> str: """ Parse the response from the LLM model used for labeling. @@ -246,9 +200,9 @@ def parse_labeling_response(self, response: str) -> dict: deserialized_response = loads(response) except JSONDecodeError: logger.error(f"Error deserializing response: {response}") - return {} + return "" - return deserialized_response + return str(deserialized_response) def get_prompts_for_labeler( self, diff --git a/src/bigdata_research_tools/labeler/narrative_labeler.py b/src/bigdata_research_tools/labeler/narrative_labeler.py index 7693da1..7ed13f8 100644 --- a/src/bigdata_research_tools/labeler/narrative_labeler.py +++ b/src/bigdata_research_tools/labeler/narrative_labeler.py @@ -65,10 +65,11 @@ def get_labels( system_prompt, max_workers=max_workers, timeout=timeout, - callback=[self.parse_labeling_response, self._deserialize_label_response], + processing_callbacks=[ + self.parse_labeling_response, + self._deserialize_label_response, + ], ) - # responses = [self.parse_labeling_response(response) for response in responses] - # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py index 0b736d0..6a18cb6 100644 --- a/src/bigdata_research_tools/labeler/risk_labeler.py +++ b/src/bigdata_research_tools/labeler/risk_labeler.py @@ -75,10 +75,11 @@ def get_labels( system_prompt, max_workers=max_workers, timeout=timeout, - callback=[self.parse_labeling_response, self._deserialize_label_response], + processing_callbacks=[ + self.parse_labeling_response, + self._deserialize_label_response, + ], ) - # responses = [self.parse_labeling_response(response) for response in responses] - # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py index 791401f..28fc5a7 100644 --- a/src/bigdata_research_tools/labeler/screener_labeler.py +++ b/src/bigdata_research_tools/labeler/screener_labeler.py @@ -68,10 +68,11 @@ def get_labels( system_prompt, max_workers=max_workers, timeout=timeout, - callback=[self.parse_labeling_response, self._deserialize_label_response], + processing_callbacks=[ + self.parse_labeling_response, + self._deserialize_label_response, + ], ) - # responses = [self.parse_labeling_response(response) for response in responses] - # return self._deserialize_label_responses(responses) return self._convert_to_label_df(responses) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 3a12610..00334af 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -6,7 +6,7 @@ from datetime import datetime from logging import Logger, getLogger from pathlib import Path -from typing import Any, Coroutine +from typing import Any, Callable, Coroutine from openai import BadRequestError from tqdm import tqdm @@ -30,7 +30,7 @@ def _log_llm_error_to_file( timestamp: str, chat_history: list, error: Exception, - prompt_idx: int = None, + prompt_idx: int | None = None, ): """ Thread-safe logging of LLM errors to file. @@ -66,9 +66,9 @@ def run_concurrent_prompts( system_prompt: str, timeout: int | None, max_workers: int = 30, - callback: Any = None, + processing_callbacks: list[Callable[[str], str]] | None = None, **kwargs, -) -> dict: +) -> list[str]: """ Run the LLM on the received prompts, concurrently. @@ -93,7 +93,7 @@ def run_concurrent_prompts( system_prompt, prompt, timeout=timeout, - callback=callback, + processing_callbacks=processing_callbacks, **kwargs, ) for idx, prompt in enumerate(prompts) @@ -108,9 +108,9 @@ async def _fetch_with_semaphore( system_prompt: str, prompt: str, timeout: int | None, - callback: Any = None, + processing_callbacks: list[Callable[[str], str]] | None = None, **kwargs, -) -> tuple[int, dict]: +) -> tuple[int, str]: """ Fetch the response from the LLM engine with a semaphore. @@ -122,7 +122,7 @@ async def _fetch_with_semaphore( system_prompt (str): The system prompt. prompt (str): The prompt to run. timeout (int | None): Timeout for the LLM request. - callback (Any): Optional callback function to be called with the index and response for each prompt. + processing_callbacks (list[Callable[[str], str]] | None): Optional callback function to be called with the index and response for each prompt. kwargs (dict): Additional arguments to pass to the `get_response` method of the LLMEngine. Returns: @@ -149,8 +149,8 @@ async def _fetch_with_semaphore( # ~3 took longer than 60 seconds, with up to 600 seconds async with asyncio.timeout(timeout): response = await llm_engine.get_response(chat_history, **kwargs) - if callback is not None: - for func in callback: + if processing_callbacks is not None: + for func in processing_callbacks: response = func(response) return idx, response except Exception as e: @@ -163,7 +163,11 @@ async def _fetch_with_semaphore( "Error occurred for response validation during LLM call. Retrying..." ) elif isinstance(e, BadRequestError): - if e.body["innererror"]["code"] == "ResponsibleAIPolicyViolation": + # Check for ResponsibleAIPolicyViolation error on Azure OpenAI + if ( + e.response.json().get("innererror", {}).get("code") + == "ResponsibleAIPolicyViolation" + ): logger.error( "LLM returned a ResponsibleAIPolicyViolation Error. Ignoring this response..." ) @@ -172,7 +176,7 @@ async def _fetch_with_semaphore( _log_llm_error_to_file( _error_lock, log_file, timestamp, chat_history, e, idx ) - return idx, {} # Return empty response for policy violations + return idx, "" # Return empty response for policy violations last_exception = e await asyncio.sleep(retry_delay) # Exponential backoff @@ -180,20 +184,19 @@ async def _fetch_with_semaphore( logger.error( f"Failed to get response for prompt: {prompt} Error: {last_exception}" ) - return idx, {} + return idx, "" async def _run_with_progress_bar( - tasks: list[Coroutine[Any, Any, tuple[int, dict]]], -) -> dict: + tasks: list[Coroutine[Any, Any, tuple[int, str]]], +) -> list[str]: """Run asyncio tasks with a tqdm progress bar.""" # Pre-allocate a list for results to preserve order - results = {} # ""] * len(tasks) + results = [""] * len(tasks) with tqdm(total=len(tasks), desc="Querying an LLM...") as pbar: for coro in asyncio.as_completed(tasks): idx, result = await coro - # results[idx] = result - results.update(result) + results[idx] = result # Update the progress bar pbar.update(1) @@ -207,7 +210,7 @@ def run_parallel_prompts( prompts: list[str], system_prompt: str, max_workers: int = 30, - callback: Any = None, + processing_callbacks: list[Callable[[str], str]] | None = None, **kwargs, ) -> list[str]: """ @@ -218,7 +221,7 @@ def run_parallel_prompts( prompts (list[str]): List of prompts to run concurrently. system_prompt (str): The system prompt. max_workers (int): The maximum number of threads. - callback (Any): Optional callback function to be called with the index and response for each prompt. + processing_callbacks (list[Callable[[str], str]] | None): Optional callback function to be called with the index and response for each prompt. kwargs (dict): Additional arguments for get_response. Returns: diff --git a/src/bigdata_research_tools/search/models.py b/src/bigdata_research_tools/search/models.py new file mode 100644 index 0000000..e9de491 --- /dev/null +++ b/src/bigdata_research_tools/search/models.py @@ -0,0 +1,79 @@ +from bigdata_client.models.entities import QueryComponentMixin +from pydantic import BaseModel + + +class BigdataEntity(BaseModel): + id: str + name: str + volume: int | None = None + description: str | None = None + entity_type: str + company_type: str | None = None + country: str | None = None + sector: str | None = None + industry_group: str | None = None + industry: str | None = None + ticker: str | None = None + webpage: str | None = None + isin_values: list[str] | None = None + cusip_values: list[str] | None = None + sedol_values: list[str] | None = None + listing_values: list[str] | None = None + product_type: str | None = None + product_owner: str | None = None + organization_type: str | None = None + position: str | None = None + employer: str | None = None + nationality: str | None = None + gender: str | None = None + place_type: str | None = None + region: str | None = None + landmark_type: str | None = None + # Fields for concepts that are not covered above + entity_type_name: str | None = None + concept_level_2: str | None = None + concept_level_3: str | None = None + concept_level_4: str | None = None + concept_level_5: str | None = None + + @classmethod + def from_sdk(cls, sdk_entity: QueryComponentMixin) -> "BigdataEntity": + assert getattr(sdk_entity, "id", None) is not None, ( + "SDK entity must have an 'id' attribute" + ) + assert getattr(sdk_entity, "name", None) is not None, ( + "SDK entity must have a 'name' attribute" + ) + return cls( + id=getattr(sdk_entity, "id", None), + name=getattr(sdk_entity, "name", None), + volume=getattr(sdk_entity, "volume", None), + description=getattr(sdk_entity, "description", None), + entity_type=getattr(sdk_entity, "entity_type", None), + company_type=getattr(sdk_entity, "company_type", None), + country=getattr(sdk_entity, "country", None), + sector=getattr(sdk_entity, "sector", None), + industry_group=getattr(sdk_entity, "industry_group", None), + industry=getattr(sdk_entity, "industry", None), + ticker=getattr(sdk_entity, "ticker", None), + webpage=getattr(sdk_entity, "webpage", None), + isin_values=getattr(sdk_entity, "isin_values", None), + cusip_values=getattr(sdk_entity, "cusip_values", None), + sedol_values=getattr(sdk_entity, "sedol_values", None), + listing_values=getattr(sdk_entity, "listing_values", None), + product_type=getattr(sdk_entity, "product_type", None), + product_owner=getattr(sdk_entity, "product_owner", None), + organization_type=getattr(sdk_entity, "organization_type", None), + position=getattr(sdk_entity, "position", None), + employer=getattr(sdk_entity, "employer", None), + nationality=getattr(sdk_entity, "nationality", None), + gender=getattr(sdk_entity, "gender", None), + place_type=getattr(sdk_entity, "place_type", None), + region=getattr(sdk_entity, "region", None), + landmark_type=getattr(sdk_entity, "landmark_type", None), + entity_type_name=getattr(sdk_entity, "entity_type_name", None), + concept_level_2=getattr(sdk_entity, "concept_level_2", None), + concept_level_3=getattr(sdk_entity, "concept_level_3", None), + concept_level_4=getattr(sdk_entity, "concept_level_4", None), + concept_level_5=getattr(sdk_entity, "concept_level_5", None), + ) diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py index 81d6ce9..d7c8711 100644 --- a/src/bigdata_research_tools/search/narrative_search.py +++ b/src/bigdata_research_tools/search/narrative_search.py @@ -1,11 +1,11 @@ from logging import Logger, getLogger from bigdata_client.document import Document -from bigdata_client.models.entities import Concept from bigdata_client.models.search import DocumentType, SortBy from pandas import DataFrame from tqdm import tqdm +from bigdata_research_tools.search.models import BigdataEntity from bigdata_research_tools.search.query_builder import ( EntitiesToSearch, build_batched_query, @@ -120,7 +120,7 @@ def search_narratives( def _process_narrative_search( results: list[Document], - entities: list[Concept], + entities: list[BigdataEntity], ) -> DataFrame: """ Build a dataframe for when no companies are specified. diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py index a0d7c1b..c45f692 100644 --- a/src/bigdata_research_tools/search/screener_search.py +++ b/src/bigdata_research_tools/search/screener_search.py @@ -2,7 +2,7 @@ from logging import Logger, getLogger from bigdata_client.document import Document -from bigdata_client.models.entities import Company, Concept +from bigdata_client.models.entities import Company from bigdata_client.models.search import DocumentType, SortBy from pandas import DataFrame from tqdm import tqdm @@ -12,6 +12,7 @@ get_other_entity_placeholder, get_target_entity_placeholder, ) +from bigdata_research_tools.search.models import BigdataEntity from bigdata_research_tools.search.query_builder import ( EntitiesToSearch, build_batched_query, @@ -192,15 +193,15 @@ def search_by_companies( def filter_company_entities( - entities: list[Concept], -) -> tuple[list[Concept], list[Concept]]: + entities: list[BigdataEntity], +) -> tuple[list[BigdataEntity], list[BigdataEntity]]: """ Filter only COMPANY entities from the list of entities. Args: - entities (List[Concept]): A list of entities to filter. + entities (List[BigdataEntity]): A list of entities to filter. Returns: - List[Concept]: A list of COMPANY entities. + List[BigdataEntity]: A list of COMPANY entities. """ return [ entity @@ -215,8 +216,8 @@ def filter_company_entities( def process_screener_search_results( results: list[Document], - entities: list[Concept], - topics: list[Concept], + entities: list[BigdataEntity], + topics: list[BigdataEntity], companies: list[Company] | None = None, document_type: DocumentType = DocumentType.NEWS, ) -> DataFrame: diff --git a/src/bigdata_research_tools/search/search_utils.py b/src/bigdata_research_tools/search/search_utils.py index ea40bbf..cb0f936 100644 --- a/src/bigdata_research_tools/search/search_utils.py +++ b/src/bigdata_research_tools/search/search_utils.py @@ -7,11 +7,11 @@ from bigdata_client.connection import RequestMaxLimitExceeds from bigdata_client.document import Document from bigdata_client.models.document import DocumentChunk -from bigdata_client.models.entities import Concept from bigdata_client.query_type import QueryType from pydantic import ValidationError from bigdata_research_tools.client import bigdata_connection +from bigdata_research_tools.search.models import BigdataEntity logger: Logger = getLogger(__name__) @@ -38,7 +38,7 @@ def _collect_entity_keys(results: list[Document]) -> list[str]: def _look_up_entities_binary_search( entity_keys: list[str], max_batch_size: int = 50 -) -> list[Concept]: +) -> list[BigdataEntity]: """ Look up entities using the Bigdata Knowledge Graph in a binary search manner. @@ -68,7 +68,7 @@ def depth_first_search(batch: list[str]) -> None: try: batch_lookup = bigdata.knowledge_graph.get_entities(batch) - entities.extend(batch_lookup) + entities.extend([BigdataEntity.from_sdk(ent) for ent in batch_lookup]) except ValidationError as e: non_entities_found = findall(non_entity_key_pattern, str(e)) non_entities.extend(non_entities_found) @@ -105,7 +105,7 @@ def depth_first_search(batch: list[str]) -> None: def filter_search_results( results: list[list[Document]], -) -> tuple[list[Document], list[Concept]]: +) -> tuple[list[Document], list[BigdataEntity]]: """ Postprocess the search results to filter only COMPANY entities. @@ -114,7 +114,7 @@ def filter_search_results( the function `bigdata_research_tools.search.run_search` with the parameter `only_results` set to True Returns: - Tuple[List[Document], List[Concept]]: A tuple of the filtered + Tuple[List[Document], List[BigdataEntity]]: A tuple of the filtered search results and the entities. """ # Flatten the list of result lists @@ -127,7 +127,9 @@ def filter_search_results( return results, entities -def build_chunk_entities(chunk: DocumentChunk, entities: list[Concept]) -> list[dict]: +def build_chunk_entities( + chunk: DocumentChunk, entities: list[BigdataEntity] +) -> list[dict]: entity_key_map = {entity.id: entity for entity in entities} chunk_entities = [ From f1864d36d831546ed08cd247f043538e201c0679 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 17:19:56 +0100 Subject: [PATCH 63/82] Fix type issue --- src/bigdata_research_tools/llm/utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 00334af..4aff38c 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -8,7 +8,16 @@ from pathlib import Path from typing import Any, Callable, Coroutine -from openai import BadRequestError +import httpx + +try: + from openai import BadRequestError # ty: ignore[unresolved-import] +except ImportError: + # Fallback, this wont work for actual OpenAI calls but avoids import errors + class BadRequestError(Exception): + response: httpx.Response + + from tqdm import tqdm from bigdata_research_tools.llm.base import AsyncLLMEngine From 6d7fe23d9a103e3ade931af0e5004f764053b621 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 13 Nov 2025 17:22:55 +0100 Subject: [PATCH 64/82] Fix typing error --- src/bigdata_research_tools/llm/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index 4aff38c..e154bfb 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -8,14 +8,15 @@ from pathlib import Path from typing import Any, Callable, Coroutine -import httpx - try: from openai import BadRequestError # ty: ignore[unresolved-import] except ImportError: # Fallback, this wont work for actual OpenAI calls but avoids import errors + class hasJsonBody: + def json(self) -> dict: + return {} class BadRequestError(Exception): - response: httpx.Response + response: hasJsonBody from tqdm import tqdm From 46750ea5395a47b0b721e46fb338b26e6ce24d89 Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 14 Nov 2025 13:54:40 +0100 Subject: [PATCH 65/82] Fixeed json serialization --- src/bigdata_research_tools/labeler/labeler.py | 4 ++-- src/bigdata_research_tools/llm/utils.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index 01e0021..adbb382 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -118,7 +118,7 @@ def _deserialize_label_response(self, response: str) -> str: "motivation": "", "label": self.unknown_label, } - return str(response_mapping) + return json.dumps(response_mapping) def _run_labeling_prompts( self, @@ -202,7 +202,7 @@ def parse_labeling_response(self, response: str) -> str: logger.error(f"Error deserializing response: {response}") return "" - return str(deserialized_response) + return json.dumps(deserialized_response) def get_prompts_for_labeler( self, diff --git a/src/bigdata_research_tools/llm/utils.py b/src/bigdata_research_tools/llm/utils.py index e154bfb..52051fd 100644 --- a/src/bigdata_research_tools/llm/utils.py +++ b/src/bigdata_research_tools/llm/utils.py @@ -15,6 +15,7 @@ class hasJsonBody: def json(self) -> dict: return {} + class BadRequestError(Exception): response: hasJsonBody From f68a7b5e218dd3764f9becd62a3b31dae0f9b904 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 17 Nov 2025 16:33:42 +0000 Subject: [PATCH 66/82] migrating deep mindmaps --- examples/grounded_mindmaps.py | 109 +++ src/bigdata_research_tools/llm/base.py | 4 +- src/bigdata_research_tools/llm/openai.py | 12 +- .../{tree.py => mindmap/mindmap.py} | 78 +- .../mindmap/mindmap_generator.py | 719 ++++++++++++++++++ .../mindmap/mindmap_utils.py | 121 +++ .../search/query_builder.py | 2 +- src/bigdata_research_tools/search/search.py | 14 +- .../workflows/risk_analyzer.py | 14 +- .../workflows/thematic_screener.py | 2 +- 10 files changed, 1030 insertions(+), 45 deletions(-) create mode 100644 examples/grounded_mindmaps.py rename src/bigdata_research_tools/{tree.py => mindmap/mindmap.py} (86%) create mode 100644 src/bigdata_research_tools/mindmap/mindmap_generator.py create mode 100644 src/bigdata_research_tools/mindmap/mindmap_utils.py diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py new file mode 100644 index 0000000..f032b8d --- /dev/null +++ b/examples/grounded_mindmaps.py @@ -0,0 +1,109 @@ +import logging + +from bigdata_client import Bigdata +from bigdata_client.models.search import DocumentType +from dotenv import load_dotenv +from traitlets import Any + +from bigdata_research_tools.mindmap.mindmap import MindMap +from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator + +# Load environment variables for authentication +print(f"Environment variables loaded: {load_dotenv()}") + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +def test_one_shot_mindmap(main_theme, focus, map_type, instructions, llm_base_config: str = "openai::gpt-4o-mini") -> MindMap: + """Test one-shot mind map generation with base LLM.""" + logger.info("=" * 60) + logger.info("TEST 1: One-Shot Mind Map Generation with Base LLM") + logger.info("=" * 60) + mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config,) + mindmap = mindmap_generator.generate_one_shot( + instructions=instructions, + focus=focus, + main_theme=main_theme, + map_type = map_type, + allow_grounding=False, +) + logger.info("Results: %s", mindmap['mindmap_text']) + return mindmap["mindmap_json"] + + +def test_refined_mindmap(main_theme, focus, map_type, instructions, base_mindmap: str, llm_base_config: str = "openai::o3-mini") -> MindMap: + """Test refined mindmap generation with reasoning LLM sent in the base config.""" + logger.info("=" * 60) + logger.info("TEST 2: Refined MindMap Generation with Reasoning LLM in Base Config") + logger.info("=" * 60) + mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config,) + mindmap = mindmap_generator.generate_refined(focus = focus, + main_theme = main_theme, + initial_mindmap = base_mindmap, + grounding_method = "tool_call", + output_dir = "./refined_mindmaps", + filename = "refined_mindmap.json", + map_type = map_type, + instructions = instructions, + ) + logger.info("Results: %s", mindmap['mindmap_text']) + +def test_refined_mindmap2(main_theme, focus, map_type, instructions, base_mindmap: str, llm_base_config: str | None = None, llm_reasoning_config: str = "openai::o3-mini") -> MindMap: + """Test refined mindmap generation with reasoning LLM sent in the reasoning config.""" + logger.info("=" * 60) + logger.info("TEST 3: Refined MindMap Generation with Reasoning LLM in Reasoning Config") + logger.info("=" * 60) + mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config) + mindmap = mindmap_generator.generate_refined(focus = focus, + main_theme = main_theme, + initial_mindmap = base_mindmap, + grounding_method = "tool_call", + output_dir = "./refined_mindmaps", + filename = "refined_mindmap.json", + map_type = map_type, + instructions = instructions, + ) + logger.info("Results: %s", mindmap['mindmap_text']) + +def test_dynamic_mindmap(main_theme, focus, map_type, instructions, llm_base_config: str = "openai::gpt-4o-mini", llm_reasoning_config: str = "openai::o3-mini") -> MindMap: + """Test dynamic mindmap generation with two LLMs.""" + logger.info("=" * 60) + logger.info("TEST 4: Dynamic MindMap Generation with Two LLMs") + logger.info("=" * 60) + mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config) + mindmap = mindmap_generator.generate_dynamic( + instructions = instructions, + focus = focus, + main_theme = main_theme, + month_intervals = [["2025-10-01", "2025-10-31"], ["2025-11-01", "2025-11-30"], ["2025-12-01", "2025-12-31"]], + month_names = ['October_2025', 'November_2025', 'December_2025'],) + logger.info("Results: %s", mindmap['base_mindmap']) + logger.info("Results: %s", mindmap['October_2025']) + logger.info("") + +def main(MAIN_THEME = "Political Change in Japan.", + INSTRUCTIONS = 'Create a mindmap according to a given risk scenario. Map by risk type for any industry and assess short term impact only.', + FOCUS = "Provide a detailed taxonomy of risks related to changes in the Japanese political landscape. Evaluate how the resignation of the Prime Minister and the pre-election of Sanae Takaichi will affect companies, their strategy and operations. Take into consideration their increased conservative stance on immigration, energy, and trade. Add any other risk areas that may arise from these political changes. The mind map should be as comprehensive as possible and cover all major risk areas.", + map_type = 'risk'): + """Run all tests.""" + logger.info("Testing Grounded MindMap Generation") + logger.info("=" * 60) + + try: + # base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") + # test_refined_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") + # test_refined_mindmap2(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") + test_dynamic_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini", llm_reasoning_config="openai::o3-mini") + + logger.info("=" * 60) + logger.info("All tests completed successfully") + + except Exception as e: + logger.error("Error during testing: %s", e) + raise + +if __name__ == "__main__": + main() diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 8296733..3dc72fe 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -245,7 +245,6 @@ def get_tools_response( self, chat_history: list[dict[str, str]], tools: list[dict[str, str]], - temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: """ @@ -332,7 +331,6 @@ def get_tools_response( self, chat_history: list[dict[str, str]], tools: list[dict[str, str]], - temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: """ @@ -353,7 +351,7 @@ def get_tools_response( - text (str): The text content of the message, if any. """ return self.provider.get_tools_response( - chat_history, tools, temperature, **kwargs + chat_history, tools, **kwargs ) diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 57152bd..6c4670c 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -59,7 +59,6 @@ async def get_tools_response( self, chat_history: list[dict[str, str]], tools: list[dict[str, str]], - temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: """ @@ -85,7 +84,6 @@ async def get_tools_response( messages=chat_history, model=self.model, tools=tools, - temperature=temperature, **kwargs, ) message = response.choices[0].message @@ -170,9 +168,8 @@ def get_tools_response( self, chat_history: list[dict[str, str]], tools: list[dict[str, str]], - temperature: float = 0, **kwargs, - ) -> dict[str, list[dict] | str]: + ) -> dict[list, list, list[dict] | str, dict]: """ Get the response from an LLM model from OpenAI with tools. Args: @@ -196,19 +193,22 @@ def get_tools_response( messages=chat_history, model=self.model, tools=tools, - temperature=temperature, **kwargs, ) message = response.choices[0].message output = { + "id" : [], "func_names": [], "arguments": [], "text": message.content, - } + "tool_calls": {}} + if function_calls := message.tool_calls if message.tool_calls else None: output = { + "id" : [f.id for f in function_calls], "func_names": [f.function.name for f in function_calls], "arguments": [loads(f.function.arguments) for f in function_calls], + "tool_calls": response.model_dump().get("choices", [])[0].get("message", {}).get("tool_calls", []) } return output diff --git a/src/bigdata_research_tools/tree.py b/src/bigdata_research_tools/mindmap/mindmap.py similarity index 86% rename from src/bigdata_research_tools/tree.py rename to src/bigdata_research_tools/mindmap/mindmap.py index 018451e..c047dc4 100644 --- a/src/bigdata_research_tools/tree.py +++ b/src/bigdata_research_tools/mindmap/mindmap.py @@ -30,7 +30,7 @@ @dataclass -class SemanticTree: +class MindMap: """ A hierarchical tree structure where each node represents a semantically meaningful unit, or node, that guide the analyst's research process. @@ -44,36 +44,36 @@ class SemanticTree: summary (str): A brief explanation of the node's relevance. For the root node, this describes the overall relevance of the tree; for sub-nodes, it explains their connection to the parent node. - children (list[SemanticTree] | None): A list of child nodes representing sub-units. + children (list[MindMap] | None): A list of child nodes representing sub-units. keywords (list[str] | None): A list of keywords summarizing the current node. """ label: str node: int summary: str = "" - children: list["SemanticTree"] = field(default_factory=list) + children: list["MindMap"] = field(default_factory=list) keywords: list[str] = field(default_factory=list) def __str__(self) -> str: return self.as_string() @staticmethod - def from_dict(tree_dict: dict) -> "SemanticTree": + def from_dict(tree_dict: dict) -> "MindMap": """ - Create a SemanticTree object from a dictionary. + Create a MindMap object from a dictionary. Args: - tree_dict (dict): A dictionary representing the SemanticTree structure. + tree_dict (dict): A dictionary representing the MindMap structure. Returns: - SemanticTree: The SemanticTree object generated from the dictionary. + MindMap: The MindMap object generated from the dictionary. """ # Handle case sensitivity in keys tree_dict = dict_keys_to_lowercase(tree_dict) - tree = SemanticTree(**tree_dict) # ty: ignore[missing-argument] + tree = MindMap(**tree_dict) # ty: ignore[missing-argument] tree.children = [ - SemanticTree.from_dict(child) for child in tree_dict.get("children", []) + MindMap.from_dict(child) for child in tree_dict.get("children", []) ] return tree @@ -110,7 +110,7 @@ def get_label_summaries(self) -> dict[str, str]: Extract the label summaries from the tree. Returns: - dict[str, str]: Dictionary with all the labels of the SemanticTree as keys and their associated summaries as values. + dict[str, str]: Dictionary with all the labels of the MindMap as keys and their associated summaries as values. """ label_summary = {self.label: self.summary} for child in self.children: @@ -119,7 +119,7 @@ def get_label_summaries(self) -> dict[str, str]: def get_summaries(self) -> list[str]: """ - Extract the node summaries from a SemanticTree. + Extract the node summaries from a MindMap. Returns: list[str]: List of all 'summary' values in the tree, including its children. @@ -134,7 +134,7 @@ def get_terminal_label_summaries(self) -> dict[str, str]: Extract the items (labels, summaries) from terminal nodes of the tree. Returns: - dict[str, str]: Dictionary with the labels of the SemanticTree as keys and + dict[str, str]: Dictionary with the labels of the MindMap as keys and their associated summaries as values, only using terminal nodes. """ label_summary = {} @@ -214,7 +214,7 @@ def _visualize_graphviz(self) -> graphviz.Digraph: splines="curved", ) - def add_nodes(node: SemanticTree): + def add_nodes(node: MindMap): # Determine if the node is a terminal (leaf) node is_terminal = not node.children @@ -272,7 +272,7 @@ def _visualize_plotly(self) -> None: "please install `bigdata_research_tools[plotly]` to enable them." ) - def extract_labels(node: SemanticTree, parent_label=""): + def extract_labels(node: MindMap, parent_label=""): labels.append(node.label) parents.append(parent_label) for child in node.children: @@ -330,14 +330,44 @@ def save_json(self, filepath: str, **kwargs) -> None: with open(filepath, "w", encoding="utf-8") as f: json.dump(self._to_dict(), f, ensure_ascii=False, indent=2, **kwargs) + def to_rows(self, parent_label=None): + """ + Flatten tree to rows for DataFrame: each row is (Parent, Label, Node, Summary) + """ + rows = [] + rows.append({ + "Parent": parent_label, + "Label": self.label, + "Node": self.node, + "Summary": self.summary + }) + for child in self.children: + rows.extend(child.to_rows(parent_label=self.label)) + return rows + + def to_dataframe(self, leaves_only=False): + import pandas as pd + rows = self.to_rows(parent_label=None) + # Exclude rows where Parent is None or Parent == self.label (root node) + filtered = [row for row in rows if row["Parent"] not in (None, self.label)] + if leaves_only: + # Only keep rows that are leaves (i.e., have no children) + leaf_labels = {row["Label"] for row in filtered} + filtered = [row for row in filtered if row["Label"] not in {r["Parent"] for r in filtered}] + return pd.DataFrame(filtered) + + def to_json(self): + + return json.dumps(self._to_dict(), indent=2) + def generate_theme_tree( main_theme: str, focus: str = "", llm_model_config: LLMConfig | dict | str = "openai::gpt-4o-mini", -) -> SemanticTree: +) -> MindMap: """ - Generate a `SemanticTree` class from a main theme and focus. + Generate a `MindMap` class from a main theme and focus. Args: main_theme (str): The primary theme to analyze. @@ -354,7 +384,7 @@ def generate_theme_tree( - `seed` (int) Returns: - SemanticTree: The generated theme tree. + MindMap: The generated theme tree. """ if isinstance(llm_model_config, dict): llm_model_config = LLMConfig(**llm_model_config) @@ -380,7 +410,7 @@ def generate_theme_tree( tree_str = repair_json(tree_str) tree_dict = ast.literal_eval(tree_str) - return SemanticTree.from_dict(tree_dict) + return MindMap.from_dict(tree_dict) def dict_keys_to_lowercase(d: dict[str, Any]) -> dict[str, Any]: @@ -404,10 +434,10 @@ def dict_keys_to_lowercase(d: dict[str, Any]) -> dict[str, Any]: def stringify_label_summaries(label_summaries: dict[str, str]) -> list[str]: """ - Convert the label summaries of a SemanticTree into a list of strings. + Convert the label summaries of a MindMap into a list of strings. Args: - label_summaries (dict[str, str]): A dictionary of label summaries of SemanticTree. + label_summaries (dict[str, str]): A dictionary of label summaries of MindMap. Expected format: {label: summary}. Returns: List[str]: A list of strings, each one containing a label and its summary, i.e. @@ -420,9 +450,9 @@ def generate_risk_tree( main_theme: str, focus: str = "", llm_model_config: LLMConfig | dict | str = "openai::gpt-4o-mini", -) -> SemanticTree: +) -> MindMap: """ - Generate a `SemanticTree` class from a main theme and analyst focus. + Generate a `MindMap` class from a main theme and analyst focus. Args: main_theme (str): The primary theme to analyze. @@ -440,7 +470,7 @@ def generate_risk_tree( - `seed` (int) Returns: - SemanticTree: The generated theme tree. + MindMap: The generated theme tree. """ if isinstance(llm_model_config, dict): llm_model_config = LLMConfig(**llm_model_config) @@ -465,7 +495,7 @@ def generate_risk_tree( tree_dict = ast.literal_eval(tree_str) - return SemanticTree.from_dict(tree_dict) + return MindMap.from_dict(tree_dict) def get_default_tree_config(llm_model: str) -> LLMConfig: diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py new file mode 100644 index 0000000..a53480c --- /dev/null +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -0,0 +1,719 @@ +from typing import Any, List, Dict, Optional, Tuple +from bigdata_research_tools.llm.base import LLMConfig +from bigdata_research_tools.llm import LLMEngine + +from bigdata_research_tools.search.query_builder import build_batched_query +# from bigdata_research_tools.search.query_builder import ( +# EntitiesToSearch, +# build_batched_query, +# create_date_ranges, +# ) +# cannot use query builder because it is to error-prone to build EntitiesToSearch based on the LLM output +from bigdata_research_tools.search.search import run_search +from bigdata_research_tools.client import bigdata_connection +from bigdata_client.query import ( + Any, + Keyword, + Similarity, +) + +from bigdata_research_tools.mindmap.mindmap_utils import format_mindmap_to_dataframe, save_results_to_file, load_results_from_file, prompts_dict +import os +import json +import re +import json +import ast +from concurrent.futures import ThreadPoolExecutor, as_completed +from tqdm import tqdm +from bigdata_research_tools.mindmap.mindmap import MindMap, get_default_tree_config +from logging import Logger, getLogger +from bigdata_client.models.search import DocumentType, SortBy +from bigdata_client.daterange import RollingDateRange, AbsoluteDateRange +logger: Logger = getLogger(__name__) + +bigdata_tool_description = [{ + "type": "function", + "function": { + "name": "bigdata_search", + "description": "Run a semantic similarity search on news content using Bigdata API.", + "parameters": { + "type": "object", + "properties": { + "search_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of strings containing various detailed sentences to search in News documents.", + }, + "entities_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of entities (People, Places or Organizations) to focus the search on. They will be added as search context with an OR logic.", + }, + "keywords_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of keywords (one or two words defining topics or concepts) to focus the search on. They will be added as search context with an OR logic.", + } + }, + "required": ["search_list", "entities_list", "keywords_list"] + } + } + }] + +class MindMapGenerator: + """ + Core orchestrator for generating, refining, and dynamically evolving mind maps using LLMs and Bigdata search. + + Features: + - One-shot mind map generation (optionally grounded in search results) + - Refined mind map generation (LLM proposes searches to enhance an initial mind map) + - Dynamic mind map evolution over time intervals (each step refines previous map with new search context) + """ + + def __init__(self, + llm_model_config_base: LLMConfig | dict | str = "openai::gpt-4o-mini", + llm_model_config_reasoning: Optional[LLMConfig | dict | str] = None, + ): + """ + Args: + llm_client: Handles LLM chat and tool-calling. + """ + self.bigdata_connection = bigdata_connection() + + llm_model_config_reasoning = llm_model_config_reasoning if llm_model_config_reasoning else llm_model_config_base + + if isinstance(llm_model_config_base, dict): + self.llm_model_config_base = LLMConfig(**llm_model_config_base) + elif isinstance(llm_model_config_base, str): + self.llm_model_config_base = get_default_tree_config(llm_model_config_base) + + if isinstance(llm_model_config_reasoning, dict): + self.llm_model_config_reasoning = LLMConfig(**llm_model_config_reasoning) + elif isinstance(llm_model_config_reasoning, str): + self.llm_model_config_reasoning = get_default_tree_config(llm_model_config_reasoning) + + print(self.llm_model_config_base) + self.llm_base = LLMEngine(model=self.llm_model_config_base.model, **self.llm_model_config_base.connection_config) + print(self.llm_model_config_reasoning) + self.llm_reasoning = LLMEngine(model=self.llm_model_config_reasoning.model, **self.llm_model_config_reasoning.connection_config) + + def _parse_llm_to_themetree(self, mindmap_text: str) -> MindMap: + """ + Parse LLM output (expected to be a valid JSON object) into a MindMap. + Strictly enforce JSON/dict structure, required fields, and allowed keys. If parsing or validation fails, raises an error with details. + """ + import collections.abc + text = mindmap_text.strip() + # Remove code block markers and language tags (minimal cleaning) + text = re.sub(r'^```[a-zA-Z]*\s*', '', text) + text = re.sub(r'```$', '', text) + # Remove accidental language tags at the start (e.g., "json\n{") + text = re.sub(r'^[a-zA-Z]+\s*\n*{', '{', text) + # Remove any prefix before the first { or [ + text = re.sub(r'^[^({\[]*({|\[)', r'\1', text, flags=re.DOTALL) + # Try JSON, then ast.literal_eval + try: + tree_dict = json.loads(text) + except Exception: + try: + tree_dict = ast.literal_eval(text) + except Exception as e: + raise ValueError(f"Failed to parse LLM output as JSON or Python dict.\nRaw output:\n{mindmap_text}\nCLEANED OUTPUT:\n{text}\nError: {e}") + + # --- Strict validation of required fields and allowed keys --- + allowed_keys = {"label", "node", "summary", "children"} + def validate_node(node, path="root"): + if not isinstance(node, dict): + raise ValueError(f"Node at {path} is not a dict: {node}") + # Check for illegal keys + illegal_keys = set(node.keys()) - allowed_keys + if illegal_keys: + raise ValueError(f"Illegal key(s) {illegal_keys} at {path}. Node: {node}") + # Check for required fields + for key in allowed_keys: + if key not in node or node[key] is None: + raise ValueError(f"Missing or null required field '{key}' at {path}. Node: {node}") + if not isinstance(node["children"], list): + raise ValueError(f"'children' field at {path} is not a list. Node: {node}") + for idx, child in enumerate(node["children"]): + validate_node(child, path=f"{path} -> children[{idx}]") + + # Lowercase keys for robustness + def dict_keys_to_lowercase(d): + if isinstance(d, dict): + return {k.lower(): dict_keys_to_lowercase(v) for k, v in d.items()} + elif isinstance(d, list): + return [dict_keys_to_lowercase(i) for i in d] + else: + return d + tree_dict = dict_keys_to_lowercase(tree_dict) + try: + validate_node(tree_dict) + except Exception as e: + raise ValueError(f"Mind map structure validation failed: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}") + try: + theme_tree = MindMap.from_dict(tree_dict) + except Exception as e: + raise ValueError(f"Failed to build ThemeTree from dict: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}") + return theme_tree + + def _themetree_to_dataframe(self, theme_tree: MindMap): + """ + Convert a ThemeTree object to a pandas DataFrame. + """ + try: + df = theme_tree.to_dataframe() + except Exception as e: + raise ValueError(f"Failed to convert ThemeTree to DataFrame: {e}\nThemeTree:\n{theme_tree}") + return df + + def compose_base_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str]) -> list: + # Explicit, step-by-step prompt (robust, as in working repo, minus Keywords) + enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + messages = [ + {"role": "system", "content": f"{instructions} {focus}\n{enforce_structure}"}, + {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)} + ] + return messages + + def compose_tool_call_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], initial_mindmap: Optional[str]) -> list: + enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." + if initial_mindmap: + + tool_prompt+=f"Starting from the following mind map:\n{initial_mindmap}" + + tool_prompt+=f"\nReturn a list of searches you would like to perform to enhance it.\n{enforce_structure}" + + messages = [ + {"role": "system", "content": tool_prompt}, + {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)} + ] + + return messages + + def send_tool_call(self, messages: list, llm_client:LLMEngine, llm_kwargs: dict) -> list: + + llm_kwargs.update({"tool_choice": {"type": "function", "function": {"name": "bigdata_search"}}}) + + response_dict = llm_client.get_tools_response( + messages,tools=bigdata_tool_description, **llm_kwargs) + + try: + if response_dict["tool_calls"] is not None: + + tool_call_id = response_dict["id"][0] + arguments = response_dict["arguments"][0] + search_list = arguments.get("search_list", []) + entities_list = arguments.get("entities_list", []) + keywords_list = arguments.get("keywords_list", []) + return tool_call_id, response_dict["tool_calls"], search_list, entities_list, keywords_list + else: + print("No tool call found in the response.") + + return None, None, response_dict["text"], None, None + except Exception as e: + raise RuntimeError(f"Failed to parse OpenAI tool call response: {e}") + + def compose_final_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], tool_calls, tool_call_id, context) -> list: + enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + + final_message = [ + {"role": "system", "content": f"{instructions} {focus}. IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant. \n{enforce_structure}"}, + {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)}, + { + "role": "assistant", + "content": None, + "tool_calls": tool_calls + }, + { + "role": "tool", + "tool_call_id": tool_call_id, + "content": context + } + ] + + return final_message + + def compose_refinement_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], initial_mindmap: str, context: str, tool_calls, tool_call_id) -> list: + + enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + + refine_prompt = ( + f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus} " + "Based on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text." + "IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant." + f"{enforce_structure}." + + ) + refinement_messages = [ + {"role": "system", "content": refine_prompt}, + {"role": "user", "content": initial_mindmap}, + { + "role": "assistant", + "content": None, + "tool_calls": tool_calls + }, + { + "role": "tool", + "tool_call_id": tool_call_id, + "content": context + } + ] + + return refinement_messages + + def generate_one_shot( + self, + focus: str, + main_theme: str, + instructions: Optional[str] = None, + allow_grounding: bool = False, + grounding_method: str = "tool_call", + date_range: Optional[Tuple[str, str]] = None, + map_type: str = "risk", + ) -> Dict[str, Any]: + """ + Generate a mind map in one LLM call, optionally allowing the LLM to request grounding. + If allow_grounding is True, use the specified grounding_method ("tool_call" or "chat"). + Optionally log intermediate steps to disk. + """ + + + messages = self.compose_base_message(main_theme, focus, map_type, instructions) + + llm_kwargs = self.llm_model_config_base.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + if allow_grounding: + if grounding_method == "tool_call": + messages.append({"role": "user", "content": "You can use news search to find relevant information about the topic. " + "Use the Bigdata API to search for news articles related to the topic and use them to inform your response. You will need to specify a list of sentences, a list of entities, and a list of keywords."}) + tool_call_id, tool_calls, search_list, entities_list, keywords_list = self.send_tool_call(messages,self.llm_base, llm_kwargs) + + if search_list and isinstance(search_list, list): + context = self._run_and_collate_search(search_list, entities_list, keywords_list, date_range=date_range) + + final_messages = self.compose_final_message(main_theme, focus, map_type, instructions, tool_calls, tool_call_id, context) + + mindmap_text = self.llm_base.get_response(final_messages) + + theme_tree = self._parse_llm_to_themetree(mindmap_text) + df = self._themetree_to_dataframe(theme_tree) + return { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), ##where does this come from? + "grounded": True, + "search_queries": search_list, + "search_context": context + } + else: + #decide if this fallback should be simplified + mindmap_text = search_list if isinstance(search_list, str) else "" + theme_tree = self._parse_llm_to_themetree(mindmap_text) ## check if correct + df = format_mindmap_to_dataframe(mindmap_text) + return { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "grounded": False + } + else: + #decide if this fallback should be simplified + messages[0]["content"] += ( + " You may request news search to ground your mind map. " + "If you want to search, return a list of queries." + ) + response = self.llm_base.get_response(messages) + + queries = self._parse_queries(response) + + if queries: + context = self._run_and_collate_search(queries, [], []) + + followup_messages = [ + {"role": "system", "content": f"{instructions} {focus}"}, + {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)}, + {"role": "assistant", "content": "News search results:\n" + context} + ] + mindmap_text = self.llm_base.get_response(followup_messages) + + df = format_mindmap_to_dataframe(mindmap_text) + return { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "grounded": True, + "search_queries": queries, + "search_context": context + } + # Default: just generate mind map + mindmap_text = self.llm_base.get_response(messages) + + theme_tree = self._parse_llm_to_themetree(mindmap_text) + df = self._themetree_to_dataframe(theme_tree) + return { + "mindmap_text": mindmap_text, + "mindmap_tree": theme_tree, + "mindmap_json": theme_tree.to_json(), + "mindmap_df": df, + "grounded": False + } + + def generate_refined( + self, + focus: str, + main_theme: str, + initial_mindmap: str, + grounding_method: str = "tool_call", + output_dir:str = "./refined_mindmaps", + filename:str = "refined_mindmap.json", + map_type: str = "risk", + instructions: Optional[str] = None, + search_scope: Optional[Any] = None, + sortby: Optional[Any] = None, + date_range: Optional[Any] = None, + chunk_limit: Optional[int] = 20, + **llm_kwargs + ) -> Dict[str, Any]: + """ + Refine an initial mind map: LLM proposes searches, search is run, LLM refines mind map with search results. + Optionally log intermediate steps to disk. + """ + messages = self.compose_tool_call_message(main_theme, focus, map_type, instructions, initial_mindmap) + llm_kwargs = self.llm_model_config_reasoning.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + if grounding_method == "tool_call": + tool_call_id, tool_calls, search_list, entities_list, keywords_list = self.send_tool_call( + messages,self.llm_reasoning, llm_kwargs=llm_kwargs) + + if search_list and isinstance(search_list, list): + context = self._run_and_collate_search( + search_list, entities_list, keywords_list, search_scope, sortby, date_range, chunk_limit + ) + + refinement_messages = self.compose_refinement_message(main_theme, focus, map_type, instructions, initial_mindmap, context, tool_calls, tool_call_id) + mindmap_text = self.llm_reasoning.get_response(refinement_messages) + + theme_tree = self._parse_llm_to_themetree(mindmap_text) + df = self._themetree_to_dataframe(theme_tree) + result_dict = { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "search_queries": search_list, + "search_context": context + } + save_results_to_file(result_dict, output_dir, filename) + return result_dict + else: + mindmap_text = search_list if isinstance(search_list, str) else "" + df = format_mindmap_to_dataframe(mindmap_text) + result_dict = { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "search_queries": [], + "search_context": "" + } + save_results_to_file(result_dict, output_dir, filename) + return result_dict + else: + queries_json = self.llm_reasoning.get_response(messages) + + search_queries = self._parse_queries(queries_json) + context = self._run_and_collate_search( + search_queries, [], [], search_scope, sortby, date_range, chunk_limit + ) + + refinement_messages = self.compose_refinement_message(main_theme, focus, map_type, instructions, initial_mindmap, context, tool_calls, tool_call_id) + mindmap_text = self.llm_reasoning.get_response(refinement_messages) + + theme_tree = self._parse_llm_to_themetree(mindmap_text) + df = self._themetree_to_dataframe(theme_tree) + result_dict = { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "search_queries": search_queries, + "search_context": context + } + save_results_to_file(result_dict, output_dir, filename) + return result_dict + + def generate_or_load_refined(self, instructions: str, + focus: str, + main_theme: str, + map_type: str, + initial_mindmap: str, + llm_model: str = "o3-mini", + reasoning_effort: str = "high", + search_scope: Any = None, + sortby: Any = None, + date_range: Any = None, + chunk_limit: int = 20, + grounding_method: str = "tool_call", + output_dir:str = "./bootstrapped_mindmaps", + filename: str = "refined_mindmap", + i: int = 0): + if f"{filename}_{i}.json" in os.listdir(output_dir): + result = load_results_from_file(output_dir, f"{filename}_{i}.json") + print(f"Loaded existing result for {filename}_{i}.json") + else: + try: + result = self.generate_refined( + instructions=instructions, + focus=focus, + main_theme=main_theme, + map_type=map_type, + initial_mindmap=initial_mindmap, + reasoning_effort=reasoning_effort, + grounding_method=grounding_method, + date_range=date_range, + output_dir=output_dir, + filename = f"{filename}_{i}.json" + ) + #save_results_to_file(result, output_dir, ) + except Exception as e: + print(e) + result = self.generate_refined( + instructions=instructions, + focus=focus, + main_theme=main_theme, + map_type=map_type, + initial_mindmap=initial_mindmap, + reasoning_effort=reasoning_effort, + grounding_method=grounding_method, + date_range=date_range, + output_dir=output_dir, + filename = f"{filename}_{i}.json" + ) + #save_results_to_file(result, output_dir, f"{filename}_{i}.json") + return result + + def bootstrap_refined(self, instructions: str, + focus: str, + main_theme: str, + map_type: str, + initial_mindmap: str, + search_scope: Any = None, + sortby: Any = None, + date_range: Any = None, + chunk_limit: int = 20, + grounding_method: str = "tool_call", + output_dir: str = "./bootstrapped_mindmaps", + filename: str = "refined_mindmap", + n_elements: int = 50, + max_workers: int = 10): + """ + Generate multiple refined mindmaps in parallel using ThreadPoolExecutor. + + Generates n_elements mindmaps by calling generate_or_load_refined for each index. + Uses a thread pool to parallelize the generation process for better efficiency. + Each mindmap is saved with an index suffix to the output_dir. + + Returns a list of all generated mindmap results. + """ + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + refined_results = [] + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Create a mapping of futures to their corresponding indices + future_to_index = {} + + # Submit all tasks and track which future corresponds to which index + for i in range(n_elements): + future = executor.submit( + self.generate_or_load_refined, + instructions=instructions, + focus=focus, + main_theme=main_theme, + map_type=map_type, + initial_mindmap=initial_mindmap, + search_scope=search_scope, + sortby=sortby, + date_range=date_range, + chunk_limit=chunk_limit, + grounding_method=grounding_method, + output_dir=output_dir, + filename=filename, + i=i + ) + future_to_index[future] = i + + # Process futures as they complete + for future in tqdm( + as_completed(future_to_index), total=n_elements, desc="Bootstrapping Refined Mindmaps..." + ): + i = future_to_index[future] + try: + # Store the result in the list + refined_results.append(future.result()) + except Exception as e: + print(f"Error in generating mindmap {i}: {e}") + + return refined_results + + def generate_dynamic( + self, + instructions: str, + focus: str, + main_theme: str, + month_intervals: List[Tuple[str, str]], + month_names: List[str], + search_scope: Any = None, + sortby: Any = None, + chunk_limit: int = 20, + grounding_method: str = "tool_call", + map_type: str = "risk", + output_dir: str = "./dynamic_mindmaps", + **llm_kwargs + ) -> List[Dict[str, Any]]: + """ + Dynamic/iterative mind map generation over time intervals. + Returns a list of dicts, one per interval. + Each step: generate/refine mind map for the given interval, grounded in search results for that period. + """ + results = {} + # Step 1: Generate initial mind map for t0 + one_shot = self.generate_one_shot( + instructions, focus, main_theme, map_type=map_type, **llm_kwargs + ) + prev_mindmap = one_shot["mindmap_text"] + results['base_mindmap'] = one_shot + # Step 2: For each subsequent interval, refine using previous mind map and new search, including starting month + for i, (interval, month_name) in enumerate(zip(month_intervals, month_names), start=0): + date_range = self._make_absolute_date_range(interval) + refined = self.generate_refined(focus = focus, + main_theme=main_theme, + initial_mindmap=prev_mindmap, + grounding_method=grounding_method, + output_dir=output_dir, + filename=f"{month_name}.json", + map_type=map_type, + instructions=instructions, + search_scope=search_scope, + sortby=sortby, + date_range=date_range, + chunk_limit=chunk_limit, + **llm_kwargs + ) + + results[month_name] = refined + prev_mindmap = refined["mindmap_text"] + return results + + def _run_and_collate_search( + self, + search_list: List[str], + entities_list: List[str], + keywords_list: List[str], + search_scope: Any = None, + sortby: Any = None, + date_range: Any = None, + chunk_limit: int = 20 + ) -> str: + """ + Run Bigdata search for each query and collate results for LLM context. + Uses sensible defaults for scope, sortby, and date_range. + If date_range is a list of one tuple (e.g. [('2025-01-01', '2025-01-31')]), unpacks it. + If date_range is a tuple/list of two strings, converts to AbsoluteDateRange. + """ + + # Set defaults if not provided + scope = search_scope if search_scope is not None else DocumentType.NEWS + sortby = sortby if sortby is not None else SortBy.RELEVANCE + + # --- Robust date_range parsing --- + # If date_range is a list of one tuple, unpack it + if isinstance(date_range, list) and len(date_range) == 1 and isinstance(date_range[0], (tuple, list)) and len(date_range[0]) == 2: + date_range = date_range[0] + # If date_range is a tuple/list of two strings, convert to AbsoluteDateRange + if isinstance(date_range, (tuple, list)) and len(date_range) == 2 and all(isinstance(x, str) for x in date_range): + date_range = AbsoluteDateRange(start=date_range[0], end=date_range[1]) + elif date_range is None: + date_range = RollingDateRange.LAST_THIRTY_DAYS + + if entities_list: + print(f"Entities List: {entities_list}") + entity_objs = [] + for entity_name in entities_list: + try: + entity = self.bigdata_connection.knowledge_graph.autosuggest(entity_name, limit=1)[0] + entity_objs.append(entity) + except Exception as e: + print(f"Warning: Autosuggest failed for '{entity_name}': {e}") + continue + print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") + confirmed_entities = [entity for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name] + if confirmed_entities: + entities = Any(confirmed_entities) + else: + entities = None + else: + entities = None + if keywords_list: + print(f"Searching with keywords: {keywords_list}") + keywords = Any([Keyword(kw) for kw in keywords_list]) + else: + keywords = None + + queries = [Similarity(sentence)&keywords&entities if keywords or entities else Similarity(sentence) for sentence in search_list] + + all_results = run_search(queries=queries, + date_ranges = date_range, + sortby = sortby, + scope = scope, + limit = chunk_limit, + only_results = False, + rerank_threshold = None) + + return self.collate_results(all_results) + + def collate_results(self, results: List[Tuple[str, Any]]) -> str: + """ + Collate a list of (query, result) tuples into a single string for LLM context. + + Args: + results (list): List of (query, result) tuples. + + Returns: + str: Collated string for LLM context. + """ + doctexts = [] + for (text_query, date_range), result in results.items(): + docstr = f"###Query: {text_query}\n ### Results:\n" + for doc in result: + headline = getattr(doc, "headline", "No headline") + docstr += f"## {headline}\n\n##" + docstr += f"Date: {doc.timestamp.strftime('%Y-%m-%d')}\n\n" + if hasattr(doc, "chunks"): + for chunk in doc.chunks: + docstr += f"{chunk.text}\n" + doctexts.append(docstr) + return "\n".join(doctexts) + + @staticmethod + def _parse_queries(self, queries_json: str) -> List[str]: + """ + Parse LLM output (JSON or text) into a list of search queries. + """ + import json + try: + queries = json.loads(queries_json) + if isinstance(queries, list): + return queries + elif isinstance(queries, dict) and "search_list" in queries: + return queries["search_list"] + elif isinstance(queries, dict) and "queries" in queries: + return queries["queries"] + except Exception: + # Fallback: split by lines + return [q.strip() for q in queries_json.splitlines() if q.strip()] + return [] + + @staticmethod + def _make_absolute_date_range(interval: Tuple[str, str]) -> Any: + """ + Helper to create an AbsoluteDateRange object from a (start, end) tuple. + """ + return AbsoluteDateRange(start=interval[0], end=interval[1]) \ No newline at end of file diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py new file mode 100644 index 0000000..7265641 --- /dev/null +++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py @@ -0,0 +1,121 @@ +import pandas as pd +from io import StringIO +import os +import json + +prompts_dict = {'theme':{'qualifier':'Main Theme', + 'user_prompt_message':'Your given Theme is: {main_theme}', + 'enforce_structure_string':("""IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" + "- `node`: an integer representing the unique identifier for the node.\n" + "- `label`: a string for the name of the sub-theme.\n" + "- `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme.\n" + "- For the node referring to the main theme, just define briefly in maximum 15 words the theme.\n" + "- `children`: an array of child nodes.\n" + "Format the JSON object as a nested dictionary. Be careful when specifying keys and items.\n" + "Avoid overlapping labels. Break down joint concepts into unique parents so that each parent represents ONLY ONE concept. AVOID creating branch names such as 'Compliance and Regulatory Risk'. Keep risks separate and create a single branch for each risk, such as 'Compliance Risk' and 'Regulatory Risk', each with their own children.\n" + "Return ONLY the JSON object, with no extra text, explanation, or markdown.\n" + "You MUST use ONLY these field names: label, node, summary, children. Do NOT use underscores, spaces, or any other characters in field names. If you use any other field names, your answer will be rejected.\n" + "## Example Structure:\n" + "**Theme: Global Warming**\n\n" + "{\n" + " \"node\": 1,\n" + " \"label\": \"Global Warming\",\n" + " \"summary\": \"Global Warming is a serious risk\",\n" + " \"children\": [\n" + " {\"node\": 2, \"label\": \"Renewable Energy Adoption\", \"summary\": \"Renewable energy reduces greenhouse gas emissions and thereby global warming and climate change effects\", \"children\": [\n" + " {\"node\": 5, \"label\": \"Solar Energy\", \"summary\": \"Solar energy reduces greenhouse gas emissions\"},\n" + " {\"node\": 6, \"label\": \"Wind Energy\", \"summary\": \"Wind energy reduces greenhouse gas emissions\"},\n" + " {\"node\": 7, \"label\": \"Hydropower\", \"summary\": \"Hydropower reduces greenhouse gas emissions\"}\n" + " ]},\n" + " {\"node\": 3, \"label\": \"Carbon Emission Reduction\", \"summary\": \"Carbon emission reduction decreases greenhouse gases\", \"children\": [\n" + " {\"node\": 8, \"label\": \"Carbon Capture Technology\", \"summary\": \"Carbon capture technology reduces atmospheric CO2\"},\n" + " {\"node\": 9, \"label\": \"Emission Trading Systems\", \"summary\": \"Emission trading systems incentivize reductions in greenhouse gases\"}\n" + " ]}\n" + " ]\n" + "}\n" + """)}, + 'risk':{'qualifier':'Risk Scenario', + 'user_prompt_message':'Your given Risk Scenario is: {main_theme}', + 'enforce_structure_string':( + """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" + " - `node`: an integer representing the unique identifier for the node.\n" + " - `label`: a string for the name of the sub-theme.\n" + " - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the main theme or risk.\n" + " - `children`: an array of child nodes.\n" + "Format the JSON object as a nested dictionary. Be careful when specifying keys and items.\n" + "Avoid overlapping labels. Break down joint concepts into unique parents so that each parent represents ONLY ONE concept. AVOID creating branch names such as 'Compliance and Regulatory Risk'. Keep risks separate and create a single branch for each risk, such as 'Compliance Risk' and 'Regulatory Risk', each with their own children.\n" + "Return ONLY the JSON object, with no extra text, explanation, or markdown.\n" + "You MUST use ONLY these field names: label, node, summary, children. Do NOT use underscores, spaces, or any other characters in field names. If you use any other field names, your answer will be rejected.\n" + "## Example Structure:\n" + "**Theme: Global Warming**\n\n" + "{\n" + " \"node\": 1,\n" + " \"label\": \"Global Warming\",\n" + " \"summary\": \"Global Warming is a serious risk\",\n" + " \"children\": [\n" + " {\"node\": 2, \"label\": \"Renewable Energy Adoption\", \"summary\": \"Renewable energy reduces greenhouse gas emissions and thereby global warming and climate change effects\", \"children\": [\n" + " {\"node\": 5, \"label\": \"Solar Energy\", \"summary\": \"Solar energy reduces greenhouse gas emissions\"},\n" + " {\"node\": 6, \"label\": \"Wind Energy\", \"summary\": \"Wind energy reduces greenhouse gas emissions\"},\n" + " {\"node\": 7, \"label\": \"Hydropower\", \"summary\": \"Hydropower reduces greenhouse gas emissions\"}\n" + " ]},\n" + " {\"node\": 3, \"label\": \"Carbon Emission Reduction\", \"summary\": \"Carbon emission reduction decreases greenhouse gases\", \"children\": [\n" + " {\"node\": 8, \"label\": \"Carbon Capture Technology\", \"summary\": \"Carbon capture technology reduces atmospheric CO2\"},\n" + " {\"node\": 9, \"label\": \"Emission Trading Systems\", \"summary\": \"Emission trading systems incentivize reductions in greenhouse gases\"}\n" + " ]}\n" + " ]\n" + "}\n" + """) + } +} + +def format_mindmap_to_dataframe(mindmap_text): + """ + Parse a mind map in pipe-delimited table format into a cleaned pandas DataFrame. + Strips whitespace and removes unnamed columns. + + Args: + mindmap_text (str): The mind map content as a string in pipe-delimited format. + + Returns: + pd.DataFrame: A pandas DataFrame containing the cleaned data from the mind map. + + Raises: + ValueError: If the resulting DataFrame does not contain the required columns. + """ + try: + df = pd.read_csv(StringIO(mindmap_text.strip()), sep="|", engine="python", skiprows=[1]) + df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + except Exception as e: + try: + df = pd.read_csv( + StringIO(mindmap_text.strip()), + sep="|", + engine="python", + skiprows=[1], + on_bad_lines='skip' + ) + df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + except Exception as e2: + raise ValueError(f"Failed to parse mindmap text to DataFrame: {e2}") + required_columns = {"Main Branches", "Sub-Branches", "Description"} + if not required_columns.issubset(set(df.columns)): + raise ValueError(f"Missing required columns in mindmap table: {df.columns}") + return df + +def save_results_to_file(results, output_dir, filename): + """ + Save the results to a JSON file. + """ + os.makedirs(output_dir, exist_ok=True) + output_file = os.path.join(output_dir, filename) + + with open(output_file, "w") as f: + json.dump(results, f, default=str, indent=2) + +def load_results_from_file(output_dir, filename): + """ + Load the results from a JSON file. + """ + input_file = os.path.join(output_dir, filename) + with open(input_file, "r") as f: + return json.load(f) \ No newline at end of file diff --git a/src/bigdata_research_tools/search/query_builder.py b/src/bigdata_research_tools/search/query_builder.py index 6b4bf13..a653d82 100644 --- a/src/bigdata_research_tools/search/query_builder.py +++ b/src/bigdata_research_tools/search/query_builder.py @@ -73,7 +73,7 @@ def build_similarity_queries(sentences: list[str]) -> list[QueryComponent]: def build_batched_query( - sentences: list[str], + sentences: list[str] | None, keywords: list[str] | None, entities: EntitiesToSearch | None, control_entities: EntitiesToSearch | None, diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 541fa50..6c7ec06 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -34,6 +34,7 @@ tuple[datetime, datetime], RollingDateRange, list[tuple[datetime, datetime] | RollingDateRange], + AbsoluteDateRange, ] SEARCH_QUERY_RESULTS_TYPE = dict[ tuple[QueryComponent, Union[AbsoluteDateRange, RollingDateRange]], list[Document] @@ -185,7 +186,7 @@ def _search( def concurrent_search( self, queries: list[QueryComponent], - date_ranges: list[tuple[datetime, datetime] | RollingDateRange], + date_ranges: list[tuple[datetime, datetime] | RollingDateRange | AbsoluteDateRange], sortby: SortBy = SortBy.RELEVANCE, scope: DocumentType = DocumentType.ALL, limit: int = 10, @@ -244,7 +245,13 @@ def concurrent_search( as_completed(futures), total=len(futures), desc="Querying Bigdata..." ): query, date_range = futures[future] + try: + if isinstance(date_range, AbsoluteDateRange): + date_range = f"{date_range.start_dt.isoformat()}_{date_range.end_dt.isoformat()}" + elif isinstance(date_range, tuple): + date_range = f"{date_range[0].isoformat()}_{date_range[1].isoformat()}" + results[(query, date_range)] = future.result() except Exception as e: raise e @@ -265,7 +272,7 @@ def get_quota_consumed(self) -> float: def normalize_date_range( date_ranges: INPUT_DATE_RANGE, -) -> list[tuple[datetime, datetime] | RollingDateRange]: +) -> list[tuple[datetime, datetime] | RollingDateRange | AbsoluteDateRange]: if not isinstance(date_ranges, list): date_ranges = [date_ranges] @@ -335,7 +342,8 @@ def run_search( the list of the corresponding search results. """ date_ranges = normalize_date_range(date_ranges) - date_ranges.sort(key=lambda x: x[0]) + if isinstance(date_ranges[0], tuple) or isinstance(date_ranges[0], list): + date_ranges.sort(key=lambda x: x[0]) workflow_start = datetime.now() workflow_status = WorkflowStatus.UNKNOWN diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index bb9873f..9eef2ea 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -17,8 +17,8 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.tree import ( - SemanticTree, +from bigdata_research_tools.mindmap.mindmap import ( + MindMap, generate_risk_tree, ) from bigdata_research_tools.workflows.base import Workflow @@ -90,7 +90,7 @@ def __init__( def create_taxonomy(self): """Create a risk taxonomy based on the main theme and focus. Returns: - SemanticTree: The generated risk tree. + MindMap: The generated risk tree. List[str]: A list of risk summaries for the terminal nodes. List[str]: A list of terminal labels for the risk categories. """ @@ -174,7 +174,7 @@ def label_search_results( self, df_sentences, terminal_labels, - risk_tree: SemanticTree, + risk_tree: MindMap, additional_prompt_fields: list[str] | None = None, ): """ @@ -183,7 +183,7 @@ def label_search_results( Args: df_sentences (DataFrame): The DataFrame containing the search results. terminal_labels (List[str]): The terminal labels for the risk categories. - risk_tree (SemanticTree): The SemanticTree object containing the risk taxonomy. + risk_tree (MindMap): The MindMap object containing the risk taxonomy. prompt_fields (Dict): Additional fields to be used in the labeling prompt. Returns: @@ -273,7 +273,7 @@ def save_results( df_company: DataFrame, df_industry: DataFrame, motivation_df: DataFrame, - risk_tree: SemanticTree, + risk_tree: MindMap, export_path: str, ): """ @@ -330,7 +330,7 @@ def screen_companies( - df_company: The DataFrame with the output by company. - df_industry: The DataFrame with the output by industry. - df_motivation: The DataFrame with the generated motivations. - - risk_tree: The SemanticTree created for the screening. + - risk_tree: The MindMap created for the screening. """ if export_path and not check_excel_dependencies(): diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 9e83e7d..cbf62f8 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -17,7 +17,7 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.tree import generate_theme_tree +from bigdata_research_tools.mindmap.mindmap import generate_theme_tree from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df From 5a0a50d52f1ddf69607e180a612d334a3c0a7c36 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Mon, 17 Nov 2025 17:37:25 +0000 Subject: [PATCH 67/82] extracting sentence from query --- examples/grounded_mindmaps.py | 6 +++--- src/bigdata_research_tools/mindmap/mindmap_generator.py | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index f032b8d..5e4b8bc 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -93,9 +93,9 @@ def main(MAIN_THEME = "Political Change in Japan.", logger.info("=" * 60) try: - # base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") - # test_refined_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") - # test_refined_mindmap2(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") + base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") + test_refined_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") + test_refined_mindmap2(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") test_dynamic_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini", llm_reasoning_config="openai::o3-mini") logger.info("=" * 60) diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index a53480c..dc4de88 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -681,7 +681,11 @@ def collate_results(self, results: List[Tuple[str, Any]]) -> str: """ doctexts = [] for (text_query, date_range), result in results.items(): - docstr = f"###Query: {text_query}\n ### Results:\n" + for item in text_query.items: + dictitem = item.to_dict() + if dictitem['type']=='similarity': + sentence = dictitem['value'] + docstr = f"###Query: {sentence}\n ### Results:\n" for doc in result: headline = getattr(doc, "headline", "No headline") docstr += f"## {headline}\n\n##" From ce09f9a252db7217a945d6f39545273434d89b9b Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 18 Nov 2025 10:01:28 +0000 Subject: [PATCH 68/82] added presentation style visuals --- examples/grounded_mindmaps.py | 8 +- pyproject.toml | 1 + .../mindmap/__init__.py | 4 + src/bigdata_research_tools/mindmap/mindmap.py | 18 +- .../visuals/mindmap_visuals.py | 959 ++++++++++++++++++ uv.lock | 418 +++++++- 6 files changed, 1402 insertions(+), 6 deletions(-) create mode 100644 src/bigdata_research_tools/mindmap/__init__.py create mode 100644 src/bigdata_research_tools/visuals/mindmap_visuals.py diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index 5e4b8bc..52097dd 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -3,10 +3,9 @@ from bigdata_client import Bigdata from bigdata_client.models.search import DocumentType from dotenv import load_dotenv -from traitlets import Any - from bigdata_research_tools.mindmap.mindmap import MindMap from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator +from bigdata_research_tools.visuals.mindmap_visuals import plot_mindmap # Load environment variables for authentication print(f"Environment variables loaded: {load_dotenv()}") @@ -31,7 +30,7 @@ def test_one_shot_mindmap(main_theme, focus, map_type, instructions, llm_base_co allow_grounding=False, ) logger.info("Results: %s", mindmap['mindmap_text']) - return mindmap["mindmap_json"] + return mindmap["mindmap_df"], mindmap["mindmap_json"] def test_refined_mindmap(main_theme, focus, map_type, instructions, base_mindmap: str, llm_base_config: str = "openai::o3-mini") -> MindMap: @@ -93,7 +92,8 @@ def main(MAIN_THEME = "Political Change in Japan.", logger.info("=" * 60) try: - base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") + df_mindmap, base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") + plot_mindmap(df_mindmap, MAIN_THEME) test_refined_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") test_refined_mindmap2(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") test_dynamic_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini", llm_reasoning_config="openai::o3-mini") diff --git a/pyproject.toml b/pyproject.toml index 209308f..3db1bf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "json-repair>=0.44.1", "tabulate>=0.9.0,<1.0.0", "plotly>=6.0.0,<7.0.0", + "matplotlib>=3.10.6,<4.0.0" ] [project.urls] diff --git a/src/bigdata_research_tools/mindmap/__init__.py b/src/bigdata_research_tools/mindmap/__init__.py new file mode 100644 index 0000000..10b1154 --- /dev/null +++ b/src/bigdata_research_tools/mindmap/__init__.py @@ -0,0 +1,4 @@ +from bigdata_research_tools.mindmap.mindmap import MindMap, generate_theme_tree, generate_risk_tree +from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator + +__all__ = ["MindMap", "MindMapGenerator", "generate_theme_tree", "generate_risk_tree"] diff --git a/src/bigdata_research_tools/mindmap/mindmap.py b/src/bigdata_research_tools/mindmap/mindmap.py index c047dc4..2846a9d 100644 --- a/src/bigdata_research_tools/mindmap/mindmap.py +++ b/src/bigdata_research_tools/mindmap/mindmap.py @@ -192,11 +192,27 @@ def visualize(self, engine: str = "graphviz") -> None: self._visualize_graphviz() elif engine == "plotly": self._visualize_plotly() + elif engine == "matplotlib": + self._visualize_matplotlib() else: raise ValueError( f"Unsupported engine '{engine}'. " - f"Supported engines are 'graphviz' and 'plotly'." + f"Supported engines are 'graphviz', 'plotly', and 'matplotlib'." ) + + def _visualize_matplotlib(self): + """ + Auxiliary function to visualize the tree using Matplotlib. + + Returns: + A Matplotlib Plot rendering the mindmap. + """ + import matplotlib + matplotlib.use('Agg') # Use non-interactive backend + from bigdata_research_tools.visuals.mindmap_visuals import plot_mindmap + + plot_mindmap(self.to_dataframe(), main_theme=self.label) + def _visualize_graphviz(self) -> graphviz.Digraph: """ diff --git a/src/bigdata_research_tools/visuals/mindmap_visuals.py b/src/bigdata_research_tools/visuals/mindmap_visuals.py new file mode 100644 index 0000000..3e1a2f7 --- /dev/null +++ b/src/bigdata_research_tools/visuals/mindmap_visuals.py @@ -0,0 +1,959 @@ +""" +Mindmap Plotter - A graphviz-inspired tool for creating mindmap visualizations. + +Clean, structured layout with proper text fitting and spacing. + +Algorithm Overview: +------------------- +The tool uses a multi-stage layout algorithm to ensure all text fits within rectangles +and the entire mindmap fits within the chart area: + +1. **Layout Calculation Phase**: + - Calculates base font size based on total available area and number of elements + - Determines uniform dimensions for each node level (theme, main branches, sub-branches) + - Applies horizontal and vertical scaling if needed to fit within chart bounds + - All scaling happens BEFORE text fitting to ensure accurate measurements + +2. **Text Fitting Algorithm**: + - Uses binary search (30-40 iterations) to find optimal font size for each text element + - Text wrapping respects word boundaries only (no mid-word breaks) + - Separate fitting strategies for main branches (strict, 88% safety margin) and + sub-branches (prioritizes larger fonts, 90% safety margin, minimal wrapping) + - Measures actual rendered text dimensions on the target figure for accuracy + +3. **Iterative Refinement Loop**: + - Renders the plot and measures actual text dimensions + - Detects overflow (text exceeding rectangle bounds or rectangles exceeding chart area) + - Adjusts font sizes and dimensions iteratively (up to 5 iterations) + - Continues until no overflow is detected + +4. **Boundary Enforcement**: + - Accounts for rectangle border linewidth (extends outward from edges) + - Uses absolute bottom boundary to prevent bottom border cutoff + - Centers sub-branch blocks with their main branch while respecting boundaries + - Clips all elements to axes limits to prevent rendering outside chart area + +The algorithm guarantees no text overflow, no ellipsis, and proper spacing while +maintaining visual hierarchy and readability. +""" +import matplotlib +matplotlib.use('Agg') # Use non-interactive backend +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from matplotlib import rcParams +import pandas as pd +import numpy as np +import math +import os +from typing import Tuple, Dict, List, Optional +from bigdata_research_tools.mindmap.mindmap import MindMap + +##Note: In case the mindmap overflows from the chart area and edges, you can try to adjust the padding system by shrinking the 'available chart area'. For example, if padding does not work, you can try to have the algorithm think that the available area is 95% of the original available area. In this way, the algorithm will have less space to work with and will try to fit the text within the available area. + +class MindmapPlotter: + """Main class for plotting mindmaps with graphviz-inspired layout.""" + + def __init__( + self, + mindmap: pd.DataFrame | MindMap, + main_theme: str, + title: str = "Mind Map", + color_scheme: str = "gold", + output_dir: str = "./outputs", + aspect_ratio: float = 8/9 + ): + """Initialize the mindmap plotter.""" + if isinstance(mindmap, pd.DataFrame): + self.df = mindmap.copy() + elif isinstance(mindmap, MindMap): + self.df = mindmap.to_dataframe() + + # Handle flexible column names: either (Parent, Label) or (Main Branches, Sub-Branches) + if 'Main Branches' in self.df.columns and 'Sub-Branches' in self.df.columns: + # Already in correct format + pass + elif 'Parent' in self.df.columns and 'Label' in self.df.columns: + # Rename to standard format + self.df = self.df.rename(columns={'Parent': 'Main Branches', 'Label': 'Sub-Branches'}) + else: + # Check what columns we have + has_main = 'Main Branches' in self.df.columns + has_sub = 'Sub-Branches' in self.df.columns + has_parent = 'Parent' in self.df.columns + has_label = 'Label' in self.df.columns + + raise ValueError( + f"DataFrame must have either (Parent, Label) or (Main Branches, Sub-Branches) columns. " + f"Found columns: {list(self.df.columns)}. " + f"Has Main Branches: {has_main}, Has Sub-Branches: {has_sub}, " + f"Has Parent: {has_parent}, Has Label: {has_label}" + ) + + # Assert required columns exist + assert 'Main Branches' in self.df.columns, "Missing 'Main Branches' column after processing" + assert 'Sub-Branches' in self.df.columns, "Missing 'Sub-Branches' column after processing" + + self.df['Main Branches'] = self.df['Main Branches'].astype(str).str.strip() + self.df['Sub-Branches'] = self.df['Sub-Branches'].astype(str).str.strip() + + self.main_theme = main_theme + self.title = title + self.color_scheme = color_scheme + self.output_dir = output_dir + self.aspect_ratio = aspect_ratio + + os.makedirs(output_dir, exist_ok=True) + + plt.rcParams['svg.fonttype'] = 'none' + rcParams['font.family'] = 'DejaVu Sans' + + self.colors = self._get_color_scheme(color_scheme) + + self.main_branches = self.df['Main Branches'].unique() + self.n_main = len(self.main_branches) + + self.sub_branches = { + main: self.df[self.df['Main Branches'] == main]['Sub-Branches'].tolist() + for main in self.main_branches + } + self.n_sub_total = sum(len(subs) for subs in self.sub_branches.values()) + + self.fig_width = 10.0 + self.fig_height = self.fig_width / aspect_ratio + self.title_height = 0 # No title - removed to maximize space + + # Add padding to prevent rectangles from going over chart edges + self.edge_padding = 0.25 # Padding on sides + self.top_padding = 0.1 # Minimal top padding + self.bottom_padding = 0.6 # Extra bottom padding to ensure borders don't get cut (increased further) + # Account for linewidth - borders extend outward by half linewidth on each side + self.max_linewidth = 2.5 # Maximum linewidth used (for main branches/theme) + self.sub_linewidth = 2.0 # Sub-branch linewidth + # Linewidth extends by half on each side, convert points to inches (72 points per inch) + # Be very conservative: use 3x linewidth as padding to ensure borders don't get cut + self.linewidth_padding = (self.max_linewidth / 72.0) * 3.0 # Very conservative padding + self.sub_linewidth_padding = (self.sub_linewidth / 72.0) * 3.0 # Sub-branch linewidth padding + # Use minimal top padding, extra bottom padding + # Reduce available_height to account for linewidth extension at bottom + self.available_height = self.fig_height - self.title_height - self.top_padding - (self.bottom_padding + self.sub_linewidth_padding) + self.available_width = self.fig_width - 2 * (self.edge_padding + self.linewidth_padding) + # Calculate absolute bottom boundary - no rectangle should exceed this + # Use sub_linewidth_padding since sub-branches are at the bottom + self.absolute_bottom = self.fig_height - self.bottom_padding - self.sub_linewidth_padding + + # Increased padding for text inside boxes + self.pad_x = 0.7 # Increased from 0.5 + self.pad_y = 0.7 # Increased from 0.5 + + def _get_color_scheme(self, scheme: str) -> Dict[str, str]: + """Get color scheme dictionary.""" + schemes = { + "gold": { + "edge": "#eab720", + "link": "#eab720", + "background": "none", # Transparent + "text": "black" + }, + "light_blue": { + "edge": "#206EB5", + "link": "#206EB5", + "background": "none", # Transparent + "text": "black" + }, + "dark_blue": { + "edge": "#2C318C", + "link": "#2C318C", + "background": "none", # Transparent + "text": "black" + } + } + return schemes.get(scheme, schemes["gold"]) + + def _measure_text(self, ax, text: str, fontsize: float) -> Tuple[float, float]: + """Measure text dimensions accurately.""" + if not text: + return self.pad_x, self.pad_y + + t = ax.text(0, 0, text, fontsize=fontsize, ha='left', va='bottom') + if not hasattr(ax.figure.canvas, 'renderer'): + ax.figure.canvas.draw() + renderer = ax.figure.canvas.get_renderer() + bbox = t.get_window_extent(renderer=renderer) + t.remove() + + width = bbox.width / ax.figure.dpi + self.pad_x + height = bbox.height / ax.figure.dpi + self.pad_y + + return width, height + + def _wrap_text(self, text: str, max_chars: int) -> str: + """Wrap text at word boundaries.""" + if not text: + return text + + lines = [] + for para in text.split('\n'): + if not para: + lines.append('') + continue + + words = para.split() + if not words: + lines.append('') + continue + + line = words[0] + for word in words[1:]: + if len(line + ' ' + word) <= max_chars: + line += ' ' + word + else: + lines.append(line) + line = word + lines.append(line) + + return '\n'.join(lines) + + def _fit_text_main( + self, + ax, + text: str, + max_width: float, + max_height: float, + initial_font: float, + min_font: float = 9 + ) -> Tuple[float, str, float, float]: + """ + Fit text for main branches - STRICT no overflow. + Returns: fontsize, wrapped_text, actual_width, actual_height + """ + if not text: + return min_font, '', self.pad_x, self.pad_y + + available_w = max(0.1, max_width - self.pad_x) + available_h = max(0.1, max_height - self.pad_y) + + # Binary search for optimal font size + low_font = min_font + high_font = initial_font * 1.3 + best = None + + for _ in range(40): + test_font = (low_font + high_font) / 2.0 + + # Less aggressive wrapping - prioritize font size + chars_per_inch = test_font / 12.0 * 7 # More chars per inch + wrap_chars = max(20, int(available_w * chars_per_inch)) # Minimum 20 chars + + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, test_font) + + if w <= max_width * 0.88 and h <= max_height * 0.88: + best = (test_font, wrapped, w, h) + low_font = test_font + 0.2 + else: + high_font = test_font - 0.2 + + if high_font < low_font: + break + + if best: + return best + + # Fallback: ensure it fits + fontsize = min_font + chars_per_inch = fontsize / 12.0 * 7 + wrap_chars = max(20, int(available_w * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Keep reducing until it fits - more aggressive + max_attempts = 60 + attempt = 0 + while (w > max_width * 0.88 or h > max_height * 0.88) and fontsize > min_font * 0.7 and attempt < max_attempts: + attempt += 1 + scale = min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) * 0.97 + fontsize = max(min_font * 0.7, fontsize * scale) + chars_per_inch = fontsize / 12.0 * 7 + wrap_chars = max(20, int(available_w * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Final verification - ensure text actually fits + w, h = self._measure_text(ax, wrapped, fontsize) + if w > max_width * 0.88 or h > max_height * 0.88: + # Force one more reduction + scale = min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) * 0.98 + fontsize = max(min_font * 0.7, fontsize * scale) + chars_per_inch = fontsize / 12.0 * 7 + wrap_chars = max(20, int((max_width - self.pad_x) * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Final safety check - clip dimensions + w = min(w, max_width * 0.88) + h = min(h, max_height * 0.88) + + return fontsize, wrapped, w, h + + def _fit_text_sub( + self, + ax, + text: str, + max_width: float, + max_height: float, + initial_font: float, + min_font: float = 8 + ) -> Tuple[float, str, float, float]: + """ + Fit text for sub-branches - prioritize large font, minimal wrapping. + Returns: fontsize, wrapped_text, actual_width, actual_height + """ + if not text: + return min_font, '', self.pad_x, self.pad_y + + available_w = max(0.1, max_width - self.pad_x) + available_h = max(0.1, max_height - self.pad_y) + + # Binary search prioritizing larger fonts + low_font = min_font + high_font = initial_font * 1.5 + best = None + + for _ in range(40): + test_font = (low_font + high_font) / 2.0 + + # Much less aggressive wrapping - wide lines preferred + chars_per_inch = test_font / 12.0 * 8 # Even more chars per inch + wrap_chars = max(25, int(available_w * chars_per_inch)) # Minimum 25 chars, prefer wide lines + + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, test_font) + + if w <= max_width * 0.90 and h <= max_height * 0.90: + best = (test_font, wrapped, w, h) + low_font = test_font + 0.3 # Try even larger + else: + high_font = test_font - 0.3 + + if high_font < low_font: + break + + if best: + return best + + # Fallback: use minimum font with wide wrapping + fontsize = min_font + chars_per_inch = fontsize / 12.0 * 8 + wrap_chars = max(25, int(available_w * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Only reduce if absolutely necessary + max_attempts = 30 + attempt = 0 + while (w > max_width * 0.90 or h > max_height * 0.90) and fontsize > min_font * 0.85 and attempt < max_attempts: + attempt += 1 + scale = min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) * 0.98 + fontsize = max(min_font * 0.85, fontsize * scale) + chars_per_inch = fontsize / 12.0 * 8 + wrap_chars = max(25, int(available_w * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Final verification - ensure text actually fits + w, h = self._measure_text(ax, wrapped, fontsize) + if w > max_width * 0.90 or h > max_height * 0.90: + # Force one more reduction + scale = min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) * 0.99 + fontsize = max(min_font * 0.85, fontsize * scale) + chars_per_inch = fontsize / 12.0 * 8 + wrap_chars = max(25, int((max_width - self.pad_x) * chars_per_inch)) + wrapped = self._wrap_text(text, wrap_chars) + w, h = self._measure_text(ax, wrapped, fontsize) + + # Final safety check - clip dimensions + w = min(w, max_width * 0.90) + h = min(h, max_height * 0.90) + + return fontsize, wrapped, w, h + + def _calculate_layout(self) -> Dict: + """Calculate complete layout with uniform dimensions and scaling.""" + fig_temp = plt.figure(figsize=(self.fig_width, self.fig_height)) + ax_temp = fig_temp.add_subplot(111) + ax_temp.axis('off') + + # Base font calculation + total_elements = self.n_main + self.n_sub_total + area_per_element = (self.available_height * self.available_width) / max(1, total_elements) + + ref_area = 1.0 + if area_per_element > ref_area: + area_factor = math.pow(area_per_element / ref_area, 0.6) + else: + area_factor = math.pow(area_per_element / ref_area, 0.7) + + area_factor = max(0.4, min(3.5, area_factor)) + base_font = max(8, min(26, 12 * area_factor)) + + # Initial dimensions (will be scaled if needed) + main_spacing = self.available_height * 0.06 + available_h = self.available_height - (self.n_main - 1) * main_spacing + uniform_main_h = max(0.7, min(1.6, available_h / max(1, self.n_main))) + + max_main_len = max([len(m) for m in self.main_branches], default=20) + uniform_main_w = max(1.3, min(2.5, max_main_len * 0.10)) + + # Theme width matches main branch width + theme_w = uniform_main_w + theme_h = uniform_main_h + + # Sub-branch dimensions + max_subs = max([len(subs) for subs in self.sub_branches.values()], default=1) + spacing_extra = main_spacing * 0.4 + available_sub_h = uniform_main_h + 2 * spacing_extra + + if max_subs > 0: + sub_spacing = 0.12 + available_for_subs = available_sub_h - (max_subs - 1) * sub_spacing + uniform_sub_h = max(0.55, min(1.5, available_for_subs / max_subs)) + else: + uniform_sub_h = 0.8 + sub_spacing = 0.12 + + all_subs = [s for subs in self.sub_branches.values() for s in subs] + max_sub_len = max([len(s) for s in all_subs], default=30) if all_subs else 30 + max_w = self.available_width * 0.28 + uniform_sub_w = max(2.2, min(max_w, max_sub_len * 0.12)) + + # Calculate column positions (accounting for edge padding and linewidth) + margin = self.edge_padding + self.linewidth_padding + col_spacing = self.available_width * 0.07 + + x_theme = margin + x_main = x_theme + theme_w + col_spacing + x_sub = x_main + uniform_main_w + col_spacing + total_width_needed = x_sub + uniform_sub_w + margin + + # Horizontal scaling if needed + h_scale = 1.0 + if total_width_needed > self.available_width * 0.96: + h_scale = (self.available_width * 0.96) / total_width_needed + uniform_main_w *= h_scale + theme_w *= h_scale + uniform_sub_w *= h_scale + margin *= h_scale + col_spacing *= h_scale + # Recalculate positions + x_theme = margin + x_main = x_theme + theme_w + col_spacing + x_sub = x_main + uniform_main_w + col_spacing + + # Vertical scaling check + total_h = uniform_main_h * self.n_main + main_spacing * (self.n_main - 1) + v_scale = 1.0 + if total_h > self.available_height * 0.94: + v_scale = (self.available_height * 0.94) / total_h + uniform_main_h *= v_scale + theme_h *= v_scale + uniform_sub_h *= v_scale + main_spacing *= v_scale + sub_spacing *= v_scale + + # Now fit text with final scaled dimensions + # Fit all main branches to find uniform font + main_fonts = [] + for main in self.main_branches: + font, _, _, _ = self._fit_text_main(ax_temp, main, uniform_main_w, uniform_main_h, base_font * 0.9, 10) + main_fonts.append(font) + + uniform_main_font = min(main_fonts) if main_fonts else base_font * 0.9 + + # Fit all main branches with uniform dimensions - STRICT no overflow + main_data = {} + for main in self.main_branches: + font, wrapped, w, h = self._fit_text_main(ax_temp, main, uniform_main_w, uniform_main_h, uniform_main_font, 10) + main_data[main] = { + 'fontsize': font, + 'text': wrapped, + 'width': uniform_main_w, + 'height': uniform_main_h + } + + # Theme layout - use same width as main + theme_font, theme_wrapped, theme_w_actual, theme_h_actual = self._fit_text_main( + ax_temp, self.main_theme, theme_w, theme_h, base_font * 1.0, 11 + ) + # Use target dimensions, not measured + theme_w = uniform_main_w + theme_h = uniform_main_h + + # Fit all sub-branches - prioritize large font, minimal wrapping + sub_fonts = [] + for sub in all_subs: + font, _, _, _ = self._fit_text_sub(ax_temp, sub, uniform_sub_w * 0.94, uniform_sub_h * 0.94, base_font * 1.0, 9) + sub_fonts.append(font) + + uniform_sub_font = min(sub_fonts) if sub_fonts else base_font * 1.0 + + # Fit all sub-branches with uniform dimensions + sub_data = {} + for main in self.main_branches: + subs = self.sub_branches[main] + main_sub_data = [] + + for sub in subs: + font, wrapped, w, h = self._fit_text_sub( + ax_temp, sub, uniform_sub_w * 0.94, uniform_sub_h * 0.94, uniform_sub_font, 9 + ) + + main_sub_data.append({ + 'fontsize': font, + 'text': wrapped, + 'width': uniform_sub_w, + 'height': uniform_sub_h + }) + + sub_data[main] = main_sub_data + + plt.close(fig_temp) + + return { + 'theme': {'fontsize': theme_font, 'text': theme_wrapped, 'width': theme_w, 'height': theme_h}, + 'main_data': main_data, + 'sub_data': sub_data, + 'uniform_main_w': uniform_main_w, + 'uniform_main_h': uniform_main_h, + 'uniform_sub_w': uniform_sub_w, + 'uniform_sub_h': uniform_sub_h, + 'main_spacing': main_spacing, + 'sub_spacing': sub_spacing, + 'margin': margin, + 'col_spacing': col_spacing, + 'x_theme': x_theme, + 'x_main': x_main, + 'x_sub': x_sub + } + + def _check_overflow(self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) -> Dict: + """Check for text and rectangle overflow, return adjustments needed.""" + issues = { + 'theme_overflow': False, + 'main_overflows': {}, + 'sub_overflows': {}, + 'bottom_cutoff': False + } + + # Check theme text overflow + theme_text = ax.text( + x_theme + layout['theme']['width'] / 2, + theme_y + layout['theme']['height'] / 2, + layout['theme']['text'], + ha='center', va='center', fontsize=layout['theme']['fontsize'], + fontweight='bold' + ) + renderer = ax.figure.canvas.get_renderer() + bbox = theme_text.get_window_extent(renderer=renderer) + theme_text.remove() + text_w = bbox.width / ax.figure.dpi + text_h = bbox.height / ax.figure.dpi + if text_w > layout['theme']['width'] * 0.88 or text_h > layout['theme']['height'] * 0.88: + issues['theme_overflow'] = True + + # Check main branch text overflow + for main in self.main_branches: + main_y = main_positions[main] + main_info = layout['main_data'][main] + main_text = ax.text( + x_main + layout['uniform_main_w'] / 2, + main_y + layout['uniform_main_h'] / 2, + main_info['text'], + ha='center', va='center', fontsize=main_info['fontsize'], + fontweight='bold' + ) + bbox = main_text.get_window_extent(renderer=renderer) + main_text.remove() + text_w = bbox.width / ax.figure.dpi + text_h = bbox.height / ax.figure.dpi + if text_w > layout['uniform_main_w'] * 0.88 or text_h > layout['uniform_main_h'] * 0.88: + issues['main_overflows'][main] = True + + # Check sub-branch overflow and bottom cutoff + max_bottom_y = 0 + for main in self.main_branches: + main_y = main_positions[main] + subs = layout['sub_data'].get(main, []) + if subs: + spacing_extra = layout['main_spacing'] * 0.4 + min_sub_y = main_y - spacing_extra + max_sub_y = main_y + layout['uniform_main_h'] + spacing_extra + + sub_spacing = layout['sub_spacing'] + total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing + main_center = main_y + layout['uniform_main_h'] / 2 + sub_y_start = main_center - total_sub_h / 2 + + if sub_y_start < min_sub_y: + sub_y_start = min_sub_y + if sub_y_start + total_sub_h > max_sub_y: + sub_y_start = max_sub_y - total_sub_h + # Calculate maximum allowed bottom position (use absolute bottom) + min_top_y = self.top_padding + self.linewidth_padding + + if sub_y_start < min_top_y: + sub_y_start = min_top_y + # Ensure total height doesn't exceed absolute bottom + if sub_y_start + total_sub_h > self.absolute_bottom: + sub_y_start = max(min_top_y, self.absolute_bottom - total_sub_h) + + sub_y = sub_y_start + for i, sub_info in enumerate(subs): + if sub_y + layout['uniform_sub_h'] > max_bottom_y: + issues['bottom_cutoff'] = True + break + + # Check text overflow + sub_text = ax.text( + x_sub + layout['uniform_sub_w'] / 2, + sub_y + layout['uniform_sub_h'] / 2, + sub_info['text'], + ha='center', va='center', fontsize=sub_info['fontsize'], + fontweight='bold' + ) + bbox = sub_text.get_window_extent(renderer=renderer) + sub_text.remove() + text_w = bbox.width / ax.figure.dpi + text_h = bbox.height / ax.figure.dpi + if text_w > layout['uniform_sub_w'] * 0.90 or text_h > layout['uniform_sub_h'] * 0.90: + issues['sub_overflows'][(main, i)] = True + + bottom_y = sub_y + layout['uniform_sub_h'] + if bottom_y > max_bottom_y: + max_bottom_y = bottom_y + + sub_y += layout['uniform_sub_h'] + sub_spacing + if sub_y + layout['uniform_sub_h'] > max_sub_y: + break + + # Check if bottom sub-branch is too close to edge (accounting for padding and linewidth) + if max_bottom_y > self.absolute_bottom - 0.1: + issues['bottom_cutoff'] = True + + return issues + + def plot(self) -> Tuple[plt.Figure, plt.Axes]: + """Create the mindmap plot with iterative refinement.""" + layout = self._calculate_layout() + + # Iterative refinement loop + max_iterations = 5 + for iteration in range(max_iterations): + fig, ax = plt.subplots(figsize=(self.fig_width, self.fig_height)) + ax.axis('off') + + # Use pre-calculated positions from layout + x_theme = layout['x_theme'] + x_main = layout['x_main'] + x_sub = layout['x_sub'] + + # Main branch positions + total_h = layout['uniform_main_h'] * self.n_main + layout['main_spacing'] * (self.n_main - 1) + y_start = (self.available_height - total_h) / 2 + + main_positions = {} + y = y_start + for main in self.main_branches: + main_positions[main] = y + y += layout['uniform_main_h'] + layout['main_spacing'] + + # Theme position (centered with main branches) + theme_y = y_start + (total_h - layout['theme']['height']) / 2 + + # Draw elements to check overflow + self._draw_elements(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) + + # Ensure figure is drawn for accurate measurement + ax.figure.canvas.draw() + + # Check for overflow + issues = self._check_overflow(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) + + # If no issues, we're done + if not issues['theme_overflow'] and not issues['main_overflows'] and not issues['sub_overflows'] and not issues['bottom_cutoff']: + plt.close(fig) + break + + # Adjust layout based on issues + if issues['theme_overflow'] or issues['main_overflows'] or issues['bottom_cutoff']: + # Reduce font sizes for theme and main branches + layout = self._adjust_layout_for_overflow(layout, issues, iteration) + plt.close(fig) + continue + + plt.close(fig) + break + + # Final render + fig, ax = plt.subplots(figsize=(self.fig_width, self.fig_height)) + ax.axis('off') + + # Use pre-calculated positions from layout + x_theme = layout['x_theme'] + x_main = layout['x_main'] + x_sub = layout['x_sub'] + + # Main branch positions (accounting for top padding and linewidth) + total_h = layout['uniform_main_h'] * self.n_main + layout['main_spacing'] * (self.n_main - 1) + y_start = self.top_padding + self.linewidth_padding + (self.available_height - total_h) / 2 + + main_positions = {} + y = y_start + for main in self.main_branches: + main_positions[main] = y + y += layout['uniform_main_h'] + layout['main_spacing'] + + # Theme position (centered with main branches) + theme_y = y_start + (total_h - layout['theme']['height']) / 2 + + # Draw all elements + self._draw_elements(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) + + # Set strict limits to prevent anything from being drawn outside bounds + # Account for linewidth extension - clip everything strictly + ax.set_xlim(0, self.fig_width) + ax.set_ylim(0, self.fig_height) + # Clip all patches and text to axes limits + ax.set_clip_on(True) + + return fig, ax + + def _draw_elements(self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y): + """Draw all mindmap elements.""" + # Draw theme + theme_rect = patches.Rectangle( + (x_theme, theme_y), layout['theme']['width'], layout['theme']['height'], + linewidth=2.5, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2 + ) + ax.add_patch(theme_rect) + ax.text( + x_theme + layout['theme']['width'] / 2, + theme_y + layout['theme']['height'] / 2, + layout['theme']['text'], + ha='center', va='center', fontsize=layout['theme']['fontsize'], + color=self.colors['text'], fontweight='bold', zorder=3 + ) + + # Draw main branches and sub-branches + for main in self.main_branches: + main_y = main_positions[main] + main_info = layout['main_data'][main] + + # Main branch rectangle + main_rect = patches.Rectangle( + (x_main, main_y), layout['uniform_main_w'], layout['uniform_main_h'], + linewidth=2.5, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2 + ) + ax.add_patch(main_rect) + ax.text( + x_main + layout['uniform_main_w'] / 2, + main_y + layout['uniform_main_h'] / 2, + main_info['text'], + ha='center', va='center', fontsize=main_info['fontsize'], + color=self.colors['text'], fontweight='bold', zorder=3 + ) + + # Connection theme to main + ax.plot( + [x_theme + layout['theme']['width'], x_main], + [theme_y + layout['theme']['height'] / 2, main_y + layout['uniform_main_h'] / 2], + color=self.colors['link'], linewidth=3, alpha=0.6, zorder=1, solid_capstyle='round' + ) + + # Sub-branches + subs = layout['sub_data'].get(main, []) + if subs: + spacing_extra = layout['main_spacing'] * 0.4 + min_sub_y = main_y - spacing_extra + max_sub_y = main_y + layout['uniform_main_h'] + spacing_extra + # Ensure max_sub_y doesn't exceed absolute bottom + max_sub_y = min(max_sub_y, self.absolute_bottom) + available_sub_h = max_sub_y - min_sub_y + + sub_spacing = layout['sub_spacing'] + total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing + + # Adjust spacing if needed + if total_sub_h > available_sub_h: + max_sp = (available_sub_h - layout['uniform_sub_h'] * len(subs)) / max(1, len(subs) - 1) + sub_spacing = max(0.08, min(sub_spacing, max_sp)) + total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing + + # Center on main branch - calculate ideal center position + main_center = main_y + layout['uniform_main_h'] / 2 + ideal_sub_y_start = main_center - total_sub_h / 2 + + # Determine available space boundaries (use the most restrictive) + absolute_min = self.top_padding + self.linewidth_padding + absolute_max = self.absolute_bottom + relative_min = min_sub_y + relative_max = max_sub_y + + # Use the most restrictive boundaries + min_top_y = max(absolute_min, relative_min) + max_bottom_y = min(absolute_max, relative_max) + + # Start with ideal centered position + sub_y_start = ideal_sub_y_start + + # If the entire block fits within available space, use centered position + if ideal_sub_y_start >= min_top_y and ideal_sub_y_start + total_sub_h <= max_bottom_y: + sub_y_start = ideal_sub_y_start + else: + # Block doesn't fit centered - adjust to fit while maintaining centering as much as possible + if ideal_sub_y_start < min_top_y: + # Too high - push down to minimum + sub_y_start = min_top_y + elif ideal_sub_y_start + total_sub_h > max_bottom_y: + # Too low - push up to maximum + sub_y_start = max_bottom_y - total_sub_h + # Ensure we don't go below minimum + if sub_y_start < min_top_y: + sub_y_start = min_top_y + + # Final safety check: ensure we don't exceed absolute bottom + if sub_y_start + total_sub_h > absolute_max: + sub_y_start = max(min_top_y, absolute_max - total_sub_h) + + # Draw sub-branches + sub_y = sub_y_start + # Use pre-calculated sub_linewidth_padding + # Use absolute bottom boundary - no rectangle should exceed this + for sub_info in subs: + # STRICT check: rectangle bottom must not exceed absolute bottom + rect_bottom = sub_y + layout['uniform_sub_h'] + + # Don't draw if rectangle itself would exceed absolute bottom + if rect_bottom > self.absolute_bottom: + break # Don't draw this or any subsequent sub-branches + if sub_y + layout['uniform_sub_h'] > max_sub_y: + break + + # Extra safety check - leave margin for linewidth extension + # Linewidth extends by half outward, so ensure rect_bottom + half_linewidth <= absolute_bottom + half_linewidth_extension = (self.sub_linewidth / 2.0) / 72.0 + if rect_bottom + half_linewidth_extension > self.absolute_bottom: + break + + # Sub-branch rectangle - clip to axes to prevent overflow + sub_rect = patches.Rectangle( + (x_sub, sub_y), layout['uniform_sub_w'], layout['uniform_sub_h'], + linewidth=self.sub_linewidth, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2, + clip_on=True + ) + ax.add_patch(sub_rect) + + # Sub-branch text + ax.text( + x_sub + layout['uniform_sub_w'] / 2, + sub_y + layout['uniform_sub_h'] / 2, + sub_info['text'], + ha='center', va='center', fontsize=sub_info['fontsize'], + color=self.colors['text'], fontweight='bold', zorder=3 + ) + + # Connection main to sub + ax.plot( + [x_main + layout['uniform_main_w'], x_sub], + [main_y + layout['uniform_main_h'] / 2, sub_y + layout['uniform_sub_h'] / 2], + color=self.colors['link'], linewidth=2.5, alpha=0.5, zorder=1, solid_capstyle='round' + ) + + sub_y += layout['uniform_sub_h'] + sub_spacing + + if sub_y + layout['uniform_sub_h'] > max_sub_y: + break + + # Title removed to maximize space for mindmap + + def _adjust_layout_for_overflow(self, layout, issues, iteration): + """Adjust layout to fix overflow issues.""" + # Create temp figure for re-fitting + fig_temp = plt.figure(figsize=(self.fig_width, self.fig_height)) + ax_temp = fig_temp.add_subplot(111) + ax_temp.axis('off') + + # Reduce font sizes more aggressively + reduction_factor = 0.92 - (iteration * 0.02) # More aggressive each iteration + + # Adjust theme + if issues['theme_overflow']: + current_font = layout['theme']['fontsize'] + new_font = max(9, current_font * reduction_factor) + font, wrapped, _, _ = self._fit_text_main( + ax_temp, self.main_theme, layout['theme']['width'], layout['theme']['height'], + new_font, 9 + ) + layout['theme']['fontsize'] = font + layout['theme']['text'] = wrapped + + # Adjust main branches + if issues['main_overflows']: + for main in issues['main_overflows']: + current_font = layout['main_data'][main]['fontsize'] + new_font = max(8, current_font * reduction_factor) + font, wrapped, _, _ = self._fit_text_main( + ax_temp, main, layout['uniform_main_w'], layout['uniform_main_h'], + new_font, 8 + ) + layout['main_data'][main]['fontsize'] = font + layout['main_data'][main]['text'] = wrapped + + # Adjust for bottom cutoff - reduce vertical spacing or sub-branch height + if issues['bottom_cutoff']: + # Reduce sub-branch height slightly + layout['uniform_sub_h'] *= 0.95 + layout['sub_spacing'] *= 0.95 + # Re-fit all sub-branches + for main in self.main_branches: + subs = layout['sub_data'].get(main, []) + for i, sub_info in enumerate(subs): + sub_text = self.sub_branches[main][i] + font, wrapped, _, _ = self._fit_text_sub( + ax_temp, sub_text, layout['uniform_sub_w'] * 0.94, + layout['uniform_sub_h'] * 0.94, layout['sub_data'][main][i]['fontsize'], 8 + ) + layout['sub_data'][main][i]['fontsize'] = font + layout['sub_data'][main][i]['text'] = wrapped + + plt.close(fig_temp) + return layout + + def save(self, fig: plt.Figure): + """Save the figure as PNG and SVG.""" + filename = self.title.replace(' ', '_') + png_path = os.path.join(self.output_dir, f"{filename}.png") + svg_path = os.path.join(self.output_dir, f"{filename}.svg") + + # Don't use bbox_inches='tight' to ensure we stay within figure bounds + # The axes limits are already set correctly in plot() + fig.savefig(png_path, transparent=True, dpi=300) + fig.savefig(svg_path, transparent=True) + + print(f"Saved: {png_path}") + print(f"Saved: {svg_path}") + + +def plot_mindmap( + mindmap: pd.DataFrame | MindMap, + main_theme: str, + title: str = "Mind Map", + color_scheme: str = "gold", + output_dir: str = "./outputs", + aspect_ratio: float = 8/9 +) -> Tuple[plt.Figure, plt.Axes]: + + """Plot a mindmap from a DataFrame.""" + plotter = MindmapPlotter(mindmap, main_theme, title, color_scheme, output_dir, aspect_ratio) + fig, ax = plotter.plot() + plotter.save(fig) + return fig, ax diff --git a/uv.lock b/uv.lock index 0a200bb..ba80bc4 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <4.0" resolution-markers = [ "python_full_version >= '3.12'", @@ -222,6 +222,7 @@ dependencies = [ { name = "bigdata-client" }, { name = "graphviz" }, { name = "json-repair" }, + { name = "matplotlib" }, { name = "openpyxl" }, { name = "pandas" }, { name = "pillow" }, @@ -257,6 +258,7 @@ requires-dist = [ { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.24.0,<2.0.0" }, { name = "graphviz", specifier = ">=0.20.3,<0.21.0" }, { name = "json-repair", specifier = ">=0.44.1" }, + { name = "matplotlib", specifier = ">=3.10.6,<4.0.0" }, { name = "openai", marker = "extra == 'azure'", specifier = ">=1.61.1,<2.0.0" }, { name = "openai", marker = "extra == 'openai'", specifier = ">=1.61.1,<2.0.0" }, { name = "openpyxl", specifier = ">=3.1.5,<4.0.0" }, @@ -452,6 +454,162 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "contourpy" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/a3/da4153ec8fe25d263aa48c1a4cbde7f49b59af86f0b6f7862788c60da737/contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934", size = 268551, upload-time = "2025-04-15T17:34:46.581Z" }, + { url = "https://files.pythonhosted.org/packages/2f/6c/330de89ae1087eb622bfca0177d32a7ece50c3ef07b28002de4757d9d875/contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989", size = 253399, upload-time = "2025-04-15T17:34:51.427Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/20c6726b1b7f81a8bee5271bed5c165f0a8e1f572578a9d27e2ccb763cb2/contourpy-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9be002b31c558d1ddf1b9b415b162c603405414bacd6932d031c5b5a8b757f0d", size = 312061, upload-time = "2025-04-15T17:34:55.961Z" }, + { url = "https://files.pythonhosted.org/packages/22/fc/a9665c88f8a2473f823cf1ec601de9e5375050f1958cbb356cdf06ef1ab6/contourpy-1.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2e74acbcba3bfdb6d9d8384cdc4f9260cae86ed9beee8bd5f54fee49a430b9", size = 351956, upload-time = "2025-04-15T17:35:00.992Z" }, + { url = "https://files.pythonhosted.org/packages/25/eb/9f0a0238f305ad8fb7ef42481020d6e20cf15e46be99a1fcf939546a177e/contourpy-1.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e259bced5549ac64410162adc973c5e2fb77f04df4a439d00b478e57a0e65512", size = 320872, upload-time = "2025-04-15T17:35:06.177Z" }, + { url = "https://files.pythonhosted.org/packages/32/5c/1ee32d1c7956923202f00cf8d2a14a62ed7517bdc0ee1e55301227fc273c/contourpy-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad687a04bc802cbe8b9c399c07162a3c35e227e2daccf1668eb1f278cb698631", size = 325027, upload-time = "2025-04-15T17:35:11.244Z" }, + { url = "https://files.pythonhosted.org/packages/83/bf/9baed89785ba743ef329c2b07fd0611d12bfecbedbdd3eeecf929d8d3b52/contourpy-1.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cdd22595308f53ef2f891040ab2b93d79192513ffccbd7fe19be7aa773a5e09f", size = 1306641, upload-time = "2025-04-15T17:35:26.701Z" }, + { url = "https://files.pythonhosted.org/packages/d4/cc/74e5e83d1e35de2d28bd97033426b450bc4fd96e092a1f7a63dc7369b55d/contourpy-1.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4f54d6a2defe9f257327b0f243612dd051cc43825587520b1bf74a31e2f6ef2", size = 1374075, upload-time = "2025-04-15T17:35:43.204Z" }, + { url = "https://files.pythonhosted.org/packages/0c/42/17f3b798fd5e033b46a16f8d9fcb39f1aba051307f5ebf441bad1ecf78f8/contourpy-1.3.2-cp310-cp310-win32.whl", hash = "sha256:f939a054192ddc596e031e50bb13b657ce318cf13d264f095ce9db7dc6ae81c0", size = 177534, upload-time = "2025-04-15T17:35:46.554Z" }, + { url = "https://files.pythonhosted.org/packages/54/ec/5162b8582f2c994721018d0c9ece9dc6ff769d298a8ac6b6a652c307e7df/contourpy-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c440093bbc8fc21c637c03bafcbef95ccd963bc6e0514ad887932c18ca2a759a", size = 221188, upload-time = "2025-04-15T17:35:50.064Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b9/ede788a0b56fc5b071639d06c33cb893f68b1178938f3425debebe2dab78/contourpy-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6a37a2fb93d4df3fc4c0e363ea4d16f83195fc09c891bc8ce072b9d084853445", size = 269636, upload-time = "2025-04-15T17:35:54.473Z" }, + { url = "https://files.pythonhosted.org/packages/e6/75/3469f011d64b8bbfa04f709bfc23e1dd71be54d05b1b083be9f5b22750d1/contourpy-1.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b7cd50c38f500bbcc9b6a46643a40e0913673f869315d8e70de0438817cb7773", size = 254636, upload-time = "2025-04-15T17:35:58.283Z" }, + { url = "https://files.pythonhosted.org/packages/8d/2f/95adb8dae08ce0ebca4fd8e7ad653159565d9739128b2d5977806656fcd2/contourpy-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6658ccc7251a4433eebd89ed2672c2ed96fba367fd25ca9512aa92a4b46c4f1", size = 313053, upload-time = "2025-04-15T17:36:03.235Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a6/8ccf97a50f31adfa36917707fe39c9a0cbc24b3bbb58185577f119736cc9/contourpy-1.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:70771a461aaeb335df14deb6c97439973d253ae70660ca085eec25241137ef43", size = 352985, upload-time = "2025-04-15T17:36:08.275Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b6/7925ab9b77386143f39d9c3243fdd101621b4532eb126743201160ffa7e6/contourpy-1.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65a887a6e8c4cd0897507d814b14c54a8c2e2aa4ac9f7686292f9769fcf9a6ab", size = 323750, upload-time = "2025-04-15T17:36:13.29Z" }, + { url = "https://files.pythonhosted.org/packages/c2/f3/20c5d1ef4f4748e52d60771b8560cf00b69d5c6368b5c2e9311bcfa2a08b/contourpy-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3859783aefa2b8355697f16642695a5b9792e7a46ab86da1118a4a23a51a33d7", size = 326246, upload-time = "2025-04-15T17:36:18.329Z" }, + { url = "https://files.pythonhosted.org/packages/8c/e5/9dae809e7e0b2d9d70c52b3d24cba134dd3dad979eb3e5e71f5df22ed1f5/contourpy-1.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eab0f6db315fa4d70f1d8ab514e527f0366ec021ff853d7ed6a2d33605cf4b83", size = 1308728, upload-time = "2025-04-15T17:36:33.878Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4a/0058ba34aeea35c0b442ae61a4f4d4ca84d6df8f91309bc2d43bb8dd248f/contourpy-1.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d91a3ccc7fea94ca0acab82ceb77f396d50a1f67412efe4c526f5d20264e6ecd", size = 1375762, upload-time = "2025-04-15T17:36:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/09/33/7174bdfc8b7767ef2c08ed81244762d93d5c579336fc0b51ca57b33d1b80/contourpy-1.3.2-cp311-cp311-win32.whl", hash = "sha256:1c48188778d4d2f3d48e4643fb15d8608b1d01e4b4d6b0548d9b336c28fc9b6f", size = 178196, upload-time = "2025-04-15T17:36:55.002Z" }, + { url = "https://files.pythonhosted.org/packages/5e/fe/4029038b4e1c4485cef18e480b0e2cd2d755448bb071eb9977caac80b77b/contourpy-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:5ebac872ba09cb8f2131c46b8739a7ff71de28a24c869bcad554477eb089a878", size = 222017, upload-time = "2025-04-15T17:36:58.576Z" }, + { url = "https://files.pythonhosted.org/packages/34/f7/44785876384eff370c251d58fd65f6ad7f39adce4a093c934d4a67a7c6b6/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2", size = 271580, upload-time = "2025-04-15T17:37:03.105Z" }, + { url = "https://files.pythonhosted.org/packages/93/3b/0004767622a9826ea3d95f0e9d98cd8729015768075d61f9fea8eeca42a8/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15", size = 255530, upload-time = "2025-04-15T17:37:07.026Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7bd49e1f4fa805772d9fd130e0d375554ebc771ed7172f48dfcd4ca61549/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92", size = 307688, upload-time = "2025-04-15T17:37:11.481Z" }, + { url = "https://files.pythonhosted.org/packages/fc/97/e1d5dbbfa170725ef78357a9a0edc996b09ae4af170927ba8ce977e60a5f/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87", size = 347331, upload-time = "2025-04-15T17:37:18.212Z" }, + { url = "https://files.pythonhosted.org/packages/6f/66/e69e6e904f5ecf6901be3dd16e7e54d41b6ec6ae3405a535286d4418ffb4/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415", size = 318963, upload-time = "2025-04-15T17:37:22.76Z" }, + { url = "https://files.pythonhosted.org/packages/a8/32/b8a1c8965e4f72482ff2d1ac2cd670ce0b542f203c8e1d34e7c3e6925da7/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe", size = 323681, upload-time = "2025-04-15T17:37:33.001Z" }, + { url = "https://files.pythonhosted.org/packages/30/c6/12a7e6811d08757c7162a541ca4c5c6a34c0f4e98ef2b338791093518e40/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441", size = 1308674, upload-time = "2025-04-15T17:37:48.64Z" }, + { url = "https://files.pythonhosted.org/packages/2a/8a/bebe5a3f68b484d3a2b8ffaf84704b3e343ef1addea528132ef148e22b3b/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e", size = 1380480, upload-time = "2025-04-15T17:38:06.7Z" }, + { url = "https://files.pythonhosted.org/packages/34/db/fcd325f19b5978fb509a7d55e06d99f5f856294c1991097534360b307cf1/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912", size = 178489, upload-time = "2025-04-15T17:38:10.338Z" }, + { url = "https://files.pythonhosted.org/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73", size = 223042, upload-time = "2025-04-15T17:38:14.239Z" }, + { url = "https://files.pythonhosted.org/packages/2e/61/5673f7e364b31e4e7ef6f61a4b5121c5f170f941895912f773d95270f3a2/contourpy-1.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:de39db2604ae755316cb5967728f4bea92685884b1e767b7c24e983ef5f771cb", size = 271630, upload-time = "2025-04-15T17:38:19.142Z" }, + { url = "https://files.pythonhosted.org/packages/ff/66/a40badddd1223822c95798c55292844b7e871e50f6bfd9f158cb25e0bd39/contourpy-1.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f9e896f447c5c8618f1edb2bafa9a4030f22a575ec418ad70611450720b5b08", size = 255670, upload-time = "2025-04-15T17:38:23.688Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c7/cf9fdee8200805c9bc3b148f49cb9482a4e3ea2719e772602a425c9b09f8/contourpy-1.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71e2bd4a1c4188f5c2b8d274da78faab884b59df20df63c34f74aa1813c4427c", size = 306694, upload-time = "2025-04-15T17:38:28.238Z" }, + { url = "https://files.pythonhosted.org/packages/dd/e7/ccb9bec80e1ba121efbffad7f38021021cda5be87532ec16fd96533bb2e0/contourpy-1.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de425af81b6cea33101ae95ece1f696af39446db9682a0b56daaa48cfc29f38f", size = 345986, upload-time = "2025-04-15T17:38:33.502Z" }, + { url = "https://files.pythonhosted.org/packages/dc/49/ca13bb2da90391fa4219fdb23b078d6065ada886658ac7818e5441448b78/contourpy-1.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:977e98a0e0480d3fe292246417239d2d45435904afd6d7332d8455981c408b85", size = 318060, upload-time = "2025-04-15T17:38:38.672Z" }, + { url = "https://files.pythonhosted.org/packages/c8/65/5245ce8c548a8422236c13ffcdcdada6a2a812c361e9e0c70548bb40b661/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841", size = 322747, upload-time = "2025-04-15T17:38:43.712Z" }, + { url = "https://files.pythonhosted.org/packages/72/30/669b8eb48e0a01c660ead3752a25b44fdb2e5ebc13a55782f639170772f9/contourpy-1.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c66c4906cdbc50e9cba65978823e6e00b45682eb09adbb78c9775b74eb222422", size = 1308895, upload-time = "2025-04-15T17:39:00.224Z" }, + { url = "https://files.pythonhosted.org/packages/05/5a/b569f4250decee6e8d54498be7bdf29021a4c256e77fe8138c8319ef8eb3/contourpy-1.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b7fc0cd78ba2f4695fd0a6ad81a19e7e3ab825c31b577f384aa9d7817dc3bef", size = 1379098, upload-time = "2025-04-15T17:43:29.649Z" }, + { url = "https://files.pythonhosted.org/packages/19/ba/b227c3886d120e60e41b28740ac3617b2f2b971b9f601c835661194579f1/contourpy-1.3.2-cp313-cp313-win32.whl", hash = "sha256:15ce6ab60957ca74cff444fe66d9045c1fd3e92c8936894ebd1f3eef2fff075f", size = 178535, upload-time = "2025-04-15T17:44:44.532Z" }, + { url = "https://files.pythonhosted.org/packages/12/6e/2fed56cd47ca739b43e892707ae9a13790a486a3173be063681ca67d2262/contourpy-1.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e1578f7eafce927b168752ed7e22646dad6cd9bca673c60bff55889fa236ebf9", size = 223096, upload-time = "2025-04-15T17:44:48.194Z" }, + { url = "https://files.pythonhosted.org/packages/54/4c/e76fe2a03014a7c767d79ea35c86a747e9325537a8b7627e0e5b3ba266b4/contourpy-1.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0475b1f6604896bc7c53bb070e355e9321e1bc0d381735421a2d2068ec56531f", size = 285090, upload-time = "2025-04-15T17:43:34.084Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e2/5aba47debd55d668e00baf9651b721e7733975dc9fc27264a62b0dd26eb8/contourpy-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c85bb486e9be652314bb5b9e2e3b0d1b2e643d5eec4992c0fbe8ac71775da739", size = 268643, upload-time = "2025-04-15T17:43:38.626Z" }, + { url = "https://files.pythonhosted.org/packages/a1/37/cd45f1f051fe6230f751cc5cdd2728bb3a203f5619510ef11e732109593c/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:745b57db7758f3ffc05a10254edd3182a2a83402a89c00957a8e8a22f5582823", size = 310443, upload-time = "2025-04-15T17:43:44.522Z" }, + { url = "https://files.pythonhosted.org/packages/8b/a2/36ea6140c306c9ff6dd38e3bcec80b3b018474ef4d17eb68ceecd26675f4/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:970e9173dbd7eba9b4e01aab19215a48ee5dd3f43cef736eebde064a171f89a5", size = 349865, upload-time = "2025-04-15T17:43:49.545Z" }, + { url = "https://files.pythonhosted.org/packages/95/b7/2fc76bc539693180488f7b6cc518da7acbbb9e3b931fd9280504128bf956/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c4639a9c22230276b7bffb6a850dfc8258a2521305e1faefe804d006b2e532", size = 321162, upload-time = "2025-04-15T17:43:54.203Z" }, + { url = "https://files.pythonhosted.org/packages/f4/10/76d4f778458b0aa83f96e59d65ece72a060bacb20cfbee46cf6cd5ceba41/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc829960f34ba36aad4302e78eabf3ef16a3a100863f0d4eeddf30e8a485a03b", size = 327355, upload-time = "2025-04-15T17:44:01.025Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/10cf483ea683f9f8ab096c24bad3cce20e0d1dd9a4baa0e2093c1c962d9d/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d32530b534e986374fc19eaa77fcb87e8a99e5431499949b828312bdcd20ac52", size = 1307935, upload-time = "2025-04-15T17:44:17.322Z" }, + { url = "https://files.pythonhosted.org/packages/78/73/69dd9a024444489e22d86108e7b913f3528f56cfc312b5c5727a44188471/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e298e7e70cf4eb179cc1077be1c725b5fd131ebc81181bf0c03525c8abc297fd", size = 1372168, upload-time = "2025-04-15T17:44:33.43Z" }, + { url = "https://files.pythonhosted.org/packages/0f/1b/96d586ccf1b1a9d2004dd519b25fbf104a11589abfd05484ff12199cca21/contourpy-1.3.2-cp313-cp313t-win32.whl", hash = "sha256:d0e589ae0d55204991450bb5c23f571c64fe43adaa53f93fc902a84c96f52fe1", size = 189550, upload-time = "2025-04-15T17:44:37.092Z" }, + { url = "https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214, upload-time = "2025-04-15T17:44:40.827Z" }, + { url = "https://files.pythonhosted.org/packages/33/05/b26e3c6ecc05f349ee0013f0bb850a761016d89cec528a98193a48c34033/contourpy-1.3.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fd93cc7f3139b6dd7aab2f26a90dde0aa9fc264dbf70f6740d498a70b860b82c", size = 265681, upload-time = "2025-04-15T17:44:59.314Z" }, + { url = "https://files.pythonhosted.org/packages/2b/25/ac07d6ad12affa7d1ffed11b77417d0a6308170f44ff20fa1d5aa6333f03/contourpy-1.3.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:107ba8a6a7eec58bb475329e6d3b95deba9440667c4d62b9b6063942b61d7f16", size = 315101, upload-time = "2025-04-15T17:45:04.165Z" }, + { url = "https://files.pythonhosted.org/packages/8f/4d/5bb3192bbe9d3f27e3061a6a8e7733c9120e203cb8515767d30973f71030/contourpy-1.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ded1706ed0c1049224531b81128efbd5084598f18d8a2d9efae833edbd2b40ad", size = 220599, upload-time = "2025-04-15T17:45:08.456Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c0/91f1215d0d9f9f343e4773ba6c9b89e8c0cc7a64a6263f21139da639d848/contourpy-1.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f5964cdad279256c084b69c3f412b7801e15356b16efa9d78aa974041903da0", size = 266807, upload-time = "2025-04-15T17:45:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/d4/79/6be7e90c955c0487e7712660d6cead01fa17bff98e0ea275737cc2bc8e71/contourpy-1.3.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b65a95d642d4efa8f64ba12558fcb83407e58a2dfba9d796d77b63ccfcaff5", size = 318729, upload-time = "2025-04-15T17:45:20.166Z" }, + { url = "https://files.pythonhosted.org/packages/87/68/7f46fb537958e87427d98a4074bcde4b67a70b04900cfc5ce29bc2f556c1/contourpy-1.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8c5acb8dddb0752bf252e01a3035b21443158910ac16a3b0d20e7fed7d534ce5", size = 221791, upload-time = "2025-04-15T17:45:24.794Z" }, +] + +[[package]] +name = "contourpy" +version = "1.3.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/2e/c4390a31919d8a78b90e8ecf87cd4b4c4f05a5b48d05ec17db8e5404c6f4/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1", size = 288773, upload-time = "2025-07-26T12:01:02.277Z" }, + { url = "https://files.pythonhosted.org/packages/0d/44/c4b0b6095fef4dc9c420e041799591e3b63e9619e3044f7f4f6c21c0ab24/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381", size = 270149, upload-time = "2025-07-26T12:01:04.072Z" }, + { url = "https://files.pythonhosted.org/packages/30/2e/dd4ced42fefac8470661d7cb7e264808425e6c5d56d175291e93890cce09/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7", size = 329222, upload-time = "2025-07-26T12:01:05.688Z" }, + { url = "https://files.pythonhosted.org/packages/f2/74/cc6ec2548e3d276c71389ea4802a774b7aa3558223b7bade3f25787fafc2/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1", size = 377234, upload-time = "2025-07-26T12:01:07.054Z" }, + { url = "https://files.pythonhosted.org/packages/03/b3/64ef723029f917410f75c09da54254c5f9ea90ef89b143ccadb09df14c15/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a", size = 380555, upload-time = "2025-07-26T12:01:08.801Z" }, + { url = "https://files.pythonhosted.org/packages/5f/4b/6157f24ca425b89fe2eb7e7be642375711ab671135be21e6faa100f7448c/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db", size = 355238, upload-time = "2025-07-26T12:01:10.319Z" }, + { url = "https://files.pythonhosted.org/packages/98/56/f914f0dd678480708a04cfd2206e7c382533249bc5001eb9f58aa693e200/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620", size = 1326218, upload-time = "2025-07-26T12:01:12.659Z" }, + { url = "https://files.pythonhosted.org/packages/fb/d7/4a972334a0c971acd5172389671113ae82aa7527073980c38d5868ff1161/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f", size = 1392867, upload-time = "2025-07-26T12:01:15.533Z" }, + { url = "https://files.pythonhosted.org/packages/75/3e/f2cc6cd56dc8cff46b1a56232eabc6feea52720083ea71ab15523daab796/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff", size = 183677, upload-time = "2025-07-26T12:01:17.088Z" }, + { url = "https://files.pythonhosted.org/packages/98/4b/9bd370b004b5c9d8045c6c33cf65bae018b27aca550a3f657cdc99acdbd8/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42", size = 225234, upload-time = "2025-07-26T12:01:18.256Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b6/71771e02c2e004450c12b1120a5f488cad2e4d5b590b1af8bad060360fe4/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470", size = 193123, upload-time = "2025-07-26T12:01:19.848Z" }, + { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, + { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, + { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, + { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" }, + { url = "https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" }, + { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" }, + { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" }, + { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" }, + { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" }, + { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, + { url = "https://files.pythonhosted.org/packages/68/35/0167aad910bbdb9599272bd96d01a9ec6852f36b9455cf2ca67bd4cc2d23/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5", size = 293257, upload-time = "2025-07-26T12:01:39.367Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/7adcd9c8362745b2210728f209bfbcf7d91ba868a2c5f40d8b58f54c509b/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1", size = 274034, upload-time = "2025-07-26T12:01:40.645Z" }, + { url = "https://files.pythonhosted.org/packages/73/23/90e31ceeed1de63058a02cb04b12f2de4b40e3bef5e082a7c18d9c8ae281/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286", size = 334672, upload-time = "2025-07-26T12:01:41.942Z" }, + { url = "https://files.pythonhosted.org/packages/ed/93/b43d8acbe67392e659e1d984700e79eb67e2acb2bd7f62012b583a7f1b55/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5", size = 381234, upload-time = "2025-07-26T12:01:43.499Z" }, + { url = "https://files.pythonhosted.org/packages/46/3b/bec82a3ea06f66711520f75a40c8fc0b113b2a75edb36aa633eb11c4f50f/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67", size = 385169, upload-time = "2025-07-26T12:01:45.219Z" }, + { url = "https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" }, + { url = "https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" }, + { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" }, + { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" }, + { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" }, + { url = "https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" }, + { url = "https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" }, + { url = "https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" }, + { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" }, + { url = "https://files.pythonhosted.org/packages/0f/81/03b45cfad088e4770b1dcf72ea78d3802d04200009fb364d18a493857210/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20", size = 375486, upload-time = "2025-07-26T12:02:02.128Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ba/49923366492ffbdd4486e970d421b289a670ae8cf539c1ea9a09822b371a/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99", size = 388106, upload-time = "2025-07-26T12:02:03.615Z" }, + { url = "https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" }, + { url = "https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" }, + { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" }, + { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" }, + { url = "https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" }, + { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" }, + { url = "https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" }, + { url = "https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e2/366af18a6d386f41132a48f033cbd2102e9b0cf6345d35ff0826cd984566/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d", size = 189692, upload-time = "2025-07-26T12:02:30.128Z" }, + { url = "https://files.pythonhosted.org/packages/7d/c2/57f54b03d0f22d4044b8afb9ca0e184f8b1afd57b4f735c2fa70883dc601/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd", size = 232424, upload-time = "2025-07-26T12:02:31.395Z" }, + { url = "https://files.pythonhosted.org/packages/18/79/a9416650df9b525737ab521aa181ccc42d56016d2123ddcb7b58e926a42c/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339", size = 198300, upload-time = "2025-07-26T12:02:32.956Z" }, + { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" }, + { url = "https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" }, + { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" }, + { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" }, + { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" }, + { url = "https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, + { url = "https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, + { url = "https://files.pythonhosted.org/packages/a5/29/8dcfe16f0107943fa92388c23f6e05cff0ba58058c4c95b00280d4c75a14/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497", size = 278809, upload-time = "2025-07-26T12:02:52.74Z" }, + { url = "https://files.pythonhosted.org/packages/85/a9/8b37ef4f7dafeb335daee3c8254645ef5725be4d9c6aa70b50ec46ef2f7e/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8", size = 261593, upload-time = "2025-07-26T12:02:54.037Z" }, + { url = "https://files.pythonhosted.org/packages/0a/59/ebfb8c677c75605cc27f7122c90313fd2f375ff3c8d19a1694bda74aaa63/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e", size = 302202, upload-time = "2025-07-26T12:02:55.947Z" }, + { url = "https://files.pythonhosted.org/packages/3c/37/21972a15834d90bfbfb009b9d004779bd5a07a0ec0234e5ba8f64d5736f4/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989", size = 329207, upload-time = "2025-07-26T12:02:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" }, +] + [[package]] name = "coverage" version = "7.10.7" @@ -603,6 +761,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/87/7ce86f3fa14bc11a5a48c30d8103c26e09b6465f8d8e9d74cf7a0714f043/cryptography-45.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:1f3d56f73595376f4244646dd5c5870c14c196949807be39e79e7bd9bac3da63", size = 3332908, upload-time = "2025-09-01T11:14:58.78Z" }, ] +[[package]] +name = "cycler" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, +] + [[package]] name = "distlib" version = "0.4.0" @@ -651,6 +818,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" }, ] +[[package]] +name = "fonttools" +version = "4.60.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/42/97a13e47a1e51a5a7142475bbcf5107fe3a68fc34aef331c897d5fb98ad0/fonttools-4.60.1.tar.gz", hash = "sha256:ef00af0439ebfee806b25f24c8f92109157ff3fac5731dc7867957812e87b8d9", size = 3559823, upload-time = "2025-09-29T21:13:27.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/70/03e9d89a053caff6ae46053890eba8e4a5665a7c5638279ed4492e6d4b8b/fonttools-4.60.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9a52f254ce051e196b8fe2af4634c2d2f02c981756c6464dc192f1b6050b4e28", size = 2810747, upload-time = "2025-09-29T21:10:59.653Z" }, + { url = "https://files.pythonhosted.org/packages/6f/41/449ad5aff9670ab0df0f61ee593906b67a36d7e0b4d0cd7fa41ac0325bf5/fonttools-4.60.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c7420a2696a44650120cdd269a5d2e56a477e2bfa9d95e86229059beb1c19e15", size = 2346909, upload-time = "2025-09-29T21:11:02.882Z" }, + { url = "https://files.pythonhosted.org/packages/9a/18/e5970aa96c8fad1cb19a9479cc3b7602c0c98d250fcdc06a5da994309c50/fonttools-4.60.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee0c0b3b35b34f782afc673d503167157094a16f442ace7c6c5e0ca80b08f50c", size = 4864572, upload-time = "2025-09-29T21:11:05.096Z" }, + { url = "https://files.pythonhosted.org/packages/ce/20/9b2b4051b6ec6689480787d506b5003f72648f50972a92d04527a456192c/fonttools-4.60.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:282dafa55f9659e8999110bd8ed422ebe1c8aecd0dc396550b038e6c9a08b8ea", size = 4794635, upload-time = "2025-09-29T21:11:08.651Z" }, + { url = "https://files.pythonhosted.org/packages/10/52/c791f57347c1be98f8345e3dca4ac483eb97666dd7c47f3059aeffab8b59/fonttools-4.60.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4ba4bd646e86de16160f0fb72e31c3b9b7d0721c3e5b26b9fa2fc931dfdb2652", size = 4843878, upload-time = "2025-09-29T21:11:10.893Z" }, + { url = "https://files.pythonhosted.org/packages/69/e9/35c24a8d01644cee8c090a22fad34d5b61d1e0a8ecbc9945ad785ebf2e9e/fonttools-4.60.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0b0835ed15dd5b40d726bb61c846a688f5b4ce2208ec68779bc81860adb5851a", size = 4954555, upload-time = "2025-09-29T21:11:13.24Z" }, + { url = "https://files.pythonhosted.org/packages/f7/86/fb1e994971be4bdfe3a307de6373ef69a9df83fb66e3faa9c8114893d4cc/fonttools-4.60.1-cp310-cp310-win32.whl", hash = "sha256:1525796c3ffe27bb6268ed2a1bb0dcf214d561dfaf04728abf01489eb5339dce", size = 2232019, upload-time = "2025-09-29T21:11:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/40/84/62a19e2bd56f0e9fb347486a5b26376bade4bf6bbba64dda2c103bd08c94/fonttools-4.60.1-cp310-cp310-win_amd64.whl", hash = "sha256:268ecda8ca6cb5c4f044b1fb9b3b376e8cd1b361cef275082429dc4174907038", size = 2276803, upload-time = "2025-09-29T21:11:18.152Z" }, + { url = "https://files.pythonhosted.org/packages/ea/85/639aa9bface1537e0fb0f643690672dde0695a5bbbc90736bc571b0b1941/fonttools-4.60.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7b4c32e232a71f63a5d00259ca3d88345ce2a43295bb049d21061f338124246f", size = 2831872, upload-time = "2025-09-29T21:11:20.329Z" }, + { url = "https://files.pythonhosted.org/packages/6b/47/3c63158459c95093be9618794acb1067b3f4d30dcc5c3e8114b70e67a092/fonttools-4.60.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3630e86c484263eaac71d117085d509cbcf7b18f677906824e4bace598fb70d2", size = 2356990, upload-time = "2025-09-29T21:11:22.754Z" }, + { url = "https://files.pythonhosted.org/packages/94/dd/1934b537c86fcf99f9761823f1fc37a98fbd54568e8e613f29a90fed95a9/fonttools-4.60.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5c1015318e4fec75dd4943ad5f6a206d9727adf97410d58b7e32ab644a807914", size = 5042189, upload-time = "2025-09-29T21:11:25.061Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d2/9f4e4c4374dd1daa8367784e1bd910f18ba886db1d6b825b12edf6db3edc/fonttools-4.60.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e6c58beb17380f7c2ea181ea11e7db8c0ceb474c9dd45f48e71e2cb577d146a1", size = 4978683, upload-time = "2025-09-29T21:11:27.693Z" }, + { url = "https://files.pythonhosted.org/packages/cc/c4/0fb2dfd1ecbe9a07954cc13414713ed1eab17b1c0214ef07fc93df234a47/fonttools-4.60.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec3681a0cb34c255d76dd9d865a55f260164adb9fa02628415cdc2d43ee2c05d", size = 5021372, upload-time = "2025-09-29T21:11:30.257Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d5/495fc7ae2fab20223cc87179a8f50f40f9a6f821f271ba8301ae12bb580f/fonttools-4.60.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f4b5c37a5f40e4d733d3bbaaef082149bee5a5ea3156a785ff64d949bd1353fa", size = 5132562, upload-time = "2025-09-29T21:11:32.737Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fa/021dab618526323c744e0206b3f5c8596a2e7ae9aa38db5948a131123e83/fonttools-4.60.1-cp311-cp311-win32.whl", hash = "sha256:398447f3d8c0c786cbf1209711e79080a40761eb44b27cdafffb48f52bcec258", size = 2230288, upload-time = "2025-09-29T21:11:35.015Z" }, + { url = "https://files.pythonhosted.org/packages/bb/78/0e1a6d22b427579ea5c8273e1c07def2f325b977faaf60bb7ddc01456cb1/fonttools-4.60.1-cp311-cp311-win_amd64.whl", hash = "sha256:d066ea419f719ed87bc2c99a4a4bfd77c2e5949cb724588b9dd58f3fd90b92bf", size = 2278184, upload-time = "2025-09-29T21:11:37.434Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f7/a10b101b7a6f8836a5adb47f2791f2075d044a6ca123f35985c42edc82d8/fonttools-4.60.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7b0c6d57ab00dae9529f3faf187f2254ea0aa1e04215cf2f1a8ec277c96661bc", size = 2832953, upload-time = "2025-09-29T21:11:39.616Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fe/7bd094b59c926acf2304d2151354ddbeb74b94812f3dc943c231db09cb41/fonttools-4.60.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:839565cbf14645952d933853e8ade66a463684ed6ed6c9345d0faf1f0e868877", size = 2352706, upload-time = "2025-09-29T21:11:41.826Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ca/4bb48a26ed95a1e7eba175535fe5805887682140ee0a0d10a88e1de84208/fonttools-4.60.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8177ec9676ea6e1793c8a084a90b65a9f778771998eb919d05db6d4b1c0b114c", size = 4923716, upload-time = "2025-09-29T21:11:43.893Z" }, + { url = "https://files.pythonhosted.org/packages/b8/9f/2cb82999f686c1d1ddf06f6ae1a9117a880adbec113611cc9d22b2fdd465/fonttools-4.60.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:996a4d1834524adbb423385d5a629b868ef9d774670856c63c9a0408a3063401", size = 4968175, upload-time = "2025-09-29T21:11:46.439Z" }, + { url = "https://files.pythonhosted.org/packages/18/79/be569699e37d166b78e6218f2cde8c550204f2505038cdd83b42edc469b9/fonttools-4.60.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a46b2f450bc79e06ef3b6394f0c68660529ed51692606ad7f953fc2e448bc903", size = 4911031, upload-time = "2025-09-29T21:11:48.977Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9f/89411cc116effaec5260ad519162f64f9c150e5522a27cbb05eb62d0c05b/fonttools-4.60.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ec722ee589e89a89f5b7574f5c45604030aa6ae24cb2c751e2707193b466fed", size = 5062966, upload-time = "2025-09-29T21:11:54.344Z" }, + { url = "https://files.pythonhosted.org/packages/62/a1/f888221934b5731d46cb9991c7a71f30cb1f97c0ef5fcf37f8da8fce6c8e/fonttools-4.60.1-cp312-cp312-win32.whl", hash = "sha256:b2cf105cee600d2de04ca3cfa1f74f1127f8455b71dbad02b9da6ec266e116d6", size = 2218750, upload-time = "2025-09-29T21:11:56.601Z" }, + { url = "https://files.pythonhosted.org/packages/88/8f/a55b5550cd33cd1028601df41acd057d4be20efa5c958f417b0c0613924d/fonttools-4.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:992775c9fbe2cf794786fa0ffca7f09f564ba3499b8fe9f2f80bd7197db60383", size = 2267026, upload-time = "2025-09-29T21:11:58.852Z" }, + { url = "https://files.pythonhosted.org/packages/7c/5b/cdd2c612277b7ac7ec8c0c9bc41812c43dc7b2d5f2b0897e15fdf5a1f915/fonttools-4.60.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f68576bb4bbf6060c7ab047b1574a1ebe5c50a17de62830079967b211059ebb", size = 2825777, upload-time = "2025-09-29T21:12:01.22Z" }, + { url = "https://files.pythonhosted.org/packages/d6/8a/de9cc0540f542963ba5e8f3a1f6ad48fa211badc3177783b9d5cadf79b5d/fonttools-4.60.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eedacb5c5d22b7097482fa834bda0dafa3d914a4e829ec83cdea2a01f8c813c4", size = 2348080, upload-time = "2025-09-29T21:12:03.785Z" }, + { url = "https://files.pythonhosted.org/packages/2d/8b/371ab3cec97ee3fe1126b3406b7abd60c8fec8975fd79a3c75cdea0c3d83/fonttools-4.60.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b33a7884fabd72bdf5f910d0cf46be50dce86a0362a65cfc746a4168c67eb96c", size = 4903082, upload-time = "2025-09-29T21:12:06.382Z" }, + { url = "https://files.pythonhosted.org/packages/04/05/06b1455e4bc653fcb2117ac3ef5fa3a8a14919b93c60742d04440605d058/fonttools-4.60.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2409d5fb7b55fd70f715e6d34e7a6e4f7511b8ad29a49d6df225ee76da76dd77", size = 4960125, upload-time = "2025-09-29T21:12:09.314Z" }, + { url = "https://files.pythonhosted.org/packages/8e/37/f3b840fcb2666f6cb97038793606bdd83488dca2d0b0fc542ccc20afa668/fonttools-4.60.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8651e0d4b3bdeda6602b85fdc2abbefc1b41e573ecb37b6779c4ca50753a199", size = 4901454, upload-time = "2025-09-29T21:12:11.931Z" }, + { url = "https://files.pythonhosted.org/packages/fd/9e/eb76f77e82f8d4a46420aadff12cec6237751b0fb9ef1de373186dcffb5f/fonttools-4.60.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:145daa14bf24824b677b9357c5e44fd8895c2a8f53596e1b9ea3496081dc692c", size = 5044495, upload-time = "2025-09-29T21:12:15.241Z" }, + { url = "https://files.pythonhosted.org/packages/f8/b3/cede8f8235d42ff7ae891bae8d619d02c8ac9fd0cfc450c5927a6200c70d/fonttools-4.60.1-cp313-cp313-win32.whl", hash = "sha256:2299df884c11162617a66b7c316957d74a18e3758c0274762d2cc87df7bc0272", size = 2217028, upload-time = "2025-09-29T21:12:17.96Z" }, + { url = "https://files.pythonhosted.org/packages/75/4d/b022c1577807ce8b31ffe055306ec13a866f2337ecee96e75b24b9b753ea/fonttools-4.60.1-cp313-cp313-win_amd64.whl", hash = "sha256:a3db56f153bd4c5c2b619ab02c5db5192e222150ce5a1bc10f16164714bc39ac", size = 2266200, upload-time = "2025-09-29T21:12:20.14Z" }, + { url = "https://files.pythonhosted.org/packages/9a/83/752ca11c1aa9a899b793a130f2e466b79ea0cf7279c8d79c178fc954a07b/fonttools-4.60.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:a884aef09d45ba1206712c7dbda5829562d3fea7726935d3289d343232ecb0d3", size = 2822830, upload-time = "2025-09-29T21:12:24.406Z" }, + { url = "https://files.pythonhosted.org/packages/57/17/bbeab391100331950a96ce55cfbbff27d781c1b85ebafb4167eae50d9fe3/fonttools-4.60.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8a44788d9d91df72d1a5eac49b31aeb887a5f4aab761b4cffc4196c74907ea85", size = 2345524, upload-time = "2025-09-29T21:12:26.819Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2e/d4831caa96d85a84dd0da1d9f90d81cec081f551e0ea216df684092c6c97/fonttools-4.60.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e852d9dda9f93ad3651ae1e3bb770eac544ec93c3807888798eccddf84596537", size = 4843490, upload-time = "2025-09-29T21:12:29.123Z" }, + { url = "https://files.pythonhosted.org/packages/49/13/5e2ea7c7a101b6fc3941be65307ef8df92cbbfa6ec4804032baf1893b434/fonttools-4.60.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:154cb6ee417e417bf5f7c42fe25858c9140c26f647c7347c06f0cc2d47eff003", size = 4944184, upload-time = "2025-09-29T21:12:31.414Z" }, + { url = "https://files.pythonhosted.org/packages/0c/2b/cf9603551c525b73fc47c52ee0b82a891579a93d9651ed694e4e2cd08bb8/fonttools-4.60.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5664fd1a9ea7f244487ac8f10340c4e37664675e8667d6fee420766e0fb3cf08", size = 4890218, upload-time = "2025-09-29T21:12:33.936Z" }, + { url = "https://files.pythonhosted.org/packages/fd/2f/933d2352422e25f2376aae74f79eaa882a50fb3bfef3c0d4f50501267101/fonttools-4.60.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:583b7f8e3c49486e4d489ad1deacfb8d5be54a8ef34d6df824f6a171f8511d99", size = 4999324, upload-time = "2025-09-29T21:12:36.637Z" }, + { url = "https://files.pythonhosted.org/packages/38/99/234594c0391221f66216bc2c886923513b3399a148defaccf81dc3be6560/fonttools-4.60.1-cp314-cp314-win32.whl", hash = "sha256:66929e2ea2810c6533a5184f938502cfdaea4bc3efb7130d8cc02e1c1b4108d6", size = 2220861, upload-time = "2025-09-29T21:12:39.108Z" }, + { url = "https://files.pythonhosted.org/packages/3e/1d/edb5b23726dde50fc4068e1493e4fc7658eeefcaf75d4c5ffce067d07ae5/fonttools-4.60.1-cp314-cp314-win_amd64.whl", hash = "sha256:f3d5be054c461d6a2268831f04091dc82753176f6ea06dc6047a5e168265a987", size = 2270934, upload-time = "2025-09-29T21:12:41.339Z" }, + { url = "https://files.pythonhosted.org/packages/fb/da/1392aaa2170adc7071fe7f9cfd181a5684a7afcde605aebddf1fb4d76df5/fonttools-4.60.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b6379e7546ba4ae4b18f8ae2b9bc5960936007a1c0e30b342f662577e8bc3299", size = 2894340, upload-time = "2025-09-29T21:12:43.774Z" }, + { url = "https://files.pythonhosted.org/packages/bf/a7/3b9f16e010d536ce567058b931a20b590d8f3177b2eda09edd92e392375d/fonttools-4.60.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9d0ced62b59e0430b3690dbc5373df1c2aa7585e9a8ce38eff87f0fd993c5b01", size = 2375073, upload-time = "2025-09-29T21:12:46.437Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b5/e9bcf51980f98e59bb5bb7c382a63c6f6cac0eec5f67de6d8f2322382065/fonttools-4.60.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:875cb7764708b3132637f6c5fb385b16eeba0f7ac9fa45a69d35e09b47045801", size = 4849758, upload-time = "2025-09-29T21:12:48.694Z" }, + { url = "https://files.pythonhosted.org/packages/e3/dc/1d2cf7d1cba82264b2f8385db3f5960e3d8ce756b4dc65b700d2c496f7e9/fonttools-4.60.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a184b2ea57b13680ab6d5fbde99ccef152c95c06746cb7718c583abd8f945ccc", size = 5085598, upload-time = "2025-09-29T21:12:51.081Z" }, + { url = "https://files.pythonhosted.org/packages/5d/4d/279e28ba87fb20e0c69baf72b60bbf1c4d873af1476806a7b5f2b7fac1ff/fonttools-4.60.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:026290e4ec76583881763fac284aca67365e0be9f13a7fb137257096114cb3bc", size = 4957603, upload-time = "2025-09-29T21:12:53.423Z" }, + { url = "https://files.pythonhosted.org/packages/78/d4/ff19976305e0c05aa3340c805475abb00224c954d3c65e82c0a69633d55d/fonttools-4.60.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f0e8817c7d1a0c2eedebf57ef9a9896f3ea23324769a9a2061a80fe8852705ed", size = 4974184, upload-time = "2025-09-29T21:12:55.962Z" }, + { url = "https://files.pythonhosted.org/packages/63/22/8553ff6166f5cd21cfaa115aaacaa0dc73b91c079a8cfd54a482cbc0f4f5/fonttools-4.60.1-cp314-cp314t-win32.whl", hash = "sha256:1410155d0e764a4615774e5c2c6fc516259fe3eca5882f034eb9bfdbee056259", size = 2282241, upload-time = "2025-09-29T21:12:58.179Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cb/fa7b4d148e11d5a72761a22e595344133e83a9507a4c231df972e657579b/fonttools-4.60.1-cp314-cp314t-win_amd64.whl", hash = "sha256:022beaea4b73a70295b688f817ddc24ed3e3418b5036ffcd5658141184ef0d0c", size = 2345760, upload-time = "2025-09-29T21:13:00.375Z" }, + { url = "https://files.pythonhosted.org/packages/c7/93/0dd45cd283c32dea1545151d8c3637b4b8c53cdb3a625aeb2885b184d74d/fonttools-4.60.1-py3-none-any.whl", hash = "sha256:906306ac7afe2156fcf0042173d6ebbb05416af70f6b370967b47f8f00103bbb", size = 1143175, upload-time = "2025-09-29T21:13:24.134Z" }, +] + [[package]] name = "frozenlist" version = "1.7.0" @@ -908,6 +1132,189 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/c2/93368d4c9355e8ad1f6d62b804de241939d0796b2a3a73737f665b802808/json_repair-0.50.0-py3-none-any.whl", hash = "sha256:b15da2c42deb43419b182d97dcfde6cd86d0b18ccd18ed1a887104ce85e7a364", size = 25985, upload-time = "2025-08-20T15:01:56.567Z" }, ] +[[package]] +name = "kiwisolver" +version = "1.4.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/5d/8ce64e36d4e3aac5ca96996457dcf33e34e6051492399a3f1fec5657f30b/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b4b4d74bda2b8ebf4da5bd42af11d02d04428b2c32846e4c2c93219df8a7987b", size = 124159, upload-time = "2025-08-10T21:25:35.472Z" }, + { url = "https://files.pythonhosted.org/packages/96/1e/22f63ec454874378175a5f435d6ea1363dd33fb2af832c6643e4ccea0dc8/kiwisolver-1.4.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fb3b8132019ea572f4611d770991000d7f58127560c4889729248eb5852a102f", size = 66578, upload-time = "2025-08-10T21:25:36.73Z" }, + { url = "https://files.pythonhosted.org/packages/41/4c/1925dcfff47a02d465121967b95151c82d11027d5ec5242771e580e731bd/kiwisolver-1.4.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84fd60810829c27ae375114cd379da1fa65e6918e1da405f356a775d49a62bcf", size = 65312, upload-time = "2025-08-10T21:25:37.658Z" }, + { url = "https://files.pythonhosted.org/packages/d4/42/0f333164e6307a0687d1eb9ad256215aae2f4bd5d28f4653d6cd319a3ba3/kiwisolver-1.4.9-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b78efa4c6e804ecdf727e580dbb9cba85624d2e1c6b5cb059c66290063bd99a9", size = 1628458, upload-time = "2025-08-10T21:25:39.067Z" }, + { url = "https://files.pythonhosted.org/packages/86/b6/2dccb977d651943995a90bfe3495c2ab2ba5cd77093d9f2318a20c9a6f59/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4efec7bcf21671db6a3294ff301d2fc861c31faa3c8740d1a94689234d1b415", size = 1225640, upload-time = "2025-08-10T21:25:40.489Z" }, + { url = "https://files.pythonhosted.org/packages/50/2b/362ebd3eec46c850ccf2bfe3e30f2fc4c008750011f38a850f088c56a1c6/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90f47e70293fc3688b71271100a1a5453aa9944a81d27ff779c108372cf5567b", size = 1244074, upload-time = "2025-08-10T21:25:42.221Z" }, + { url = "https://files.pythonhosted.org/packages/6f/bb/f09a1e66dab8984773d13184a10a29fe67125337649d26bdef547024ed6b/kiwisolver-1.4.9-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fdca1def57a2e88ef339de1737a1449d6dbf5fab184c54a1fca01d541317154", size = 1293036, upload-time = "2025-08-10T21:25:43.801Z" }, + { url = "https://files.pythonhosted.org/packages/ea/01/11ecf892f201cafda0f68fa59212edaea93e96c37884b747c181303fccd1/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9cf554f21be770f5111a1690d42313e140355e687e05cf82cb23d0a721a64a48", size = 2175310, upload-time = "2025-08-10T21:25:45.045Z" }, + { url = "https://files.pythonhosted.org/packages/7f/5f/bfe11d5b934f500cc004314819ea92427e6e5462706a498c1d4fc052e08f/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fc1795ac5cd0510207482c3d1d3ed781143383b8cfd36f5c645f3897ce066220", size = 2270943, upload-time = "2025-08-10T21:25:46.393Z" }, + { url = "https://files.pythonhosted.org/packages/3d/de/259f786bf71f1e03e73d87e2db1a9a3bcab64d7b4fd780167123161630ad/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ccd09f20ccdbbd341b21a67ab50a119b64a403b09288c27481575105283c1586", size = 2440488, upload-time = "2025-08-10T21:25:48.074Z" }, + { url = "https://files.pythonhosted.org/packages/1b/76/c989c278faf037c4d3421ec07a5c452cd3e09545d6dae7f87c15f54e4edf/kiwisolver-1.4.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:540c7c72324d864406a009d72f5d6856f49693db95d1fbb46cf86febef873634", size = 2246787, upload-time = "2025-08-10T21:25:49.442Z" }, + { url = "https://files.pythonhosted.org/packages/a2/55/c2898d84ca440852e560ca9f2a0d28e6e931ac0849b896d77231929900e7/kiwisolver-1.4.9-cp310-cp310-win_amd64.whl", hash = "sha256:ede8c6d533bc6601a47ad4046080d36b8fc99f81e6f1c17b0ac3c2dc91ac7611", size = 73730, upload-time = "2025-08-10T21:25:51.102Z" }, + { url = "https://files.pythonhosted.org/packages/e8/09/486d6ac523dd33b80b368247f238125d027964cfacb45c654841e88fb2ae/kiwisolver-1.4.9-cp310-cp310-win_arm64.whl", hash = "sha256:7b4da0d01ac866a57dd61ac258c5607b4cd677f63abaec7b148354d2b2cdd536", size = 65036, upload-time = "2025-08-10T21:25:52.063Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/c80b0d5a9d8a1a65f4f815f2afff9798b12c3b9f31f1d304dd233dd920e2/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:eb14a5da6dc7642b0f3a18f13654847cd8b7a2550e2645a5bda677862b03ba16", size = 124167, upload-time = "2025-08-10T21:25:53.403Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c0/27fe1a68a39cf62472a300e2879ffc13c0538546c359b86f149cc19f6ac3/kiwisolver-1.4.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:39a219e1c81ae3b103643d2aedb90f1ef22650deb266ff12a19e7773f3e5f089", size = 66579, upload-time = "2025-08-10T21:25:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/31/a2/a12a503ac1fd4943c50f9822678e8015a790a13b5490354c68afb8489814/kiwisolver-1.4.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2405a7d98604b87f3fc28b1716783534b1b4b8510d8142adca34ee0bc3c87543", size = 65309, upload-time = "2025-08-10T21:25:55.76Z" }, + { url = "https://files.pythonhosted.org/packages/66/e1/e533435c0be77c3f64040d68d7a657771194a63c279f55573188161e81ca/kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dc1ae486f9abcef254b5618dfb4113dd49f94c68e3e027d03cf0143f3f772b61", size = 1435596, upload-time = "2025-08-10T21:25:56.861Z" }, + { url = "https://files.pythonhosted.org/packages/67/1e/51b73c7347f9aabdc7215aa79e8b15299097dc2f8e67dee2b095faca9cb0/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a1f570ce4d62d718dce3f179ee78dac3b545ac16c0c04bb363b7607a949c0d1", size = 1246548, upload-time = "2025-08-10T21:25:58.246Z" }, + { url = "https://files.pythonhosted.org/packages/21/aa/72a1c5d1e430294f2d32adb9542719cfb441b5da368d09d268c7757af46c/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb27e7b78d716c591e88e0a09a2139c6577865d7f2e152488c2cc6257f460872", size = 1263618, upload-time = "2025-08-10T21:25:59.857Z" }, + { url = "https://files.pythonhosted.org/packages/a3/af/db1509a9e79dbf4c260ce0cfa3903ea8945f6240e9e59d1e4deb731b1a40/kiwisolver-1.4.9-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:15163165efc2f627eb9687ea5f3a28137217d217ac4024893d753f46bce9de26", size = 1317437, upload-time = "2025-08-10T21:26:01.105Z" }, + { url = "https://files.pythonhosted.org/packages/e0/f2/3ea5ee5d52abacdd12013a94130436e19969fa183faa1e7c7fbc89e9a42f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bdee92c56a71d2b24c33a7d4c2856bd6419d017e08caa7802d2963870e315028", size = 2195742, upload-time = "2025-08-10T21:26:02.675Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9b/1efdd3013c2d9a2566aa6a337e9923a00590c516add9a1e89a768a3eb2fc/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:412f287c55a6f54b0650bd9b6dce5aceddb95864a1a90c87af16979d37c89771", size = 2290810, upload-time = "2025-08-10T21:26:04.009Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e5/cfdc36109ae4e67361f9bc5b41323648cb24a01b9ade18784657e022e65f/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2c93f00dcba2eea70af2be5f11a830a742fe6b579a1d4e00f47760ef13be247a", size = 2461579, upload-time = "2025-08-10T21:26:05.317Z" }, + { url = "https://files.pythonhosted.org/packages/62/86/b589e5e86c7610842213994cdea5add00960076bef4ae290c5fa68589cac/kiwisolver-1.4.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f117e1a089d9411663a3207ba874f31be9ac8eaa5b533787024dc07aeb74f464", size = 2268071, upload-time = "2025-08-10T21:26:06.686Z" }, + { url = "https://files.pythonhosted.org/packages/3b/c6/f8df8509fd1eee6c622febe54384a96cfaf4d43bf2ccec7a0cc17e4715c9/kiwisolver-1.4.9-cp311-cp311-win_amd64.whl", hash = "sha256:be6a04e6c79819c9a8c2373317d19a96048e5a3f90bec587787e86a1153883c2", size = 73840, upload-time = "2025-08-10T21:26:07.94Z" }, + { url = "https://files.pythonhosted.org/packages/e2/2d/16e0581daafd147bc11ac53f032a2b45eabac897f42a338d0a13c1e5c436/kiwisolver-1.4.9-cp311-cp311-win_arm64.whl", hash = "sha256:0ae37737256ba2de764ddc12aed4956460277f00c4996d51a197e72f62f5eec7", size = 65159, upload-time = "2025-08-10T21:26:09.048Z" }, + { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, + { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, + { url = "https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" }, + { url = "https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" }, + { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" }, + { url = "https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" }, + { url = "https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" }, + { url = "https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" }, + { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" }, + { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" }, + { url = "https://files.pythonhosted.org/packages/31/c1/c2686cda909742ab66c7388e9a1a8521a59eb89f8bcfbee28fc980d07e24/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8", size = 123681, upload-time = "2025-08-10T21:26:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/ca/f0/f44f50c9f5b1a1860261092e3bc91ecdc9acda848a8b8c6abfda4a24dd5c/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2", size = 66464, upload-time = "2025-08-10T21:26:27.733Z" }, + { url = "https://files.pythonhosted.org/packages/2d/7a/9d90a151f558e29c3936b8a47ac770235f436f2120aca41a6d5f3d62ae8d/kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f", size = 64961, upload-time = "2025-08-10T21:26:28.729Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e9/f218a2cb3a9ffbe324ca29a9e399fa2d2866d7f348ec3a88df87fc248fc5/kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098", size = 1474607, upload-time = "2025-08-10T21:26:29.798Z" }, + { url = "https://files.pythonhosted.org/packages/d9/28/aac26d4c882f14de59041636292bc838db8961373825df23b8eeb807e198/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed", size = 1276546, upload-time = "2025-08-10T21:26:31.401Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ad/8bfc1c93d4cc565e5069162f610ba2f48ff39b7de4b5b8d93f69f30c4bed/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525", size = 1294482, upload-time = "2025-08-10T21:26:32.721Z" }, + { url = "https://files.pythonhosted.org/packages/da/f1/6aca55ff798901d8ce403206d00e033191f63d82dd708a186e0ed2067e9c/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78", size = 1343720, upload-time = "2025-08-10T21:26:34.032Z" }, + { url = "https://files.pythonhosted.org/packages/d1/91/eed031876c595c81d90d0f6fc681ece250e14bf6998c3d7c419466b523b7/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b", size = 2224907, upload-time = "2025-08-10T21:26:35.824Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ec/4d1925f2e49617b9cca9c34bfa11adefad49d00db038e692a559454dfb2e/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799", size = 2321334, upload-time = "2025-08-10T21:26:37.534Z" }, + { url = "https://files.pythonhosted.org/packages/43/cb/450cd4499356f68802750c6ddc18647b8ea01ffa28f50d20598e0befe6e9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3", size = 2488313, upload-time = "2025-08-10T21:26:39.191Z" }, + { url = "https://files.pythonhosted.org/packages/71/67/fc76242bd99f885651128a5d4fa6083e5524694b7c88b489b1b55fdc491d/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c", size = 2291970, upload-time = "2025-08-10T21:26:40.828Z" }, + { url = "https://files.pythonhosted.org/packages/75/bd/f1a5d894000941739f2ae1b65a32892349423ad49c2e6d0771d0bad3fae4/kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d", size = 73894, upload-time = "2025-08-10T21:26:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/95/38/dce480814d25b99a391abbddadc78f7c117c6da34be68ca8b02d5848b424/kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2", size = 64995, upload-time = "2025-08-10T21:26:43.889Z" }, + { url = "https://files.pythonhosted.org/packages/e2/37/7d218ce5d92dadc5ebdd9070d903e0c7cf7edfe03f179433ac4d13ce659c/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1", size = 126510, upload-time = "2025-08-10T21:26:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/23/b0/e85a2b48233daef4b648fb657ebbb6f8367696a2d9548a00b4ee0eb67803/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1", size = 67903, upload-time = "2025-08-10T21:26:45.934Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/f2425bc0113ad7de24da6bb4dae1343476e95e1d738be7c04d31a5d037fd/kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11", size = 66402, upload-time = "2025-08-10T21:26:47.101Z" }, + { url = "https://files.pythonhosted.org/packages/98/d8/594657886df9f34c4177cc353cc28ca7e6e5eb562d37ccc233bff43bbe2a/kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c", size = 1582135, upload-time = "2025-08-10T21:26:48.665Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c6/38a115b7170f8b306fc929e166340c24958347308ea3012c2b44e7e295db/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197", size = 1389409, upload-time = "2025-08-10T21:26:50.335Z" }, + { url = "https://files.pythonhosted.org/packages/bf/3b/e04883dace81f24a568bcee6eb3001da4ba05114afa622ec9b6fafdc1f5e/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c", size = 1401763, upload-time = "2025-08-10T21:26:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/9f/80/20ace48e33408947af49d7d15c341eaee69e4e0304aab4b7660e234d6288/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185", size = 1453643, upload-time = "2025-08-10T21:26:53.592Z" }, + { url = "https://files.pythonhosted.org/packages/64/31/6ce4380a4cd1f515bdda976a1e90e547ccd47b67a1546d63884463c92ca9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748", size = 2330818, upload-time = "2025-08-10T21:26:55.051Z" }, + { url = "https://files.pythonhosted.org/packages/fa/e9/3f3fcba3bcc7432c795b82646306e822f3fd74df0ee81f0fa067a1f95668/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64", size = 2419963, upload-time = "2025-08-10T21:26:56.421Z" }, + { url = "https://files.pythonhosted.org/packages/99/43/7320c50e4133575c66e9f7dadead35ab22d7c012a3b09bb35647792b2a6d/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff", size = 2594639, upload-time = "2025-08-10T21:26:57.882Z" }, + { url = "https://files.pythonhosted.org/packages/65/d6/17ae4a270d4a987ef8a385b906d2bdfc9fce502d6dc0d3aea865b47f548c/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07", size = 2391741, upload-time = "2025-08-10T21:26:59.237Z" }, + { url = "https://files.pythonhosted.org/packages/2a/8f/8f6f491d595a9e5912971f3f863d81baddccc8a4d0c3749d6a0dd9ffc9df/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c", size = 68646, upload-time = "2025-08-10T21:27:00.52Z" }, + { url = "https://files.pythonhosted.org/packages/6b/32/6cc0fbc9c54d06c2969faa9c1d29f5751a2e51809dd55c69055e62d9b426/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386", size = 123806, upload-time = "2025-08-10T21:27:01.537Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dd/2bfb1d4a4823d92e8cbb420fe024b8d2167f72079b3bb941207c42570bdf/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552", size = 66605, upload-time = "2025-08-10T21:27:03.335Z" }, + { url = "https://files.pythonhosted.org/packages/f7/69/00aafdb4e4509c2ca6064646cba9cd4b37933898f426756adb2cb92ebbed/kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3", size = 64925, upload-time = "2025-08-10T21:27:04.339Z" }, + { url = "https://files.pythonhosted.org/packages/43/dc/51acc6791aa14e5cb6d8a2e28cefb0dc2886d8862795449d021334c0df20/kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58", size = 1472414, upload-time = "2025-08-10T21:27:05.437Z" }, + { url = "https://files.pythonhosted.org/packages/3d/bb/93fa64a81db304ac8a246f834d5094fae4b13baf53c839d6bb6e81177129/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4", size = 1281272, upload-time = "2025-08-10T21:27:07.063Z" }, + { url = "https://files.pythonhosted.org/packages/70/e6/6df102916960fb8d05069d4bd92d6d9a8202d5a3e2444494e7cd50f65b7a/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df", size = 1298578, upload-time = "2025-08-10T21:27:08.452Z" }, + { url = "https://files.pythonhosted.org/packages/7c/47/e142aaa612f5343736b087864dbaebc53ea8831453fb47e7521fa8658f30/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6", size = 1345607, upload-time = "2025-08-10T21:27:10.125Z" }, + { url = "https://files.pythonhosted.org/packages/54/89/d641a746194a0f4d1a3670fb900d0dbaa786fb98341056814bc3f058fa52/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5", size = 2230150, upload-time = "2025-08-10T21:27:11.484Z" }, + { url = "https://files.pythonhosted.org/packages/aa/6b/5ee1207198febdf16ac11f78c5ae40861b809cbe0e6d2a8d5b0b3044b199/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf", size = 2325979, upload-time = "2025-08-10T21:27:12.917Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ff/b269eefd90f4ae14dcc74973d5a0f6d28d3b9bb1afd8c0340513afe6b39a/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5", size = 2491456, upload-time = "2025-08-10T21:27:14.353Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d4/10303190bd4d30de547534601e259a4fbf014eed94aae3e5521129215086/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce", size = 2294621, upload-time = "2025-08-10T21:27:15.808Z" }, + { url = "https://files.pythonhosted.org/packages/28/e0/a9a90416fce5c0be25742729c2ea52105d62eda6c4be4d803c2a7be1fa50/kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7", size = 75417, upload-time = "2025-08-10T21:27:17.436Z" }, + { url = "https://files.pythonhosted.org/packages/1f/10/6949958215b7a9a264299a7db195564e87900f709db9245e4ebdd3c70779/kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c", size = 66582, upload-time = "2025-08-10T21:27:18.436Z" }, + { url = "https://files.pythonhosted.org/packages/ec/79/60e53067903d3bc5469b369fe0dfc6b3482e2133e85dae9daa9527535991/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548", size = 126514, upload-time = "2025-08-10T21:27:19.465Z" }, + { url = "https://files.pythonhosted.org/packages/25/d1/4843d3e8d46b072c12a38c97c57fab4608d36e13fe47d47ee96b4d61ba6f/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d", size = 67905, upload-time = "2025-08-10T21:27:20.51Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ae/29ffcbd239aea8b93108de1278271ae764dfc0d803a5693914975f200596/kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c", size = 66399, upload-time = "2025-08-10T21:27:21.496Z" }, + { url = "https://files.pythonhosted.org/packages/a1/ae/d7ba902aa604152c2ceba5d352d7b62106bedbccc8e95c3934d94472bfa3/kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122", size = 1582197, upload-time = "2025-08-10T21:27:22.604Z" }, + { url = "https://files.pythonhosted.org/packages/f2/41/27c70d427eddb8bc7e4f16420a20fefc6f480312122a59a959fdfe0445ad/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64", size = 1390125, upload-time = "2025-08-10T21:27:24.036Z" }, + { url = "https://files.pythonhosted.org/packages/41/42/b3799a12bafc76d962ad69083f8b43b12bf4fe78b097b12e105d75c9b8f1/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134", size = 1402612, upload-time = "2025-08-10T21:27:25.773Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b5/a210ea073ea1cfaca1bb5c55a62307d8252f531beb364e18aa1e0888b5a0/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370", size = 1453990, upload-time = "2025-08-10T21:27:27.089Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ce/a829eb8c033e977d7ea03ed32fb3c1781b4fa0433fbadfff29e39c676f32/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21", size = 2331601, upload-time = "2025-08-10T21:27:29.343Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4b/b5e97eb142eb9cd0072dacfcdcd31b1c66dc7352b0f7c7255d339c0edf00/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a", size = 2422041, upload-time = "2025-08-10T21:27:30.754Z" }, + { url = "https://files.pythonhosted.org/packages/40/be/8eb4cd53e1b85ba4edc3a9321666f12b83113a178845593307a3e7891f44/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f", size = 2594897, upload-time = "2025-08-10T21:27:32.803Z" }, + { url = "https://files.pythonhosted.org/packages/99/dd/841e9a66c4715477ea0abc78da039832fbb09dac5c35c58dc4c41a407b8a/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369", size = 2391835, upload-time = "2025-08-10T21:27:34.23Z" }, + { url = "https://files.pythonhosted.org/packages/0c/28/4b2e5c47a0da96896fdfdb006340ade064afa1e63675d01ea5ac222b6d52/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891", size = 79988, upload-time = "2025-08-10T21:27:35.587Z" }, + { url = "https://files.pythonhosted.org/packages/80/be/3578e8afd18c88cdf9cb4cffde75a96d2be38c5a903f1ed0ceec061bd09e/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32", size = 70260, upload-time = "2025-08-10T21:27:36.606Z" }, + { url = "https://files.pythonhosted.org/packages/a2/63/fde392691690f55b38d5dd7b3710f5353bf7a8e52de93a22968801ab8978/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4d1d9e582ad4d63062d34077a9a1e9f3c34088a2ec5135b1f7190c07cf366527", size = 60183, upload-time = "2025-08-10T21:27:37.669Z" }, + { url = "https://files.pythonhosted.org/packages/27/b1/6aad34edfdb7cced27f371866f211332bba215bfd918ad3322a58f480d8b/kiwisolver-1.4.9-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:deed0c7258ceb4c44ad5ec7d9918f9f14fd05b2be86378d86cf50e63d1e7b771", size = 58675, upload-time = "2025-08-10T21:27:39.031Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1a/23d855a702bb35a76faed5ae2ba3de57d323f48b1f6b17ee2176c4849463/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a590506f303f512dff6b7f75fd2fd18e16943efee932008fe7140e5fa91d80e", size = 80277, upload-time = "2025-08-10T21:27:40.129Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5b/5239e3c2b8fb5afa1e8508f721bb77325f740ab6994d963e61b2b7abcc1e/kiwisolver-1.4.9-pp310-pypy310_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e09c2279a4d01f099f52d5c4b3d9e208e91edcbd1a175c9662a8b16e000fece9", size = 77994, upload-time = "2025-08-10T21:27:41.181Z" }, + { url = "https://files.pythonhosted.org/packages/f9/1c/5d4d468fb16f8410e596ed0eac02d2c68752aa7dc92997fe9d60a7147665/kiwisolver-1.4.9-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e7cdf45d594ee04d5be1b24dd9d49f3d1590959b2271fb30b5ca2b262c00fb", size = 73744, upload-time = "2025-08-10T21:27:42.254Z" }, + { url = "https://files.pythonhosted.org/packages/a3/0f/36d89194b5a32c054ce93e586d4049b6c2c22887b0eb229c61c68afd3078/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:720e05574713db64c356e86732c0f3c5252818d05f9df320f0ad8380641acea5", size = 60104, upload-time = "2025-08-10T21:27:43.287Z" }, + { url = "https://files.pythonhosted.org/packages/52/ba/4ed75f59e4658fd21fe7dde1fee0ac397c678ec3befba3fe6482d987af87/kiwisolver-1.4.9-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:17680d737d5335b552994a2008fab4c851bcd7de33094a82067ef3a576ff02fa", size = 58592, upload-time = "2025-08-10T21:27:44.314Z" }, + { url = "https://files.pythonhosted.org/packages/33/01/a8ea7c5ea32a9b45ceeaee051a04c8ed4320f5add3c51bfa20879b765b70/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:85b5352f94e490c028926ea567fc569c52ec79ce131dadb968d3853e809518c2", size = 80281, upload-time = "2025-08-10T21:27:45.369Z" }, + { url = "https://files.pythonhosted.org/packages/da/e3/dbd2ecdce306f1d07a1aaf324817ee993aab7aee9db47ceac757deabafbe/kiwisolver-1.4.9-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:464415881e4801295659462c49461a24fb107c140de781d55518c4b80cb6790f", size = 78009, upload-time = "2025-08-10T21:27:46.376Z" }, + { url = "https://files.pythonhosted.org/packages/da/e9/0d4add7873a73e462aeb45c036a2dead2562b825aa46ba326727b3f31016/kiwisolver-1.4.9-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fb940820c63a9590d31d88b815e7a3aa5915cad3ce735ab45f0c730b39547de1", size = 73929, upload-time = "2025-08-10T21:27:48.236Z" }, +] + +[[package]] +name = "matplotlib" +version = "3.10.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "contourpy", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "cycler" }, + { name = "fonttools" }, + { name = "kiwisolver" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "pyparsing" }, + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ae/e2/d2d5295be2f44c678ebaf3544ba32d20c1f9ef08c49fe47f496180e1db15/matplotlib-3.10.7.tar.gz", hash = "sha256:a06ba7e2a2ef9131c79c49e63dad355d2d878413a0376c1727c8b9335ff731c7", size = 34804865, upload-time = "2025-10-09T00:28:00.669Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/87/3932d5778ab4c025db22710b61f49ccaed3956c5cf46ffb2ffa7492b06d9/matplotlib-3.10.7-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7ac81eee3b7c266dd92cee1cd658407b16c57eed08c7421fa354ed68234de380", size = 8247141, upload-time = "2025-10-09T00:26:06.023Z" }, + { url = "https://files.pythonhosted.org/packages/45/a8/bfed45339160102bce21a44e38a358a1134a5f84c26166de03fb4a53208f/matplotlib-3.10.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:667ecd5d8d37813a845053d8f5bf110b534c3c9f30e69ebd25d4701385935a6d", size = 8107995, upload-time = "2025-10-09T00:26:08.669Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3c/5692a2d9a5ba848fda3f48d2b607037df96460b941a59ef236404b39776b/matplotlib-3.10.7-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc1c51b846aca49a5a8b44fbba6a92d583a35c64590ad9e1e950dc88940a4297", size = 8680503, upload-time = "2025-10-09T00:26:10.607Z" }, + { url = "https://files.pythonhosted.org/packages/ab/a0/86ace53c48b05d0e6e9c127b2ace097434901f3e7b93f050791c8243201a/matplotlib-3.10.7-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a11c2e9e72e7de09b7b72e62f3df23317c888299c875e2b778abf1eda8c0a42", size = 9514982, upload-time = "2025-10-09T00:26:12.594Z" }, + { url = "https://files.pythonhosted.org/packages/a6/81/ead71e2824da8f72640a64166d10e62300df4ae4db01a0bac56c5b39fa51/matplotlib-3.10.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f19410b486fdd139885ace124e57f938c1e6a3210ea13dd29cab58f5d4bc12c7", size = 9566429, upload-time = "2025-10-09T00:26:14.758Z" }, + { url = "https://files.pythonhosted.org/packages/65/7d/954b3067120456f472cce8fdcacaf4a5fcd522478db0c37bb243c7cb59dd/matplotlib-3.10.7-cp310-cp310-win_amd64.whl", hash = "sha256:b498e9e4022f93de2d5a37615200ca01297ceebbb56fe4c833f46862a490f9e3", size = 8108174, upload-time = "2025-10-09T00:26:17.015Z" }, + { url = "https://files.pythonhosted.org/packages/fc/bc/0fb489005669127ec13f51be0c6adc074d7cf191075dab1da9fe3b7a3cfc/matplotlib-3.10.7-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:53b492410a6cd66c7a471de6c924f6ede976e963c0f3097a3b7abfadddc67d0a", size = 8257507, upload-time = "2025-10-09T00:26:19.073Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6a/d42588ad895279ff6708924645b5d2ed54a7fb2dc045c8a804e955aeace1/matplotlib-3.10.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d9749313deb729f08207718d29c86246beb2ea3fdba753595b55901dee5d2fd6", size = 8119565, upload-time = "2025-10-09T00:26:21.023Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/4aa196155b4d846bd749cf82aa5a4c300cf55a8b5e0dfa5b722a63c0f8a0/matplotlib-3.10.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2222c7ba2cbde7fe63032769f6eb7e83ab3227f47d997a8453377709b7fe3a5a", size = 8692668, upload-time = "2025-10-09T00:26:22.967Z" }, + { url = "https://files.pythonhosted.org/packages/e6/e7/664d2b97016f46683a02d854d730cfcf54ff92c1dafa424beebef50f831d/matplotlib-3.10.7-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e91f61a064c92c307c5a9dc8c05dc9f8a68f0a3be199d9a002a0622e13f874a1", size = 9521051, upload-time = "2025-10-09T00:26:25.041Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a3/37aef1404efa615f49b5758a5e0261c16dd88f389bc1861e722620e4a754/matplotlib-3.10.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6f1851eab59ca082c95df5a500106bad73672645625e04538b3ad0f69471ffcc", size = 9576878, upload-time = "2025-10-09T00:26:27.478Z" }, + { url = "https://files.pythonhosted.org/packages/33/cd/b145f9797126f3f809d177ca378de57c45413c5099c5990de2658760594a/matplotlib-3.10.7-cp311-cp311-win_amd64.whl", hash = "sha256:6516ce375109c60ceec579e699524e9d504cd7578506f01150f7a6bc174a775e", size = 8115142, upload-time = "2025-10-09T00:26:29.774Z" }, + { url = "https://files.pythonhosted.org/packages/2e/39/63bca9d2b78455ed497fcf51a9c71df200a11048f48249038f06447fa947/matplotlib-3.10.7-cp311-cp311-win_arm64.whl", hash = "sha256:b172db79759f5f9bc13ef1c3ef8b9ee7b37b0247f987fbbbdaa15e4f87fd46a9", size = 7992439, upload-time = "2025-10-09T00:26:40.32Z" }, + { url = "https://files.pythonhosted.org/packages/be/b3/09eb0f7796932826ec20c25b517d568627754f6c6462fca19e12c02f2e12/matplotlib-3.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a0edb7209e21840e8361e91ea84ea676658aa93edd5f8762793dec77a4a6748", size = 8272389, upload-time = "2025-10-09T00:26:42.474Z" }, + { url = "https://files.pythonhosted.org/packages/11/0b/1ae80ddafb8652fd8046cb5c8460ecc8d4afccb89e2c6d6bec61e04e1eaf/matplotlib-3.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c380371d3c23e0eadf8ebff114445b9f970aff2010198d498d4ab4c3b41eea4f", size = 8128247, upload-time = "2025-10-09T00:26:44.77Z" }, + { url = "https://files.pythonhosted.org/packages/7d/18/95ae2e242d4a5c98bd6e90e36e128d71cf1c7e39b0874feaed3ef782e789/matplotlib-3.10.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d5f256d49fea31f40f166a5e3131235a5d2f4b7f44520b1cf0baf1ce568ccff0", size = 8696996, upload-time = "2025-10-09T00:26:46.792Z" }, + { url = "https://files.pythonhosted.org/packages/7e/3d/5b559efc800bd05cb2033aa85f7e13af51958136a48327f7c261801ff90a/matplotlib-3.10.7-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11ae579ac83cdf3fb72573bb89f70e0534de05266728740d478f0f818983c695", size = 9530153, upload-time = "2025-10-09T00:26:49.07Z" }, + { url = "https://files.pythonhosted.org/packages/88/57/eab4a719fd110312d3c220595d63a3c85ec2a39723f0f4e7fa7e6e3f74ba/matplotlib-3.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4c14b6acd16cddc3569a2d515cfdd81c7a68ac5639b76548cfc1a9e48b20eb65", size = 9593093, upload-time = "2025-10-09T00:26:51.067Z" }, + { url = "https://files.pythonhosted.org/packages/31/3c/80816f027b3a4a28cd2a0a6ef7f89a2db22310e945cd886ec25bfb399221/matplotlib-3.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:0d8c32b7ea6fb80b1aeff5a2ceb3fb9778e2759e899d9beff75584714afcc5ee", size = 8122771, upload-time = "2025-10-09T00:26:53.296Z" }, + { url = "https://files.pythonhosted.org/packages/de/77/ef1fc78bfe99999b2675435cc52120887191c566b25017d78beaabef7f2d/matplotlib-3.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:5f3f6d315dcc176ba7ca6e74c7768fb7e4cf566c49cb143f6bc257b62e634ed8", size = 7992812, upload-time = "2025-10-09T00:26:54.882Z" }, + { url = "https://files.pythonhosted.org/packages/02/9c/207547916a02c78f6bdd83448d9b21afbc42f6379ed887ecf610984f3b4e/matplotlib-3.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1d9d3713a237970569156cfb4de7533b7c4eacdd61789726f444f96a0d28f57f", size = 8273212, upload-time = "2025-10-09T00:26:56.752Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d0/b3d3338d467d3fc937f0bb7f256711395cae6f78e22cef0656159950adf0/matplotlib-3.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37a1fea41153dd6ee061d21ab69c9cf2cf543160b1b85d89cd3d2e2a7902ca4c", size = 8128713, upload-time = "2025-10-09T00:26:59.001Z" }, + { url = "https://files.pythonhosted.org/packages/22/ff/6425bf5c20d79aa5b959d1ce9e65f599632345391381c9a104133fe0b171/matplotlib-3.10.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b3c4ea4948d93c9c29dc01c0c23eef66f2101bf75158c291b88de6525c55c3d1", size = 8698527, upload-time = "2025-10-09T00:27:00.69Z" }, + { url = "https://files.pythonhosted.org/packages/d0/7f/ccdca06f4c2e6c7989270ed7829b8679466682f4cfc0f8c9986241c023b6/matplotlib-3.10.7-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22df30ffaa89f6643206cf13877191c63a50e8f800b038bc39bee9d2d4957632", size = 9529690, upload-time = "2025-10-09T00:27:02.664Z" }, + { url = "https://files.pythonhosted.org/packages/b8/95/b80fc2c1f269f21ff3d193ca697358e24408c33ce2b106a7438a45407b63/matplotlib-3.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b69676845a0a66f9da30e87f48be36734d6748024b525ec4710be40194282c84", size = 9593732, upload-time = "2025-10-09T00:27:04.653Z" }, + { url = "https://files.pythonhosted.org/packages/e1/b6/23064a96308b9aeceeffa65e96bcde459a2ea4934d311dee20afde7407a0/matplotlib-3.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:744991e0cc863dd669c8dc9136ca4e6e0082be2070b9d793cbd64bec872a6815", size = 8122727, upload-time = "2025-10-09T00:27:06.814Z" }, + { url = "https://files.pythonhosted.org/packages/b3/a6/2faaf48133b82cf3607759027f82b5c702aa99cdfcefb7f93d6ccf26a424/matplotlib-3.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:fba2974df0bf8ce3c995fa84b79cde38326e0f7b5409e7a3a481c1141340bcf7", size = 7992958, upload-time = "2025-10-09T00:27:08.567Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f0/b018fed0b599bd48d84c08794cb242227fe3341952da102ee9d9682db574/matplotlib-3.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:932c55d1fa7af4423422cb6a492a31cbcbdbe68fd1a9a3f545aa5e7a143b5355", size = 8316849, upload-time = "2025-10-09T00:27:10.254Z" }, + { url = "https://files.pythonhosted.org/packages/b0/b7/bb4f23856197659f275e11a2a164e36e65e9b48ea3e93c4ec25b4f163198/matplotlib-3.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e38c2d581d62ee729a6e144c47a71b3f42fb4187508dbbf4fe71d5612c3433b", size = 8178225, upload-time = "2025-10-09T00:27:12.241Z" }, + { url = "https://files.pythonhosted.org/packages/62/56/0600609893ff277e6f3ab3c0cef4eafa6e61006c058e84286c467223d4d5/matplotlib-3.10.7-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:786656bb13c237bbcebcd402f65f44dd61ead60ee3deb045af429d889c8dbc67", size = 8711708, upload-time = "2025-10-09T00:27:13.879Z" }, + { url = "https://files.pythonhosted.org/packages/d8/1a/6bfecb0cafe94d6658f2f1af22c43b76cf7a1c2f0dc34ef84cbb6809617e/matplotlib-3.10.7-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09d7945a70ea43bf9248f4b6582734c2fe726723204a76eca233f24cffc7ef67", size = 9541409, upload-time = "2025-10-09T00:27:15.684Z" }, + { url = "https://files.pythonhosted.org/packages/08/50/95122a407d7f2e446fd865e2388a232a23f2b81934960ea802f3171518e4/matplotlib-3.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d0b181e9fa8daf1d9f2d4c547527b167cb8838fc587deabca7b5c01f97199e84", size = 9594054, upload-time = "2025-10-09T00:27:17.547Z" }, + { url = "https://files.pythonhosted.org/packages/13/76/75b194a43b81583478a81e78a07da8d9ca6ddf50dd0a2ccabf258059481d/matplotlib-3.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:31963603041634ce1a96053047b40961f7a29eb8f9a62e80cc2c0427aa1d22a2", size = 8200100, upload-time = "2025-10-09T00:27:20.039Z" }, + { url = "https://files.pythonhosted.org/packages/f5/9e/6aefebdc9f8235c12bdeeda44cc0383d89c1e41da2c400caf3ee2073a3ce/matplotlib-3.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:aebed7b50aa6ac698c90f60f854b47e48cd2252b30510e7a1feddaf5a3f72cbf", size = 8042131, upload-time = "2025-10-09T00:27:21.608Z" }, + { url = "https://files.pythonhosted.org/packages/0d/4b/e5bc2c321b6a7e3a75638d937d19ea267c34bd5a90e12bee76c4d7c7a0d9/matplotlib-3.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d883460c43e8c6b173fef244a2341f7f7c0e9725c7fe68306e8e44ed9c8fb100", size = 8273787, upload-time = "2025-10-09T00:27:23.27Z" }, + { url = "https://files.pythonhosted.org/packages/86/ad/6efae459c56c2fbc404da154e13e3a6039129f3c942b0152624f1c621f05/matplotlib-3.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:07124afcf7a6504eafcb8ce94091c5898bbdd351519a1beb5c45f7a38c67e77f", size = 8131348, upload-time = "2025-10-09T00:27:24.926Z" }, + { url = "https://files.pythonhosted.org/packages/a6/5a/a4284d2958dee4116359cc05d7e19c057e64ece1b4ac986ab0f2f4d52d5a/matplotlib-3.10.7-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c17398b709a6cce3d9fdb1595c33e356d91c098cd9486cb2cc21ea2ea418e715", size = 9533949, upload-time = "2025-10-09T00:27:26.704Z" }, + { url = "https://files.pythonhosted.org/packages/de/ff/f3781b5057fa3786623ad8976fc9f7b0d02b2f28534751fd5a44240de4cf/matplotlib-3.10.7-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7146d64f561498764561e9cd0ed64fcf582e570fc519e6f521e2d0cfd43365e1", size = 9804247, upload-time = "2025-10-09T00:27:28.514Z" }, + { url = "https://files.pythonhosted.org/packages/47/5a/993a59facb8444efb0e197bf55f545ee449902dcee86a4dfc580c3b61314/matplotlib-3.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:90ad854c0a435da3104c01e2c6f0028d7e719b690998a2333d7218db80950722", size = 9595497, upload-time = "2025-10-09T00:27:30.418Z" }, + { url = "https://files.pythonhosted.org/packages/0d/a5/77c95aaa9bb32c345cbb49626ad8eb15550cba2e6d4c88081a6c2ac7b08d/matplotlib-3.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:4645fc5d9d20ffa3a39361fcdbcec731382763b623b72627806bf251b6388866", size = 8252732, upload-time = "2025-10-09T00:27:32.332Z" }, + { url = "https://files.pythonhosted.org/packages/74/04/45d269b4268d222390d7817dae77b159651909669a34ee9fdee336db5883/matplotlib-3.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:9257be2f2a03415f9105c486d304a321168e61ad450f6153d77c69504ad764bb", size = 8124240, upload-time = "2025-10-09T00:27:33.94Z" }, + { url = "https://files.pythonhosted.org/packages/4b/c7/ca01c607bb827158b439208c153d6f14ddb9fb640768f06f7ca3488ae67b/matplotlib-3.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1e4bbad66c177a8fdfa53972e5ef8be72a5f27e6a607cec0d8579abd0f3102b1", size = 8316938, upload-time = "2025-10-09T00:27:35.534Z" }, + { url = "https://files.pythonhosted.org/packages/84/d2/5539e66e9f56d2fdec94bb8436f5e449683b4e199bcc897c44fbe3c99e28/matplotlib-3.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d8eb7194b084b12feb19142262165832fc6ee879b945491d1c3d4660748020c4", size = 8178245, upload-time = "2025-10-09T00:27:37.334Z" }, + { url = "https://files.pythonhosted.org/packages/77/b5/e6ca22901fd3e4fe433a82e583436dd872f6c966fca7e63cf806b40356f8/matplotlib-3.10.7-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d41379b05528091f00e1728004f9a8d7191260f3862178b88e8fd770206318", size = 9541411, upload-time = "2025-10-09T00:27:39.387Z" }, + { url = "https://files.pythonhosted.org/packages/9e/99/a4524db57cad8fee54b7237239a8f8360bfcfa3170d37c9e71c090c0f409/matplotlib-3.10.7-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4a74f79fafb2e177f240579bc83f0b60f82cc47d2f1d260f422a0627207008ca", size = 9803664, upload-time = "2025-10-09T00:27:41.492Z" }, + { url = "https://files.pythonhosted.org/packages/e6/a5/85e2edf76ea0ad4288d174926d9454ea85f3ce5390cc4e6fab196cbf250b/matplotlib-3.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:702590829c30aada1e8cef0568ddbffa77ca747b4d6e36c6d173f66e301f89cc", size = 9594066, upload-time = "2025-10-09T00:27:43.694Z" }, + { url = "https://files.pythonhosted.org/packages/39/69/9684368a314f6d83fe5c5ad2a4121a3a8e03723d2e5c8ea17b66c1bad0e7/matplotlib-3.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:f79d5de970fc90cd5591f60053aecfce1fcd736e0303d9f0bf86be649fa68fb8", size = 8342832, upload-time = "2025-10-09T00:27:45.543Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/e22e08da14bc1a0894184640d47819d2338b792732e20d292bf86e5ab785/matplotlib-3.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:cb783436e47fcf82064baca52ce748af71725d0352e1d31564cbe9c95df92b9c", size = 8172585, upload-time = "2025-10-09T00:27:47.185Z" }, + { url = "https://files.pythonhosted.org/packages/1e/6c/a9bcf03e9afb2a873e0a5855f79bce476d1023f26f8212969f2b7504756c/matplotlib-3.10.7-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5c09cf8f2793f81368f49f118b6f9f937456362bee282eac575cca7f84cda537", size = 8241204, upload-time = "2025-10-09T00:27:48.806Z" }, + { url = "https://files.pythonhosted.org/packages/5b/fd/0e6f5aa762ed689d9fa8750b08f1932628ffa7ed30e76423c399d19407d2/matplotlib-3.10.7-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:de66744b2bb88d5cd27e80dfc2ec9f0517d0a46d204ff98fe9e5f2864eb67657", size = 8104607, upload-time = "2025-10-09T00:27:50.876Z" }, + { url = "https://files.pythonhosted.org/packages/b9/a9/21c9439d698fac5f0de8fc68b2405b738ed1f00e1279c76f2d9aa5521ead/matplotlib-3.10.7-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:53cc80662dd197ece414dd5b66e07370201515a3eaf52e7c518c68c16814773b", size = 8682257, upload-time = "2025-10-09T00:27:52.597Z" }, + { url = "https://files.pythonhosted.org/packages/58/8f/76d5dc21ac64a49e5498d7f0472c0781dae442dd266a67458baec38288ec/matplotlib-3.10.7-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15112bcbaef211bd663fa935ec33313b948e214454d949b723998a43357b17b0", size = 8252283, upload-time = "2025-10-09T00:27:54.739Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/9c5d4c2317feb31d819e38c9f947c942f42ebd4eb935fc6fd3518a11eaa7/matplotlib-3.10.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d2a959c640cdeecdd2ec3136e8ea0441da59bcaf58d67e9c590740addba2cb68", size = 8116733, upload-time = "2025-10-09T00:27:56.406Z" }, + { url = "https://files.pythonhosted.org/packages/9a/cc/3fe688ff1355010937713164caacf9ed443675ac48a997bab6ed23b3f7c0/matplotlib-3.10.7-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3886e47f64611046bc1db523a09dd0a0a6bed6081e6f90e13806dd1d1d1b5e91", size = 8693919, upload-time = "2025-10-09T00:27:58.41Z" }, +] + [[package]] name = "msal" version = "1.33.0" @@ -1688,6 +2095,15 @@ crypto = [ { name = "cryptography" }, ] +[[package]] +name = "pyparsing" +version = "3.2.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274, upload-time = "2025-09-21T04:11:06.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890, upload-time = "2025-09-21T04:11:04.117Z" }, +] + [[package]] name = "pytest" version = "8.4.1" From 2580cc03ed7ad4656e3cd1e37be9d6000b8c11ad Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 18 Nov 2025 13:40:23 +0000 Subject: [PATCH 69/82] fix query for grounding --- src/bigdata_research_tools/mindmap/mindmap_generator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index dc4de88..7f4a4ec 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -657,7 +657,11 @@ def _run_and_collate_search( else: keywords = None - queries = [Similarity(sentence)&keywords&entities if keywords or entities else Similarity(sentence) for sentence in search_list] + queries = [Similarity(sentence) for sentence in search_list] + if entities: + queries = [query&entities for query in queries] + if keywords: + queries = [query&keywords for query in queries] all_results = run_search(queries=queries, date_ranges = date_range, From c7e046167734504ab98f9137cc14a9d48106d581 Mon Sep 17 00:00:00 2001 From: jaldana Date: Tue, 18 Nov 2025 15:40:11 +0100 Subject: [PATCH 70/82] Formatting and linting --- examples/grounded_mindmaps.py | 194 ++-- src/bigdata_research_tools/llm/base.py | 4 +- src/bigdata_research_tools/llm/openai.py | 14 +- .../mindmap/__init__.py | 6 +- src/bigdata_research_tools/mindmap/mindmap.py | 33 +- .../mindmap/mindmap_generator.py | 665 ++++++++----- .../mindmap/mindmap_utils.py | 49 +- src/bigdata_research_tools/search/search.py | 13 +- .../visuals/mindmap_visuals.py | 882 +++++++++++------- .../workflows/risk_analyzer.py | 8 +- .../workflows/thematic_screener.py | 2 +- 11 files changed, 1164 insertions(+), 706 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index 52097dd..b3750bc 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -1,8 +1,7 @@ import logging -from bigdata_client import Bigdata -from bigdata_client.models.search import DocumentType from dotenv import load_dotenv + from bigdata_research_tools.mindmap.mindmap import MindMap from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.visuals.mindmap_visuals import plot_mindmap @@ -16,87 +15,167 @@ ) logger = logging.getLogger(__name__) -def test_one_shot_mindmap(main_theme, focus, map_type, instructions, llm_base_config: str = "openai::gpt-4o-mini") -> MindMap: + +def test_one_shot_mindmap( + main_theme, + focus, + map_type, + instructions, + llm_base_config: str = "openai::gpt-4o-mini", +) -> MindMap: """Test one-shot mind map generation with base LLM.""" logger.info("=" * 60) logger.info("TEST 1: One-Shot Mind Map Generation with Base LLM") logger.info("=" * 60) - mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config,) + mindmap_generator = MindMapGenerator( + llm_model_config_base=llm_base_config, + ) mindmap = mindmap_generator.generate_one_shot( - instructions=instructions, - focus=focus, - main_theme=main_theme, - map_type = map_type, - allow_grounding=False, -) - logger.info("Results: %s", mindmap['mindmap_text']) + instructions=instructions, + focus=focus, + main_theme=main_theme, + map_type=map_type, + allow_grounding=False, + ) + logger.info("Results: %s", mindmap["mindmap_text"]) return mindmap["mindmap_df"], mindmap["mindmap_json"] - - -def test_refined_mindmap(main_theme, focus, map_type, instructions, base_mindmap: str, llm_base_config: str = "openai::o3-mini") -> MindMap: + + +def test_refined_mindmap( + main_theme, + focus, + map_type, + instructions, + base_mindmap: str, + llm_base_config: str = "openai::o3-mini", +) -> MindMap: """Test refined mindmap generation with reasoning LLM sent in the base config.""" logger.info("=" * 60) logger.info("TEST 2: Refined MindMap Generation with Reasoning LLM in Base Config") logger.info("=" * 60) - mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config,) - mindmap = mindmap_generator.generate_refined(focus = focus, - main_theme = main_theme, - initial_mindmap = base_mindmap, - grounding_method = "tool_call", - output_dir = "./refined_mindmaps", - filename = "refined_mindmap.json", - map_type = map_type, - instructions = instructions, - ) - logger.info("Results: %s", mindmap['mindmap_text']) - -def test_refined_mindmap2(main_theme, focus, map_type, instructions, base_mindmap: str, llm_base_config: str | None = None, llm_reasoning_config: str = "openai::o3-mini") -> MindMap: + mindmap_generator = MindMapGenerator( + llm_model_config_base=llm_base_config, + ) + mindmap = mindmap_generator.generate_refined( + focus=focus, + main_theme=main_theme, + initial_mindmap=base_mindmap, + grounding_method="tool_call", + output_dir="./refined_mindmaps", + filename="refined_mindmap.json", + map_type=map_type, + instructions=instructions, + ) + logger.info("Results: %s", mindmap["mindmap_text"]) + + +def test_refined_mindmap2( + main_theme, + focus, + map_type, + instructions, + base_mindmap: str, + llm_base_config: str | None = None, + llm_reasoning_config: str = "openai::o3-mini", +) -> MindMap: """Test refined mindmap generation with reasoning LLM sent in the reasoning config.""" logger.info("=" * 60) - logger.info("TEST 3: Refined MindMap Generation with Reasoning LLM in Reasoning Config") + logger.info( + "TEST 3: Refined MindMap Generation with Reasoning LLM in Reasoning Config" + ) logger.info("=" * 60) - mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config) - mindmap = mindmap_generator.generate_refined(focus = focus, - main_theme = main_theme, - initial_mindmap = base_mindmap, - grounding_method = "tool_call", - output_dir = "./refined_mindmaps", - filename = "refined_mindmap.json", - map_type = map_type, - instructions = instructions, - ) - logger.info("Results: %s", mindmap['mindmap_text']) - -def test_dynamic_mindmap(main_theme, focus, map_type, instructions, llm_base_config: str = "openai::gpt-4o-mini", llm_reasoning_config: str = "openai::o3-mini") -> MindMap: + mindmap_generator = MindMapGenerator( + llm_model_config_base=llm_base_config, + llm_model_config_reasoning=llm_reasoning_config, + ) + mindmap = mindmap_generator.generate_refined( + focus=focus, + main_theme=main_theme, + initial_mindmap=base_mindmap, + grounding_method="tool_call", + output_dir="./refined_mindmaps", + filename="refined_mindmap.json", + map_type=map_type, + instructions=instructions, + ) + logger.info("Results: %s", mindmap["mindmap_text"]) + + +def test_dynamic_mindmap( + main_theme, + focus, + map_type, + instructions, + llm_base_config: str = "openai::gpt-4o-mini", + llm_reasoning_config: str = "openai::o3-mini", +) -> MindMap: """Test dynamic mindmap generation with two LLMs.""" logger.info("=" * 60) logger.info("TEST 4: Dynamic MindMap Generation with Two LLMs") logger.info("=" * 60) - mindmap_generator = MindMapGenerator(llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config) + mindmap_generator = MindMapGenerator( + llm_model_config_base=llm_base_config, + llm_model_config_reasoning=llm_reasoning_config, + ) mindmap = mindmap_generator.generate_dynamic( - instructions = instructions, - focus = focus, - main_theme = main_theme, - month_intervals = [["2025-10-01", "2025-10-31"], ["2025-11-01", "2025-11-30"], ["2025-12-01", "2025-12-31"]], - month_names = ['October_2025', 'November_2025', 'December_2025'],) - logger.info("Results: %s", mindmap['base_mindmap']) - logger.info("Results: %s", mindmap['October_2025']) + instructions=instructions, + focus=focus, + main_theme=main_theme, + month_intervals=[ + ["2025-10-01", "2025-10-31"], + ["2025-11-01", "2025-11-30"], + ["2025-12-01", "2025-12-31"], + ], + month_names=["October_2025", "November_2025", "December_2025"], + ) + logger.info("Results: %s", mindmap["base_mindmap"]) + logger.info("Results: %s", mindmap["October_2025"]) logger.info("") -def main(MAIN_THEME = "Political Change in Japan.", - INSTRUCTIONS = 'Create a mindmap according to a given risk scenario. Map by risk type for any industry and assess short term impact only.', - FOCUS = "Provide a detailed taxonomy of risks related to changes in the Japanese political landscape. Evaluate how the resignation of the Prime Minister and the pre-election of Sanae Takaichi will affect companies, their strategy and operations. Take into consideration their increased conservative stance on immigration, energy, and trade. Add any other risk areas that may arise from these political changes. The mind map should be as comprehensive as possible and cover all major risk areas.", - map_type = 'risk'): + +def main( + MAIN_THEME="Political Change in Japan.", + INSTRUCTIONS="Create a mindmap according to a given risk scenario. Map by risk type for any industry and assess short term impact only.", + FOCUS="Provide a detailed taxonomy of risks related to changes in the Japanese political landscape. Evaluate how the resignation of the Prime Minister and the pre-election of Sanae Takaichi will affect companies, their strategy and operations. Take into consideration their increased conservative stance on immigration, energy, and trade. Add any other risk areas that may arise from these political changes. The mind map should be as comprehensive as possible and cover all major risk areas.", + map_type="risk", +): """Run all tests.""" logger.info("Testing Grounded MindMap Generation") logger.info("=" * 60) try: - df_mindmap, base_mindmap = test_one_shot_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini") + df_mindmap, base_mindmap = test_one_shot_mindmap( + MAIN_THEME, + FOCUS, + map_type, + INSTRUCTIONS, + llm_base_config="openai::gpt-4o-mini", + ) plot_mindmap(df_mindmap, MAIN_THEME) - test_refined_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") - test_refined_mindmap2(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, base_mindmap, llm_base_config="openai::o3-mini") - test_dynamic_mindmap(MAIN_THEME, FOCUS, map_type, INSTRUCTIONS, llm_base_config="openai::gpt-4o-mini", llm_reasoning_config="openai::o3-mini") + test_refined_mindmap( + MAIN_THEME, + FOCUS, + map_type, + INSTRUCTIONS, + base_mindmap, + llm_base_config="openai::o3-mini", + ) + test_refined_mindmap2( + MAIN_THEME, + FOCUS, + map_type, + INSTRUCTIONS, + base_mindmap, + llm_base_config="openai::o3-mini", + ) + test_dynamic_mindmap( + MAIN_THEME, + FOCUS, + map_type, + INSTRUCTIONS, + llm_base_config="openai::gpt-4o-mini", + llm_reasoning_config="openai::o3-mini", + ) logger.info("=" * 60) logger.info("All tests completed successfully") @@ -105,5 +184,6 @@ def main(MAIN_THEME = "Political Change in Japan.", logger.error("Error during testing: %s", e) raise + if __name__ == "__main__": main() diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 3dc72fe..413f420 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -350,9 +350,7 @@ def get_tools_response( - arguments (list[dict]): List of arguments for each function - text (str): The text content of the message, if any. """ - return self.provider.get_tools_response( - chat_history, tools, **kwargs - ) + return self.provider.get_tools_response(chat_history, tools, **kwargs) class NotInitializedLLMProviderError(Exception): diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 6e4d0f7..7ec6fe6 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -197,18 +197,22 @@ def get_tools_response( ) message = response.choices[0].message output = { - "id" : [], + "id": [], "func_names": [], "arguments": [], "text": message.content, - "tool_calls": {}} - + "tool_calls": {}, + } + if function_calls := message.tool_calls if message.tool_calls else None: output = { - "id" : [f.id for f in function_calls], + "id": [f.id for f in function_calls], "func_names": [f.function.name for f in function_calls], "arguments": [loads(f.function.arguments) for f in function_calls], - "tool_calls": response.model_dump().get("choices", [])[0].get("message", {}).get("tool_calls", []) + "tool_calls": response.model_dump() + .get("choices", [])[0] + .get("message", {}) + .get("tool_calls", []), } return output diff --git a/src/bigdata_research_tools/mindmap/__init__.py b/src/bigdata_research_tools/mindmap/__init__.py index 10b1154..2e8809e 100644 --- a/src/bigdata_research_tools/mindmap/__init__.py +++ b/src/bigdata_research_tools/mindmap/__init__.py @@ -1,4 +1,8 @@ -from bigdata_research_tools.mindmap.mindmap import MindMap, generate_theme_tree, generate_risk_tree +from bigdata_research_tools.mindmap.mindmap import ( + MindMap, + generate_risk_tree, + generate_theme_tree, +) from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator __all__ = ["MindMap", "MindMapGenerator", "generate_theme_tree", "generate_risk_tree"] diff --git a/src/bigdata_research_tools/mindmap/mindmap.py b/src/bigdata_research_tools/mindmap/mindmap.py index 2846a9d..f466196 100644 --- a/src/bigdata_research_tools/mindmap/mindmap.py +++ b/src/bigdata_research_tools/mindmap/mindmap.py @@ -199,7 +199,7 @@ def visualize(self, engine: str = "graphviz") -> None: f"Unsupported engine '{engine}'. " f"Supported engines are 'graphviz', 'plotly', and 'matplotlib'." ) - + def _visualize_matplotlib(self): """ Auxiliary function to visualize the tree using Matplotlib. @@ -208,11 +208,11 @@ def _visualize_matplotlib(self): A Matplotlib Plot rendering the mindmap. """ import matplotlib - matplotlib.use('Agg') # Use non-interactive backend + + matplotlib.use("Agg") # Use non-interactive backend from bigdata_research_tools.visuals.mindmap_visuals import plot_mindmap - - plot_mindmap(self.to_dataframe(), main_theme=self.label) + plot_mindmap(self.to_dataframe(), main_theme=self.label) def _visualize_graphviz(self) -> graphviz.Digraph: """ @@ -351,29 +351,34 @@ def to_rows(self, parent_label=None): Flatten tree to rows for DataFrame: each row is (Parent, Label, Node, Summary) """ rows = [] - rows.append({ - "Parent": parent_label, - "Label": self.label, - "Node": self.node, - "Summary": self.summary - }) + rows.append( + { + "Parent": parent_label, + "Label": self.label, + "Node": self.node, + "Summary": self.summary, + } + ) for child in self.children: rows.extend(child.to_rows(parent_label=self.label)) return rows def to_dataframe(self, leaves_only=False): import pandas as pd + rows = self.to_rows(parent_label=None) # Exclude rows where Parent is None or Parent == self.label (root node) filtered = [row for row in rows if row["Parent"] not in (None, self.label)] if leaves_only: # Only keep rows that are leaves (i.e., have no children) - leaf_labels = {row["Label"] for row in filtered} - filtered = [row for row in filtered if row["Label"] not in {r["Parent"] for r in filtered}] + filtered = [ + row + for row in filtered + if row["Label"] not in {r["Parent"] for r in filtered} + ] return pd.DataFrame(filtered) - + def to_json(self): - return json.dumps(self._to_dict(), indent=2) diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 7f4a4ec..7c18f2f 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -1,8 +1,33 @@ -from typing import Any, List, Dict, Optional, Tuple -from bigdata_research_tools.llm.base import LLMConfig +import ast +import json +import os +import re +from concurrent.futures import ThreadPoolExecutor, as_completed +from logging import Logger, getLogger +from typing import Any, Dict, List, Optional, Tuple + +from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange +from bigdata_client.models.search import DocumentType, SortBy +from bigdata_client.query import ( + Any as BigdataAny, +) +from bigdata_client.query import ( + Keyword, + Similarity, +) +from tqdm import tqdm + +from bigdata_research_tools.client import bigdata_connection from bigdata_research_tools.llm import LLMEngine +from bigdata_research_tools.llm.base import LLMConfig +from bigdata_research_tools.mindmap.mindmap import MindMap, get_default_tree_config +from bigdata_research_tools.mindmap.mindmap_utils import ( + format_mindmap_to_dataframe, + load_results_from_file, + prompts_dict, + save_results_to_file, +) -from bigdata_research_tools.search.query_builder import build_batched_query # from bigdata_research_tools.search.query_builder import ( # EntitiesToSearch, # build_batched_query, @@ -10,55 +35,40 @@ # ) # cannot use query builder because it is to error-prone to build EntitiesToSearch based on the LLM output from bigdata_research_tools.search.search import run_search -from bigdata_research_tools.client import bigdata_connection -from bigdata_client.query import ( - Any, - Keyword, - Similarity, -) -from bigdata_research_tools.mindmap.mindmap_utils import format_mindmap_to_dataframe, save_results_to_file, load_results_from_file, prompts_dict -import os -import json -import re -import json -import ast -from concurrent.futures import ThreadPoolExecutor, as_completed -from tqdm import tqdm -from bigdata_research_tools.mindmap.mindmap import MindMap, get_default_tree_config -from logging import Logger, getLogger -from bigdata_client.models.search import DocumentType, SortBy -from bigdata_client.daterange import RollingDateRange, AbsoluteDateRange logger: Logger = getLogger(__name__) -bigdata_tool_description = [{ - "type": "function", - "function": { - "name": "bigdata_search", - "description": "Run a semantic similarity search on news content using Bigdata API.", - "parameters": { - "type": "object", - "properties": { - "search_list": { - "type": "array", - "items": {"type": "string"}, - "description": "The list of strings containing various detailed sentences to search in News documents.", - }, - "entities_list": { - "type": "array", - "items": {"type": "string"}, - "description": "The list of entities (People, Places or Organizations) to focus the search on. They will be added as search context with an OR logic.", - }, - "keywords_list": { - "type": "array", - "items": {"type": "string"}, - "description": "The list of keywords (one or two words defining topics or concepts) to focus the search on. They will be added as search context with an OR logic.", - } - }, - "required": ["search_list", "entities_list", "keywords_list"] - } - } - }] +bigdata_tool_description = [ + { + "type": "function", + "function": { + "name": "bigdata_search", + "description": "Run a semantic similarity search on news content using Bigdata API.", + "parameters": { + "type": "object", + "properties": { + "search_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of strings containing various detailed sentences to search in News documents.", + }, + "entities_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of entities (People, Places or Organizations) to focus the search on. They will be added as search context with an OR logic.", + }, + "keywords_list": { + "type": "array", + "items": {"type": "string"}, + "description": "The list of keywords (one or two words defining topics or concepts) to focus the search on. They will be added as search context with an OR logic.", + }, + }, + "required": ["search_list", "entities_list", "keywords_list"], + }, + }, + } +] + class MindMapGenerator: """ @@ -70,17 +80,22 @@ class MindMapGenerator: - Dynamic mind map evolution over time intervals (each step refines previous map with new search context) """ - def __init__(self, - llm_model_config_base: LLMConfig | dict | str = "openai::gpt-4o-mini", - llm_model_config_reasoning: Optional[LLMConfig | dict | str] = None, - ): + def __init__( + self, + llm_model_config_base: LLMConfig | dict | str = "openai::gpt-4o-mini", + llm_model_config_reasoning: Optional[LLMConfig | dict | str] = None, + ): """ Args: llm_client: Handles LLM chat and tool-calling. """ self.bigdata_connection = bigdata_connection() - - llm_model_config_reasoning = llm_model_config_reasoning if llm_model_config_reasoning else llm_model_config_base + + llm_model_config_reasoning = ( + llm_model_config_reasoning + if llm_model_config_reasoning + else llm_model_config_base + ) if isinstance(llm_model_config_base, dict): self.llm_model_config_base = LLMConfig(**llm_model_config_base) @@ -90,27 +105,35 @@ def __init__(self, if isinstance(llm_model_config_reasoning, dict): self.llm_model_config_reasoning = LLMConfig(**llm_model_config_reasoning) elif isinstance(llm_model_config_reasoning, str): - self.llm_model_config_reasoning = get_default_tree_config(llm_model_config_reasoning) - + self.llm_model_config_reasoning = get_default_tree_config( + llm_model_config_reasoning + ) + print(self.llm_model_config_base) - self.llm_base = LLMEngine(model=self.llm_model_config_base.model, **self.llm_model_config_base.connection_config) + self.llm_base = LLMEngine( + model=self.llm_model_config_base.model, + **self.llm_model_config_base.connection_config, + ) print(self.llm_model_config_reasoning) - self.llm_reasoning = LLMEngine(model=self.llm_model_config_reasoning.model, **self.llm_model_config_reasoning.connection_config) + self.llm_reasoning = LLMEngine( + model=self.llm_model_config_reasoning.model, + **self.llm_model_config_reasoning.connection_config, + ) def _parse_llm_to_themetree(self, mindmap_text: str) -> MindMap: """ Parse LLM output (expected to be a valid JSON object) into a MindMap. Strictly enforce JSON/dict structure, required fields, and allowed keys. If parsing or validation fails, raises an error with details. """ - import collections.abc + text = mindmap_text.strip() # Remove code block markers and language tags (minimal cleaning) - text = re.sub(r'^```[a-zA-Z]*\s*', '', text) - text = re.sub(r'```$', '', text) + text = re.sub(r"^```[a-zA-Z]*\s*", "", text) + text = re.sub(r"```$", "", text) # Remove accidental language tags at the start (e.g., "json\n{") - text = re.sub(r'^[a-zA-Z]+\s*\n*{', '{', text) + text = re.sub(r"^[a-zA-Z]+\s*\n*{", "{", text) # Remove any prefix before the first { or [ - text = re.sub(r'^[^({\[]*({|\[)', r'\1', text, flags=re.DOTALL) + text = re.sub(r"^[^({\[]*({|\[)", r"\1", text, flags=re.DOTALL) # Try JSON, then ast.literal_eval try: tree_dict = json.loads(text) @@ -118,23 +141,32 @@ def _parse_llm_to_themetree(self, mindmap_text: str) -> MindMap: try: tree_dict = ast.literal_eval(text) except Exception as e: - raise ValueError(f"Failed to parse LLM output as JSON or Python dict.\nRaw output:\n{mindmap_text}\nCLEANED OUTPUT:\n{text}\nError: {e}") + raise ValueError( + f"Failed to parse LLM output as JSON or Python dict.\nRaw output:\n{mindmap_text}\nCLEANED OUTPUT:\n{text}\nError: {e}" + ) # --- Strict validation of required fields and allowed keys --- allowed_keys = {"label", "node", "summary", "children"} + def validate_node(node, path="root"): if not isinstance(node, dict): raise ValueError(f"Node at {path} is not a dict: {node}") # Check for illegal keys illegal_keys = set(node.keys()) - allowed_keys if illegal_keys: - raise ValueError(f"Illegal key(s) {illegal_keys} at {path}. Node: {node}") + raise ValueError( + f"Illegal key(s) {illegal_keys} at {path}. Node: {node}" + ) # Check for required fields for key in allowed_keys: if key not in node or node[key] is None: - raise ValueError(f"Missing or null required field '{key}' at {path}. Node: {node}") + raise ValueError( + f"Missing or null required field '{key}' at {path}. Node: {node}" + ) if not isinstance(node["children"], list): - raise ValueError(f"'children' field at {path} is not a list. Node: {node}") + raise ValueError( + f"'children' field at {path} is not a list. Node: {node}" + ) for idx, child in enumerate(node["children"]): validate_node(child, path=f"{path} -> children[{idx}]") @@ -146,15 +178,20 @@ def dict_keys_to_lowercase(d): return [dict_keys_to_lowercase(i) for i in d] else: return d + tree_dict = dict_keys_to_lowercase(tree_dict) try: validate_node(tree_dict) except Exception as e: - raise ValueError(f"Mind map structure validation failed: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}") + raise ValueError( + f"Mind map structure validation failed: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}" + ) try: theme_tree = MindMap.from_dict(tree_dict) except Exception as e: - raise ValueError(f"Failed to build ThemeTree from dict: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}") + raise ValueError( + f"Failed to build ThemeTree from dict: {e}\nParsed dict:\n{json.dumps(tree_dict, indent=2)}" + ) return theme_tree def _themetree_to_dataframe(self, theme_tree: MindMap): @@ -164,103 +201,149 @@ def _themetree_to_dataframe(self, theme_tree: MindMap): try: df = theme_tree.to_dataframe() except Exception as e: - raise ValueError(f"Failed to convert ThemeTree to DataFrame: {e}\nThemeTree:\n{theme_tree}") + raise ValueError( + f"Failed to convert ThemeTree to DataFrame: {e}\nThemeTree:\n{theme_tree}" + ) return df - - def compose_base_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str]) -> list: + + def compose_base_message( + self, main_theme: str, focus: str, map_type: str, instructions: Optional[str] + ) -> list: # Explicit, step-by-step prompt (robust, as in working repo, minus Keywords) - enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + enforce_structure = prompts_dict[map_type]["enforce_structure_string"] messages = [ - {"role": "system", "content": f"{instructions} {focus}\n{enforce_structure}"}, - {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)} + { + "role": "system", + "content": f"{instructions} {focus}\n{enforce_structure}", + }, + { + "role": "user", + "content": prompts_dict[map_type]["user_prompt_message"].format( + main_theme=main_theme + ), + }, ] return messages - - def compose_tool_call_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], initial_mindmap: Optional[str]) -> list: - enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + + def compose_tool_call_message( + self, + main_theme: str, + focus: str, + map_type: str, + instructions: Optional[str], + initial_mindmap: Optional[str], + ) -> list: + enforce_structure = prompts_dict[map_type]["enforce_structure_string"] tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." if initial_mindmap: + tool_prompt += f"Starting from the following mind map:\n{initial_mindmap}" - tool_prompt+=f"Starting from the following mind map:\n{initial_mindmap}" - - tool_prompt+=f"\nReturn a list of searches you would like to perform to enhance it.\n{enforce_structure}" + tool_prompt += f"\nReturn a list of searches you would like to perform to enhance it.\n{enforce_structure}" messages = [ {"role": "system", "content": tool_prompt}, - {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)} + { + "role": "user", + "content": prompts_dict[map_type]["user_prompt_message"].format( + main_theme=main_theme + ), + }, ] return messages - def send_tool_call(self, messages: list, llm_client:LLMEngine, llm_kwargs: dict) -> list: - - llm_kwargs.update({"tool_choice": {"type": "function", "function": {"name": "bigdata_search"}}}) + def send_tool_call( + self, messages: list, llm_client: LLMEngine, llm_kwargs: dict + ) -> list: + llm_kwargs.update( + { + "tool_choice": { + "type": "function", + "function": {"name": "bigdata_search"}, + } + } + ) response_dict = llm_client.get_tools_response( - messages,tools=bigdata_tool_description, **llm_kwargs) + messages, tools=bigdata_tool_description, **llm_kwargs + ) try: if response_dict["tool_calls"] is not None: - tool_call_id = response_dict["id"][0] arguments = response_dict["arguments"][0] search_list = arguments.get("search_list", []) entities_list = arguments.get("entities_list", []) keywords_list = arguments.get("keywords_list", []) - return tool_call_id, response_dict["tool_calls"], search_list, entities_list, keywords_list + return ( + tool_call_id, + response_dict["tool_calls"], + search_list, + entities_list, + keywords_list, + ) else: print("No tool call found in the response.") - + return None, None, response_dict["text"], None, None except Exception as e: raise RuntimeError(f"Failed to parse OpenAI tool call response: {e}") - def compose_final_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], tool_calls, tool_call_id, context) -> list: - enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + def compose_final_message( + self, + main_theme: str, + focus: str, + map_type: str, + instructions: Optional[str], + tool_calls, + tool_call_id, + context, + ) -> list: + enforce_structure = prompts_dict[map_type]["enforce_structure_string"] final_message = [ - {"role": "system", "content": f"{instructions} {focus}. IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant. \n{enforce_structure}"}, - {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)}, - { - "role": "assistant", - "content": None, - "tool_calls": tool_calls - }, - { - "role": "tool", - "tool_call_id": tool_call_id, - "content": context - } - ] - + { + "role": "system", + "content": f"{instructions} {focus}. IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant. \n{enforce_structure}", + }, + { + "role": "user", + "content": prompts_dict[map_type]["user_prompt_message"].format( + main_theme=main_theme + ), + }, + {"role": "assistant", "content": None, "tool_calls": tool_calls}, + {"role": "tool", "tool_call_id": tool_call_id, "content": context}, + ] + return final_message - - def compose_refinement_message(self, main_theme: str, focus: str, map_type: str, instructions: Optional[str], initial_mindmap: str, context: str, tool_calls, tool_call_id) -> list: - enforce_structure = prompts_dict[map_type]['enforce_structure_string'] + def compose_refinement_message( + self, + main_theme: str, + focus: str, + map_type: str, + instructions: Optional[str], + initial_mindmap: str, + context: str, + tool_calls, + tool_call_id, + ) -> list: + enforce_structure = prompts_dict[map_type]["enforce_structure_string"] refine_prompt = ( - f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus} " - "Based on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text." - "IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant." - f"{enforce_structure}." - - ) + f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus} " + "Based on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text." + "IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant." + f"{enforce_structure}." + ) refinement_messages = [ - {"role": "system", "content": refine_prompt}, - {"role": "user", "content": initial_mindmap}, - { - "role": "assistant", - "content": None, - "tool_calls": tool_calls - }, - { - "role": "tool", - "tool_call_id": tool_call_id, - "content": context - } - ] - + {"role": "system", "content": refine_prompt}, + {"role": "user", "content": initial_mindmap}, + {"role": "assistant", "content": None, "tool_calls": tool_calls}, + {"role": "tool", "tool_call_id": tool_call_id, "content": context}, + ] + return refinement_messages def generate_one_shot( @@ -278,21 +361,39 @@ def generate_one_shot( If allow_grounding is True, use the specified grounding_method ("tool_call" or "chat"). Optionally log intermediate steps to disk. """ - - + messages = self.compose_base_message(main_theme, focus, map_type, instructions) - llm_kwargs = self.llm_model_config_base.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + llm_kwargs = self.llm_model_config_base.get_llm_kwargs( + remove_max_tokens=True, remove_timeout=True + ) if allow_grounding: if grounding_method == "tool_call": - messages.append({"role": "user", "content": "You can use news search to find relevant information about the topic. " - "Use the Bigdata API to search for news articles related to the topic and use them to inform your response. You will need to specify a list of sentences, a list of entities, and a list of keywords."}) - tool_call_id, tool_calls, search_list, entities_list, keywords_list = self.send_tool_call(messages,self.llm_base, llm_kwargs) - + messages.append( + { + "role": "user", + "content": "You can use news search to find relevant information about the topic. " + "Use the Bigdata API to search for news articles related to the topic and use them to inform your response. You will need to specify a list of sentences, a list of entities, and a list of keywords.", + } + ) + tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( + self.send_tool_call(messages, self.llm_base, llm_kwargs) + ) + if search_list and isinstance(search_list, list): - context = self._run_and_collate_search(search_list, entities_list, keywords_list, date_range=date_range) - - final_messages = self.compose_final_message(main_theme, focus, map_type, instructions, tool_calls, tool_call_id, context) + context = self._run_and_collate_search( + search_list, entities_list, keywords_list, date_range=date_range + ) + + final_messages = self.compose_final_message( + main_theme, + focus, + map_type, + instructions, + tool_calls, + tool_call_id, + context, + ) mindmap_text = self.llm_base.get_response(final_messages) @@ -301,24 +402,26 @@ def generate_one_shot( return { "mindmap_text": mindmap_text, "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), ##where does this come from? + "mindmap_json": theme_tree.to_json(), ##where does this come from? "grounded": True, "search_queries": search_list, - "search_context": context + "search_context": context, } else: - #decide if this fallback should be simplified + # decide if this fallback should be simplified mindmap_text = search_list if isinstance(search_list, str) else "" - theme_tree = self._parse_llm_to_themetree(mindmap_text) ## check if correct + theme_tree = self._parse_llm_to_themetree( + mindmap_text + ) ## check if correct df = format_mindmap_to_dataframe(mindmap_text) return { "mindmap_text": mindmap_text, "mindmap_df": df, "mindmap_json": theme_tree.to_json(), - "grounded": False + "grounded": False, } else: - #decide if this fallback should be simplified + # decide if this fallback should be simplified messages[0]["content"] += ( " You may request news search to ground your mind map. " "If you want to search, return a list of queries." @@ -329,11 +432,19 @@ def generate_one_shot( if queries: context = self._run_and_collate_search(queries, [], []) - + followup_messages = [ {"role": "system", "content": f"{instructions} {focus}"}, - {"role": "user", "content": prompts_dict[map_type]['user_prompt_message'].format(main_theme=main_theme)}, - {"role": "assistant", "content": "News search results:\n" + context} + { + "role": "user", + "content": prompts_dict[map_type][ + "user_prompt_message" + ].format(main_theme=main_theme), + }, + { + "role": "assistant", + "content": "News search results:\n" + context, + }, ] mindmap_text = self.llm_base.get_response(followup_messages) @@ -344,11 +455,11 @@ def generate_one_shot( "mindmap_json": theme_tree.to_json(), "grounded": True, "search_queries": queries, - "search_context": context + "search_context": context, } # Default: just generate mind map mindmap_text = self.llm_base.get_response(messages) - + theme_tree = self._parse_llm_to_themetree(mindmap_text) df = self._themetree_to_dataframe(theme_tree) return { @@ -356,7 +467,7 @@ def generate_one_shot( "mindmap_tree": theme_tree, "mindmap_json": theme_tree.to_json(), "mindmap_df": df, - "grounded": False + "grounded": False, } def generate_refined( @@ -365,32 +476,52 @@ def generate_refined( main_theme: str, initial_mindmap: str, grounding_method: str = "tool_call", - output_dir:str = "./refined_mindmaps", - filename:str = "refined_mindmap.json", + output_dir: str = "./refined_mindmaps", + filename: str = "refined_mindmap.json", map_type: str = "risk", instructions: Optional[str] = None, search_scope: Optional[Any] = None, sortby: Optional[Any] = None, date_range: Optional[Any] = None, chunk_limit: Optional[int] = 20, - **llm_kwargs + **llm_kwargs, ) -> Dict[str, Any]: """ Refine an initial mind map: LLM proposes searches, search is run, LLM refines mind map with search results. Optionally log intermediate steps to disk. """ - messages = self.compose_tool_call_message(main_theme, focus, map_type, instructions, initial_mindmap) - llm_kwargs = self.llm_model_config_reasoning.get_llm_kwargs(remove_max_tokens=True, remove_timeout=True) + messages = self.compose_tool_call_message( + main_theme, focus, map_type, instructions, initial_mindmap + ) + llm_kwargs = self.llm_model_config_reasoning.get_llm_kwargs( + remove_max_tokens=True, remove_timeout=True + ) if grounding_method == "tool_call": - tool_call_id, tool_calls, search_list, entities_list, keywords_list = self.send_tool_call( - messages,self.llm_reasoning, llm_kwargs=llm_kwargs) - + tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( + self.send_tool_call(messages, self.llm_reasoning, llm_kwargs=llm_kwargs) + ) + if search_list and isinstance(search_list, list): context = self._run_and_collate_search( - search_list, entities_list, keywords_list, search_scope, sortby, date_range, chunk_limit + search_list, + entities_list, + keywords_list, + search_scope, + sortby, + date_range, + chunk_limit, + ) + + refinement_messages = self.compose_refinement_message( + main_theme, + focus, + map_type, + instructions, + initial_mindmap, + context, + tool_calls, + tool_call_id, ) - - refinement_messages = self.compose_refinement_message(main_theme, focus, map_type, instructions, initial_mindmap, context, tool_calls, tool_call_id) mindmap_text = self.llm_reasoning.get_response(refinement_messages) theme_tree = self._parse_llm_to_themetree(mindmap_text) @@ -400,7 +531,7 @@ def generate_refined( "mindmap_df": df, "mindmap_json": theme_tree.to_json(), "search_queries": search_list, - "search_context": context + "search_context": context, } save_results_to_file(result_dict, output_dir, filename) return result_dict @@ -412,7 +543,7 @@ def generate_refined( "mindmap_df": df, "mindmap_json": theme_tree.to_json(), "search_queries": [], - "search_context": "" + "search_context": "", } save_results_to_file(result_dict, output_dir, filename) return result_dict @@ -424,7 +555,16 @@ def generate_refined( search_queries, [], [], search_scope, sortby, date_range, chunk_limit ) - refinement_messages = self.compose_refinement_message(main_theme, focus, map_type, instructions, initial_mindmap, context, tool_calls, tool_call_id) + refinement_messages = self.compose_refinement_message( + main_theme, + focus, + map_type, + instructions, + initial_mindmap, + context, + tool_calls, + tool_call_id, + ) mindmap_text = self.llm_reasoning.get_response(refinement_messages) theme_tree = self._parse_llm_to_themetree(mindmap_text) @@ -434,26 +574,29 @@ def generate_refined( "mindmap_df": df, "mindmap_json": theme_tree.to_json(), "search_queries": search_queries, - "search_context": context + "search_context": context, } save_results_to_file(result_dict, output_dir, filename) return result_dict - - def generate_or_load_refined(self, instructions: str, - focus: str, - main_theme: str, - map_type: str, - initial_mindmap: str, - llm_model: str = "o3-mini", - reasoning_effort: str = "high", - search_scope: Any = None, - sortby: Any = None, - date_range: Any = None, - chunk_limit: int = 20, - grounding_method: str = "tool_call", - output_dir:str = "./bootstrapped_mindmaps", - filename: str = "refined_mindmap", - i: int = 0): + + def generate_or_load_refined( + self, + instructions: str, + focus: str, + main_theme: str, + map_type: str, + initial_mindmap: str, + llm_model: str = "o3-mini", + reasoning_effort: str = "high", + search_scope: Any = None, + sortby: Any = None, + date_range: Any = None, + chunk_limit: int = 20, + grounding_method: str = "tool_call", + output_dir: str = "./bootstrapped_mindmaps", + filename: str = "refined_mindmap", + i: int = 0, + ): if f"{filename}_{i}.json" in os.listdir(output_dir): result = load_results_from_file(output_dir, f"{filename}_{i}.json") print(f"Loaded existing result for {filename}_{i}.json") @@ -469,9 +612,9 @@ def generate_or_load_refined(self, instructions: str, grounding_method=grounding_method, date_range=date_range, output_dir=output_dir, - filename = f"{filename}_{i}.json" + filename=f"{filename}_{i}.json", ) - #save_results_to_file(result, output_dir, ) + # save_results_to_file(result, output_dir, ) except Exception as e: print(e) result = self.generate_refined( @@ -484,42 +627,45 @@ def generate_or_load_refined(self, instructions: str, grounding_method=grounding_method, date_range=date_range, output_dir=output_dir, - filename = f"{filename}_{i}.json" + filename=f"{filename}_{i}.json", ) - #save_results_to_file(result, output_dir, f"{filename}_{i}.json") + # save_results_to_file(result, output_dir, f"{filename}_{i}.json") return result - def bootstrap_refined(self, instructions: str, - focus: str, - main_theme: str, - map_type: str, - initial_mindmap: str, - search_scope: Any = None, - sortby: Any = None, - date_range: Any = None, - chunk_limit: int = 20, - grounding_method: str = "tool_call", - output_dir: str = "./bootstrapped_mindmaps", - filename: str = "refined_mindmap", - n_elements: int = 50, - max_workers: int = 10): + def bootstrap_refined( + self, + instructions: str, + focus: str, + main_theme: str, + map_type: str, + initial_mindmap: str, + search_scope: Any = None, + sortby: Any = None, + date_range: Any = None, + chunk_limit: int = 20, + grounding_method: str = "tool_call", + output_dir: str = "./bootstrapped_mindmaps", + filename: str = "refined_mindmap", + n_elements: int = 50, + max_workers: int = 10, + ): """ Generate multiple refined mindmaps in parallel using ThreadPoolExecutor. - + Generates n_elements mindmaps by calling generate_or_load_refined for each index. Uses a thread pool to parallelize the generation process for better efficiency. Each mindmap is saved with an index suffix to the output_dir. - + Returns a list of all generated mindmap results. """ # Create output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) - + refined_results = [] with ThreadPoolExecutor(max_workers=max_workers) as executor: # Create a mapping of futures to their corresponding indices future_to_index = {} - + # Submit all tasks and track which future corresponds to which index for i in range(n_elements): future = executor.submit( @@ -536,13 +682,15 @@ def bootstrap_refined(self, instructions: str, grounding_method=grounding_method, output_dir=output_dir, filename=filename, - i=i + i=i, ) future_to_index[future] = i # Process futures as they complete for future in tqdm( - as_completed(future_to_index), total=n_elements, desc="Bootstrapping Refined Mindmaps..." + as_completed(future_to_index), + total=n_elements, + desc="Bootstrapping Refined Mindmaps...", ): i = future_to_index[future] try: @@ -552,7 +700,7 @@ def bootstrap_refined(self, instructions: str, print(f"Error in generating mindmap {i}: {e}") return refined_results - + def generate_dynamic( self, instructions: str, @@ -566,7 +714,7 @@ def generate_dynamic( grounding_method: str = "tool_call", map_type: str = "risk", output_dir: str = "./dynamic_mindmaps", - **llm_kwargs + **llm_kwargs, ) -> List[Dict[str, Any]]: """ Dynamic/iterative mind map generation over time intervals. @@ -579,24 +727,27 @@ def generate_dynamic( instructions, focus, main_theme, map_type=map_type, **llm_kwargs ) prev_mindmap = one_shot["mindmap_text"] - results['base_mindmap'] = one_shot + results["base_mindmap"] = one_shot # Step 2: For each subsequent interval, refine using previous mind map and new search, including starting month - for i, (interval, month_name) in enumerate(zip(month_intervals, month_names), start=0): + for i, (interval, month_name) in enumerate( + zip(month_intervals, month_names), start=0 + ): date_range = self._make_absolute_date_range(interval) - refined = self.generate_refined(focus = focus, - main_theme=main_theme, - initial_mindmap=prev_mindmap, - grounding_method=grounding_method, - output_dir=output_dir, - filename=f"{month_name}.json", - map_type=map_type, - instructions=instructions, - search_scope=search_scope, - sortby=sortby, - date_range=date_range, - chunk_limit=chunk_limit, - **llm_kwargs - ) + refined = self.generate_refined( + focus=focus, + main_theme=main_theme, + initial_mindmap=prev_mindmap, + grounding_method=grounding_method, + output_dir=output_dir, + filename=f"{month_name}.json", + map_type=map_type, + instructions=instructions, + search_scope=search_scope, + sortby=sortby, + date_range=date_range, + chunk_limit=chunk_limit, + **llm_kwargs, + ) results[month_name] = refined prev_mindmap = refined["mindmap_text"] @@ -610,7 +761,7 @@ def _run_and_collate_search( search_scope: Any = None, sortby: Any = None, date_range: Any = None, - chunk_limit: int = 20 + chunk_limit: int = 20, ) -> str: """ Run Bigdata search for each query and collate results for LLM context. @@ -625,10 +776,19 @@ def _run_and_collate_search( # --- Robust date_range parsing --- # If date_range is a list of one tuple, unpack it - if isinstance(date_range, list) and len(date_range) == 1 and isinstance(date_range[0], (tuple, list)) and len(date_range[0]) == 2: + if ( + isinstance(date_range, list) + and len(date_range) == 1 + and isinstance(date_range[0], (tuple, list)) + and len(date_range[0]) == 2 + ): date_range = date_range[0] # If date_range is a tuple/list of two strings, convert to AbsoluteDateRange - if isinstance(date_range, (tuple, list)) and len(date_range) == 2 and all(isinstance(x, str) for x in date_range): + if ( + isinstance(date_range, (tuple, list)) + and len(date_range) == 2 + and all(isinstance(x, str) for x in date_range) + ): date_range = AbsoluteDateRange(start=date_range[0], end=date_range[1]) elif date_range is None: date_range = RollingDateRange.LAST_THIRTY_DAYS @@ -638,41 +798,51 @@ def _run_and_collate_search( entity_objs = [] for entity_name in entities_list: try: - entity = self.bigdata_connection.knowledge_graph.autosuggest(entity_name, limit=1)[0] + entity = self.bigdata_connection.knowledge_graph.autosuggest( + entity_name, limit=1 + )[0] entity_objs.append(entity) except Exception as e: print(f"Warning: Autosuggest failed for '{entity_name}': {e}") continue - print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") - confirmed_entities = [entity for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name] + print( + f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}" + ) + confirmed_entities = [ + entity + for entity, orig_str in zip(entity_objs, entities_list) + if entity.name in orig_str or orig_str in entity.name + ] if confirmed_entities: - entities = Any(confirmed_entities) + entities = BigdataAny(confirmed_entities) else: entities = None else: entities = None if keywords_list: print(f"Searching with keywords: {keywords_list}") - keywords = Any([Keyword(kw) for kw in keywords_list]) + keywords = BigdataAny([Keyword(kw) for kw in keywords_list]) else: keywords = None - + queries = [Similarity(sentence) for sentence in search_list] if entities: - queries = [query&entities for query in queries] + queries = [query & entities for query in queries] if keywords: - queries = [query&keywords for query in queries] - - all_results = run_search(queries=queries, - date_ranges = date_range, - sortby = sortby, - scope = scope, - limit = chunk_limit, - only_results = False, - rerank_threshold = None) + queries = [query & keywords for query in queries] + + all_results = run_search( + queries=queries, + date_ranges=date_range, + sortby=sortby, + scope=scope, + limit=chunk_limit, + only_results=False, + rerank_threshold=None, + ) return self.collate_results(all_results) - + def collate_results(self, results: List[Tuple[str, Any]]) -> str: """ Collate a list of (query, result) tuples into a single string for LLM context. @@ -687,8 +857,8 @@ def collate_results(self, results: List[Tuple[str, Any]]) -> str: for (text_query, date_range), result in results.items(): for item in text_query.items: dictitem = item.to_dict() - if dictitem['type']=='similarity': - sentence = dictitem['value'] + if dictitem["type"] == "similarity": + sentence = dictitem["value"] docstr = f"###Query: {sentence}\n ### Results:\n" for doc in result: headline = getattr(doc, "headline", "No headline") @@ -706,6 +876,7 @@ def _parse_queries(self, queries_json: str) -> List[str]: Parse LLM output (JSON or text) into a list of search queries. """ import json + try: queries = json.loads(queries_json) if isinstance(queries, list): @@ -724,4 +895,4 @@ def _make_absolute_date_range(interval: Tuple[str, str]) -> Any: """ Helper to create an AbsoluteDateRange object from a (start, end) tuple. """ - return AbsoluteDateRange(start=interval[0], end=interval[1]) \ No newline at end of file + return AbsoluteDateRange(start=interval[0], end=interval[1]) diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py index 7265641..4f09b07 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_utils.py +++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py @@ -1,11 +1,15 @@ -import pandas as pd -from io import StringIO -import os import json +import os +from io import StringIO -prompts_dict = {'theme':{'qualifier':'Main Theme', - 'user_prompt_message':'Your given Theme is: {main_theme}', - 'enforce_structure_string':("""IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" +import pandas as pd + +prompts_dict = { + "theme": { + "qualifier": "Main Theme", + "user_prompt_message": "Your given Theme is: {main_theme}", + "enforce_structure_string": ( + """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" "- `node`: an integer representing the unique identifier for the node.\n" "- `label`: a string for the name of the sub-theme.\n" "- `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme.\n" @@ -33,10 +37,13 @@ " ]}\n" " ]\n" "}\n" - """)}, - 'risk':{'qualifier':'Risk Scenario', - 'user_prompt_message':'Your given Risk Scenario is: {main_theme}', - 'enforce_structure_string':( + """ + ), + }, + "risk": { + "qualifier": "Risk Scenario", + "user_prompt_message": "Your given Risk Scenario is: {main_theme}", + "enforce_structure_string": ( """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" " - `node`: an integer representing the unique identifier for the node.\n" " - `label`: a string for the name of the sub-theme.\n" @@ -64,10 +71,12 @@ " ]}\n" " ]\n" "}\n" - """) - } + """ + ), + }, } + def format_mindmap_to_dataframe(mindmap_text): """ Parse a mind map in pipe-delimited table format into a cleaned pandas DataFrame. @@ -83,18 +92,20 @@ def format_mindmap_to_dataframe(mindmap_text): ValueError: If the resulting DataFrame does not contain the required columns. """ try: - df = pd.read_csv(StringIO(mindmap_text.strip()), sep="|", engine="python", skiprows=[1]) - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] - except Exception as e: + df = pd.read_csv( + StringIO(mindmap_text.strip()), sep="|", engine="python", skiprows=[1] + ) + df = df.loc[:, ~df.columns.str.contains("^Unnamed")] + except Exception: try: df = pd.read_csv( StringIO(mindmap_text.strip()), sep="|", engine="python", skiprows=[1], - on_bad_lines='skip' + on_bad_lines="skip", ) - df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + df = df.loc[:, ~df.columns.str.contains("^Unnamed")] except Exception as e2: raise ValueError(f"Failed to parse mindmap text to DataFrame: {e2}") required_columns = {"Main Branches", "Sub-Branches", "Description"} @@ -102,6 +113,7 @@ def format_mindmap_to_dataframe(mindmap_text): raise ValueError(f"Missing required columns in mindmap table: {df.columns}") return df + def save_results_to_file(results, output_dir, filename): """ Save the results to a JSON file. @@ -112,10 +124,11 @@ def save_results_to_file(results, output_dir, filename): with open(output_file, "w") as f: json.dump(results, f, default=str, indent=2) + def load_results_from_file(output_dir, filename): """ Load the results from a JSON file. """ input_file = os.path.join(output_dir, filename) with open(input_file, "r") as f: - return json.load(f) \ No newline at end of file + return json.load(f) diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 6166bc2..2e545f1 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -40,7 +40,12 @@ | list[tuple[datetime, datetime] | AbsoluteDateRange | RollingDateRange] ) -INPUT_DATE_RANGE = tuple[datetime, datetime] | RollingDateRange | AbsoluteDateRange | NORMALIZED_DATE_RANGE +INPUT_DATE_RANGE = ( + tuple[datetime, datetime] + | RollingDateRange + | AbsoluteDateRange + | NORMALIZED_DATE_RANGE +) SEARCH_QUERY_RESULTS_TYPE = dict[ tuple[QueryComponent, AbsoluteDateRange | RollingDateRange], list[Document] @@ -250,12 +255,14 @@ def concurrent_search( as_completed(futures), total=len(futures), desc="Querying Bigdata..." ): query, date_range = futures[future] - + try: if isinstance(date_range, AbsoluteDateRange): date_range = f"{date_range.start_dt.isoformat()}_{date_range.end_dt.isoformat()}" elif isinstance(date_range, tuple): - date_range = f"{date_range[0].isoformat()}_{date_range[1].isoformat()}" + date_range = ( + f"{date_range[0].isoformat()}_{date_range[1].isoformat()}" + ) results[(query, date_range)] = future.result() except Exception as e: diff --git a/src/bigdata_research_tools/visuals/mindmap_visuals.py b/src/bigdata_research_tools/visuals/mindmap_visuals.py index 3e1a2f7..3d9ab0a 100644 --- a/src/bigdata_research_tools/visuals/mindmap_visuals.py +++ b/src/bigdata_research_tools/visuals/mindmap_visuals.py @@ -17,7 +17,7 @@ 2. **Text Fitting Algorithm**: - Uses binary search (30-40 iterations) to find optimal font size for each text element - Text wrapping respects word boundaries only (no mid-word breaks) - - Separate fitting strategies for main branches (strict, 88% safety margin) and + - Separate fitting strategies for main branches (strict, 88% safety margin) and sub-branches (prioritizes larger fonts, 90% safety margin, minimal wrapping) - Measures actual rendered text dimensions on the target figure for accuracy @@ -36,23 +36,27 @@ The algorithm guarantees no text overflow, no ellipsis, and proper spacing while maintaining visual hierarchy and readability. """ + import matplotlib -matplotlib.use('Agg') # Use non-interactive backend -import matplotlib.pyplot as plt -import matplotlib.patches as patches -from matplotlib import rcParams -import pandas as pd -import numpy as np + +matplotlib.use("Agg") # Use non-interactive backend import math import os -from typing import Tuple, Dict, List, Optional +from typing import Dict, Tuple + +import matplotlib.patches as patches +import matplotlib.pyplot as plt +import pandas as pd +from matplotlib import rcParams + from bigdata_research_tools.mindmap.mindmap import MindMap ##Note: In case the mindmap overflows from the chart area and edges, you can try to adjust the padding system by shrinking the 'available chart area'. For example, if padding does not work, you can try to have the algorithm think that the available area is 95% of the original available area. In this way, the algorithm will have less space to work with and will try to fit the text within the available area. + class MindmapPlotter: """Main class for plotting mindmaps with graphviz-inspired layout.""" - + def __init__( self, mindmap: pd.DataFrame | MindMap, @@ -60,68 +64,74 @@ def __init__( title: str = "Mind Map", color_scheme: str = "gold", output_dir: str = "./outputs", - aspect_ratio: float = 8/9 + aspect_ratio: float = 8 / 9, ): """Initialize the mindmap plotter.""" if isinstance(mindmap, pd.DataFrame): self.df = mindmap.copy() elif isinstance(mindmap, MindMap): self.df = mindmap.to_dataframe() - + # Handle flexible column names: either (Parent, Label) or (Main Branches, Sub-Branches) - if 'Main Branches' in self.df.columns and 'Sub-Branches' in self.df.columns: + if "Main Branches" in self.df.columns and "Sub-Branches" in self.df.columns: # Already in correct format pass - elif 'Parent' in self.df.columns and 'Label' in self.df.columns: + elif "Parent" in self.df.columns and "Label" in self.df.columns: # Rename to standard format - self.df = self.df.rename(columns={'Parent': 'Main Branches', 'Label': 'Sub-Branches'}) + self.df = self.df.rename( + columns={"Parent": "Main Branches", "Label": "Sub-Branches"} + ) else: # Check what columns we have - has_main = 'Main Branches' in self.df.columns - has_sub = 'Sub-Branches' in self.df.columns - has_parent = 'Parent' in self.df.columns - has_label = 'Label' in self.df.columns - + has_main = "Main Branches" in self.df.columns + has_sub = "Sub-Branches" in self.df.columns + has_parent = "Parent" in self.df.columns + has_label = "Label" in self.df.columns + raise ValueError( f"DataFrame must have either (Parent, Label) or (Main Branches, Sub-Branches) columns. " f"Found columns: {list(self.df.columns)}. " f"Has Main Branches: {has_main}, Has Sub-Branches: {has_sub}, " f"Has Parent: {has_parent}, Has Label: {has_label}" ) - + # Assert required columns exist - assert 'Main Branches' in self.df.columns, "Missing 'Main Branches' column after processing" - assert 'Sub-Branches' in self.df.columns, "Missing 'Sub-Branches' column after processing" - - self.df['Main Branches'] = self.df['Main Branches'].astype(str).str.strip() - self.df['Sub-Branches'] = self.df['Sub-Branches'].astype(str).str.strip() - + assert "Main Branches" in self.df.columns, ( + "Missing 'Main Branches' column after processing" + ) + assert "Sub-Branches" in self.df.columns, ( + "Missing 'Sub-Branches' column after processing" + ) + + self.df["Main Branches"] = self.df["Main Branches"].astype(str).str.strip() + self.df["Sub-Branches"] = self.df["Sub-Branches"].astype(str).str.strip() + self.main_theme = main_theme self.title = title self.color_scheme = color_scheme self.output_dir = output_dir self.aspect_ratio = aspect_ratio - + os.makedirs(output_dir, exist_ok=True) - - plt.rcParams['svg.fonttype'] = 'none' - rcParams['font.family'] = 'DejaVu Sans' - + + plt.rcParams["svg.fonttype"] = "none" + rcParams["font.family"] = "DejaVu Sans" + self.colors = self._get_color_scheme(color_scheme) - - self.main_branches = self.df['Main Branches'].unique() + + self.main_branches = self.df["Main Branches"].unique() self.n_main = len(self.main_branches) - + self.sub_branches = { - main: self.df[self.df['Main Branches'] == main]['Sub-Branches'].tolist() + main: self.df[self.df["Main Branches"] == main]["Sub-Branches"].tolist() for main in self.main_branches } self.n_sub_total = sum(len(subs) for subs in self.sub_branches.values()) - + self.fig_width = 10.0 self.fig_height = self.fig_width / aspect_ratio self.title_height = 0 # No title - removed to maximize space - + # Add padding to prevent rectangles from going over chart edges self.edge_padding = 0.25 # Padding on sides self.top_padding = 0.1 # Minimal top padding @@ -131,20 +141,33 @@ def __init__( self.sub_linewidth = 2.0 # Sub-branch linewidth # Linewidth extends by half on each side, convert points to inches (72 points per inch) # Be very conservative: use 3x linewidth as padding to ensure borders don't get cut - self.linewidth_padding = (self.max_linewidth / 72.0) * 3.0 # Very conservative padding - self.sub_linewidth_padding = (self.sub_linewidth / 72.0) * 3.0 # Sub-branch linewidth padding + self.linewidth_padding = ( + self.max_linewidth / 72.0 + ) * 3.0 # Very conservative padding + self.sub_linewidth_padding = ( + self.sub_linewidth / 72.0 + ) * 3.0 # Sub-branch linewidth padding # Use minimal top padding, extra bottom padding # Reduce available_height to account for linewidth extension at bottom - self.available_height = self.fig_height - self.title_height - self.top_padding - (self.bottom_padding + self.sub_linewidth_padding) - self.available_width = self.fig_width - 2 * (self.edge_padding + self.linewidth_padding) + self.available_height = ( + self.fig_height + - self.title_height + - self.top_padding + - (self.bottom_padding + self.sub_linewidth_padding) + ) + self.available_width = self.fig_width - 2 * ( + self.edge_padding + self.linewidth_padding + ) # Calculate absolute bottom boundary - no rectangle should exceed this # Use sub_linewidth_padding since sub-branches are at the bottom - self.absolute_bottom = self.fig_height - self.bottom_padding - self.sub_linewidth_padding - + self.absolute_bottom = ( + self.fig_height - self.bottom_padding - self.sub_linewidth_padding + ) + # Increased padding for text inside boxes self.pad_x = 0.7 # Increased from 0.5 self.pad_y = 0.7 # Increased from 0.5 - + def _get_color_scheme(self, scheme: str) -> Dict[str, str]: """Get color scheme dictionary.""" schemes = { @@ -152,67 +175,67 @@ def _get_color_scheme(self, scheme: str) -> Dict[str, str]: "edge": "#eab720", "link": "#eab720", "background": "none", # Transparent - "text": "black" + "text": "black", }, "light_blue": { "edge": "#206EB5", "link": "#206EB5", "background": "none", # Transparent - "text": "black" + "text": "black", }, "dark_blue": { "edge": "#2C318C", "link": "#2C318C", "background": "none", # Transparent - "text": "black" - } + "text": "black", + }, } return schemes.get(scheme, schemes["gold"]) - + def _measure_text(self, ax, text: str, fontsize: float) -> Tuple[float, float]: """Measure text dimensions accurately.""" if not text: return self.pad_x, self.pad_y - - t = ax.text(0, 0, text, fontsize=fontsize, ha='left', va='bottom') - if not hasattr(ax.figure.canvas, 'renderer'): + + t = ax.text(0, 0, text, fontsize=fontsize, ha="left", va="bottom") + if not hasattr(ax.figure.canvas, "renderer"): ax.figure.canvas.draw() renderer = ax.figure.canvas.get_renderer() bbox = t.get_window_extent(renderer=renderer) t.remove() - + width = bbox.width / ax.figure.dpi + self.pad_x height = bbox.height / ax.figure.dpi + self.pad_y - + return width, height - + def _wrap_text(self, text: str, max_chars: int) -> str: """Wrap text at word boundaries.""" if not text: return text - + lines = [] - for para in text.split('\n'): + for para in text.split("\n"): if not para: - lines.append('') + lines.append("") continue - + words = para.split() if not words: - lines.append('') + lines.append("") continue - + line = words[0] for word in words[1:]: - if len(line + ' ' + word) <= max_chars: - line += ' ' + word + if len(line + " " + word) <= max_chars: + line += " " + word else: lines.append(line) line = word lines.append(line) - - return '\n'.join(lines) - + + return "\n".join(lines) + def _fit_text_main( self, ax, @@ -220,81 +243,91 @@ def _fit_text_main( max_width: float, max_height: float, initial_font: float, - min_font: float = 9 + min_font: float = 9, ) -> Tuple[float, str, float, float]: """ Fit text for main branches - STRICT no overflow. Returns: fontsize, wrapped_text, actual_width, actual_height """ if not text: - return min_font, '', self.pad_x, self.pad_y - + return min_font, "", self.pad_x, self.pad_y + available_w = max(0.1, max_width - self.pad_x) - available_h = max(0.1, max_height - self.pad_y) - + _available_h = max(0.1, max_height - self.pad_y) + # Binary search for optimal font size low_font = min_font high_font = initial_font * 1.3 best = None - + for _ in range(40): test_font = (low_font + high_font) / 2.0 - + # Less aggressive wrapping - prioritize font size chars_per_inch = test_font / 12.0 * 7 # More chars per inch wrap_chars = max(20, int(available_w * chars_per_inch)) # Minimum 20 chars - + wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, test_font) - + if w <= max_width * 0.88 and h <= max_height * 0.88: best = (test_font, wrapped, w, h) low_font = test_font + 0.2 else: high_font = test_font - 0.2 - + if high_font < low_font: break - + if best: return best - + # Fallback: ensure it fits fontsize = min_font chars_per_inch = fontsize / 12.0 * 7 wrap_chars = max(20, int(available_w * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Keep reducing until it fits - more aggressive max_attempts = 60 attempt = 0 - while (w > max_width * 0.88 or h > max_height * 0.88) and fontsize > min_font * 0.7 and attempt < max_attempts: + while ( + (w > max_width * 0.88 or h > max_height * 0.88) + and fontsize > min_font * 0.7 + and attempt < max_attempts + ): attempt += 1 - scale = min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) * 0.97 + scale = ( + min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) + * 0.97 + ) fontsize = max(min_font * 0.7, fontsize * scale) chars_per_inch = fontsize / 12.0 * 7 wrap_chars = max(20, int(available_w * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Final verification - ensure text actually fits w, h = self._measure_text(ax, wrapped, fontsize) if w > max_width * 0.88 or h > max_height * 0.88: # Force one more reduction - scale = min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) * 0.98 + scale = ( + min(max_width * 0.88 / max(0.01, w), max_height * 0.88 / max(0.01, h)) + * 0.98 + ) fontsize = max(min_font * 0.7, fontsize * scale) chars_per_inch = fontsize / 12.0 * 7 wrap_chars = max(20, int((max_width - self.pad_x) * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Final safety check - clip dimensions w = min(w, max_width * 0.88) h = min(h, max_height * 0.88) - + return fontsize, wrapped, w, h - + def _fit_text_sub( self, ax, @@ -302,117 +335,131 @@ def _fit_text_sub( max_width: float, max_height: float, initial_font: float, - min_font: float = 8 + min_font: float = 8, ) -> Tuple[float, str, float, float]: """ Fit text for sub-branches - prioritize large font, minimal wrapping. Returns: fontsize, wrapped_text, actual_width, actual_height """ if not text: - return min_font, '', self.pad_x, self.pad_y - + return min_font, "", self.pad_x, self.pad_y + available_w = max(0.1, max_width - self.pad_x) - available_h = max(0.1, max_height - self.pad_y) - + _available_h = max(0.1, max_height - self.pad_y) + # Binary search prioritizing larger fonts low_font = min_font high_font = initial_font * 1.5 best = None - + for _ in range(40): test_font = (low_font + high_font) / 2.0 - + # Much less aggressive wrapping - wide lines preferred chars_per_inch = test_font / 12.0 * 8 # Even more chars per inch - wrap_chars = max(25, int(available_w * chars_per_inch)) # Minimum 25 chars, prefer wide lines - + wrap_chars = max( + 25, int(available_w * chars_per_inch) + ) # Minimum 25 chars, prefer wide lines + wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, test_font) - + if w <= max_width * 0.90 and h <= max_height * 0.90: best = (test_font, wrapped, w, h) low_font = test_font + 0.3 # Try even larger else: high_font = test_font - 0.3 - + if high_font < low_font: break - + if best: return best - + # Fallback: use minimum font with wide wrapping fontsize = min_font chars_per_inch = fontsize / 12.0 * 8 wrap_chars = max(25, int(available_w * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Only reduce if absolutely necessary max_attempts = 30 attempt = 0 - while (w > max_width * 0.90 or h > max_height * 0.90) and fontsize > min_font * 0.85 and attempt < max_attempts: + while ( + (w > max_width * 0.90 or h > max_height * 0.90) + and fontsize > min_font * 0.85 + and attempt < max_attempts + ): attempt += 1 - scale = min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) * 0.98 + scale = ( + min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) + * 0.98 + ) fontsize = max(min_font * 0.85, fontsize * scale) chars_per_inch = fontsize / 12.0 * 8 wrap_chars = max(25, int(available_w * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Final verification - ensure text actually fits w, h = self._measure_text(ax, wrapped, fontsize) if w > max_width * 0.90 or h > max_height * 0.90: # Force one more reduction - scale = min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) * 0.99 + scale = ( + min(max_width * 0.90 / max(0.01, w), max_height * 0.90 / max(0.01, h)) + * 0.99 + ) fontsize = max(min_font * 0.85, fontsize * scale) chars_per_inch = fontsize / 12.0 * 8 wrap_chars = max(25, int((max_width - self.pad_x) * chars_per_inch)) wrapped = self._wrap_text(text, wrap_chars) w, h = self._measure_text(ax, wrapped, fontsize) - + # Final safety check - clip dimensions w = min(w, max_width * 0.90) h = min(h, max_height * 0.90) - + return fontsize, wrapped, w, h - + def _calculate_layout(self) -> Dict: """Calculate complete layout with uniform dimensions and scaling.""" fig_temp = plt.figure(figsize=(self.fig_width, self.fig_height)) ax_temp = fig_temp.add_subplot(111) - ax_temp.axis('off') - + ax_temp.axis("off") + # Base font calculation total_elements = self.n_main + self.n_sub_total - area_per_element = (self.available_height * self.available_width) / max(1, total_elements) - + area_per_element = (self.available_height * self.available_width) / max( + 1, total_elements + ) + ref_area = 1.0 if area_per_element > ref_area: area_factor = math.pow(area_per_element / ref_area, 0.6) else: area_factor = math.pow(area_per_element / ref_area, 0.7) - + area_factor = max(0.4, min(3.5, area_factor)) base_font = max(8, min(26, 12 * area_factor)) - + # Initial dimensions (will be scaled if needed) main_spacing = self.available_height * 0.06 available_h = self.available_height - (self.n_main - 1) * main_spacing uniform_main_h = max(0.7, min(1.6, available_h / max(1, self.n_main))) - + max_main_len = max([len(m) for m in self.main_branches], default=20) uniform_main_w = max(1.3, min(2.5, max_main_len * 0.10)) - + # Theme width matches main branch width theme_w = uniform_main_w theme_h = uniform_main_h - + # Sub-branch dimensions max_subs = max([len(subs) for subs in self.sub_branches.values()], default=1) spacing_extra = main_spacing * 0.4 available_sub_h = uniform_main_h + 2 * spacing_extra - + if max_subs > 0: sub_spacing = 0.12 available_for_subs = available_sub_h - (max_subs - 1) * sub_spacing @@ -420,21 +467,21 @@ def _calculate_layout(self) -> Dict: else: uniform_sub_h = 0.8 sub_spacing = 0.12 - + all_subs = [s for subs in self.sub_branches.values() for s in subs] max_sub_len = max([len(s) for s in all_subs], default=30) if all_subs else 30 max_w = self.available_width * 0.28 uniform_sub_w = max(2.2, min(max_w, max_sub_len * 0.12)) - + # Calculate column positions (accounting for edge padding and linewidth) margin = self.edge_padding + self.linewidth_padding col_spacing = self.available_width * 0.07 - + x_theme = margin x_main = x_theme + theme_w + col_spacing x_sub = x_main + uniform_main_w + col_spacing total_width_needed = x_sub + uniform_sub_w + margin - + # Horizontal scaling if needed h_scale = 1.0 if total_width_needed > self.available_width * 0.96: @@ -448,7 +495,7 @@ def _calculate_layout(self) -> Dict: x_theme = margin x_main = x_theme + theme_w + col_spacing x_sub = x_main + uniform_main_w + col_spacing - + # Vertical scaling check total_h = uniform_main_h * self.n_main + main_spacing * (self.n_main - 1) v_scale = 1.0 @@ -459,27 +506,31 @@ def _calculate_layout(self) -> Dict: uniform_sub_h *= v_scale main_spacing *= v_scale sub_spacing *= v_scale - + # Now fit text with final scaled dimensions # Fit all main branches to find uniform font main_fonts = [] for main in self.main_branches: - font, _, _, _ = self._fit_text_main(ax_temp, main, uniform_main_w, uniform_main_h, base_font * 0.9, 10) + font, _, _, _ = self._fit_text_main( + ax_temp, main, uniform_main_w, uniform_main_h, base_font * 0.9, 10 + ) main_fonts.append(font) - + uniform_main_font = min(main_fonts) if main_fonts else base_font * 0.9 - + # Fit all main branches with uniform dimensions - STRICT no overflow main_data = {} for main in self.main_branches: - font, wrapped, w, h = self._fit_text_main(ax_temp, main, uniform_main_w, uniform_main_h, uniform_main_font, 10) + font, wrapped, w, h = self._fit_text_main( + ax_temp, main, uniform_main_w, uniform_main_h, uniform_main_font, 10 + ) main_data[main] = { - 'fontsize': font, - 'text': wrapped, - 'width': uniform_main_w, - 'height': uniform_main_h + "fontsize": font, + "text": wrapped, + "width": uniform_main_w, + "height": uniform_main_h, } - + # Theme layout - use same width as main theme_font, theme_wrapped, theme_w_actual, theme_h_actual = self._fit_text_main( ax_temp, self.main_theme, theme_w, theme_h, base_font * 1.0, 11 @@ -487,326 +538,422 @@ def _calculate_layout(self) -> Dict: # Use target dimensions, not measured theme_w = uniform_main_w theme_h = uniform_main_h - + # Fit all sub-branches - prioritize large font, minimal wrapping sub_fonts = [] for sub in all_subs: - font, _, _, _ = self._fit_text_sub(ax_temp, sub, uniform_sub_w * 0.94, uniform_sub_h * 0.94, base_font * 1.0, 9) + font, _, _, _ = self._fit_text_sub( + ax_temp, + sub, + uniform_sub_w * 0.94, + uniform_sub_h * 0.94, + base_font * 1.0, + 9, + ) sub_fonts.append(font) - + uniform_sub_font = min(sub_fonts) if sub_fonts else base_font * 1.0 - + # Fit all sub-branches with uniform dimensions sub_data = {} for main in self.main_branches: subs = self.sub_branches[main] main_sub_data = [] - + for sub in subs: font, wrapped, w, h = self._fit_text_sub( - ax_temp, sub, uniform_sub_w * 0.94, uniform_sub_h * 0.94, uniform_sub_font, 9 + ax_temp, + sub, + uniform_sub_w * 0.94, + uniform_sub_h * 0.94, + uniform_sub_font, + 9, ) - - main_sub_data.append({ - 'fontsize': font, - 'text': wrapped, - 'width': uniform_sub_w, - 'height': uniform_sub_h - }) - + + main_sub_data.append( + { + "fontsize": font, + "text": wrapped, + "width": uniform_sub_w, + "height": uniform_sub_h, + } + ) + sub_data[main] = main_sub_data - + plt.close(fig_temp) - + return { - 'theme': {'fontsize': theme_font, 'text': theme_wrapped, 'width': theme_w, 'height': theme_h}, - 'main_data': main_data, - 'sub_data': sub_data, - 'uniform_main_w': uniform_main_w, - 'uniform_main_h': uniform_main_h, - 'uniform_sub_w': uniform_sub_w, - 'uniform_sub_h': uniform_sub_h, - 'main_spacing': main_spacing, - 'sub_spacing': sub_spacing, - 'margin': margin, - 'col_spacing': col_spacing, - 'x_theme': x_theme, - 'x_main': x_main, - 'x_sub': x_sub + "theme": { + "fontsize": theme_font, + "text": theme_wrapped, + "width": theme_w, + "height": theme_h, + }, + "main_data": main_data, + "sub_data": sub_data, + "uniform_main_w": uniform_main_w, + "uniform_main_h": uniform_main_h, + "uniform_sub_w": uniform_sub_w, + "uniform_sub_h": uniform_sub_h, + "main_spacing": main_spacing, + "sub_spacing": sub_spacing, + "margin": margin, + "col_spacing": col_spacing, + "x_theme": x_theme, + "x_main": x_main, + "x_sub": x_sub, } - - def _check_overflow(self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) -> Dict: + + def _check_overflow( + self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y + ) -> Dict: """Check for text and rectangle overflow, return adjustments needed.""" issues = { - 'theme_overflow': False, - 'main_overflows': {}, - 'sub_overflows': {}, - 'bottom_cutoff': False + "theme_overflow": False, + "main_overflows": {}, + "sub_overflows": {}, + "bottom_cutoff": False, } - + # Check theme text overflow theme_text = ax.text( - x_theme + layout['theme']['width'] / 2, - theme_y + layout['theme']['height'] / 2, - layout['theme']['text'], - ha='center', va='center', fontsize=layout['theme']['fontsize'], - fontweight='bold' + x_theme + layout["theme"]["width"] / 2, + theme_y + layout["theme"]["height"] / 2, + layout["theme"]["text"], + ha="center", + va="center", + fontsize=layout["theme"]["fontsize"], + fontweight="bold", ) renderer = ax.figure.canvas.get_renderer() bbox = theme_text.get_window_extent(renderer=renderer) theme_text.remove() text_w = bbox.width / ax.figure.dpi text_h = bbox.height / ax.figure.dpi - if text_w > layout['theme']['width'] * 0.88 or text_h > layout['theme']['height'] * 0.88: - issues['theme_overflow'] = True - + if ( + text_w > layout["theme"]["width"] * 0.88 + or text_h > layout["theme"]["height"] * 0.88 + ): + issues["theme_overflow"] = True + # Check main branch text overflow for main in self.main_branches: main_y = main_positions[main] - main_info = layout['main_data'][main] + main_info = layout["main_data"][main] main_text = ax.text( - x_main + layout['uniform_main_w'] / 2, - main_y + layout['uniform_main_h'] / 2, - main_info['text'], - ha='center', va='center', fontsize=main_info['fontsize'], - fontweight='bold' + x_main + layout["uniform_main_w"] / 2, + main_y + layout["uniform_main_h"] / 2, + main_info["text"], + ha="center", + va="center", + fontsize=main_info["fontsize"], + fontweight="bold", ) bbox = main_text.get_window_extent(renderer=renderer) main_text.remove() text_w = bbox.width / ax.figure.dpi text_h = bbox.height / ax.figure.dpi - if text_w > layout['uniform_main_w'] * 0.88 or text_h > layout['uniform_main_h'] * 0.88: - issues['main_overflows'][main] = True - + if ( + text_w > layout["uniform_main_w"] * 0.88 + or text_h > layout["uniform_main_h"] * 0.88 + ): + issues["main_overflows"][main] = True + # Check sub-branch overflow and bottom cutoff max_bottom_y = 0 for main in self.main_branches: main_y = main_positions[main] - subs = layout['sub_data'].get(main, []) + subs = layout["sub_data"].get(main, []) if subs: - spacing_extra = layout['main_spacing'] * 0.4 + spacing_extra = layout["main_spacing"] * 0.4 min_sub_y = main_y - spacing_extra - max_sub_y = main_y + layout['uniform_main_h'] + spacing_extra - - sub_spacing = layout['sub_spacing'] - total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing - main_center = main_y + layout['uniform_main_h'] / 2 + max_sub_y = main_y + layout["uniform_main_h"] + spacing_extra + + sub_spacing = layout["sub_spacing"] + total_sub_h = ( + layout["uniform_sub_h"] * len(subs) + (len(subs) - 1) * sub_spacing + ) + main_center = main_y + layout["uniform_main_h"] / 2 sub_y_start = main_center - total_sub_h / 2 - + if sub_y_start < min_sub_y: sub_y_start = min_sub_y if sub_y_start + total_sub_h > max_sub_y: sub_y_start = max_sub_y - total_sub_h # Calculate maximum allowed bottom position (use absolute bottom) min_top_y = self.top_padding + self.linewidth_padding - + if sub_y_start < min_top_y: sub_y_start = min_top_y # Ensure total height doesn't exceed absolute bottom if sub_y_start + total_sub_h > self.absolute_bottom: sub_y_start = max(min_top_y, self.absolute_bottom - total_sub_h) - + sub_y = sub_y_start for i, sub_info in enumerate(subs): - if sub_y + layout['uniform_sub_h'] > max_bottom_y: - issues['bottom_cutoff'] = True + if sub_y + layout["uniform_sub_h"] > max_bottom_y: + issues["bottom_cutoff"] = True break - + # Check text overflow sub_text = ax.text( - x_sub + layout['uniform_sub_w'] / 2, - sub_y + layout['uniform_sub_h'] / 2, - sub_info['text'], - ha='center', va='center', fontsize=sub_info['fontsize'], - fontweight='bold' + x_sub + layout["uniform_sub_w"] / 2, + sub_y + layout["uniform_sub_h"] / 2, + sub_info["text"], + ha="center", + va="center", + fontsize=sub_info["fontsize"], + fontweight="bold", ) bbox = sub_text.get_window_extent(renderer=renderer) sub_text.remove() text_w = bbox.width / ax.figure.dpi text_h = bbox.height / ax.figure.dpi - if text_w > layout['uniform_sub_w'] * 0.90 or text_h > layout['uniform_sub_h'] * 0.90: - issues['sub_overflows'][(main, i)] = True - - bottom_y = sub_y + layout['uniform_sub_h'] + if ( + text_w > layout["uniform_sub_w"] * 0.90 + or text_h > layout["uniform_sub_h"] * 0.90 + ): + issues["sub_overflows"][(main, i)] = True + + bottom_y = sub_y + layout["uniform_sub_h"] if bottom_y > max_bottom_y: max_bottom_y = bottom_y - - sub_y += layout['uniform_sub_h'] + sub_spacing - if sub_y + layout['uniform_sub_h'] > max_sub_y: + + sub_y += layout["uniform_sub_h"] + sub_spacing + if sub_y + layout["uniform_sub_h"] > max_sub_y: break - + # Check if bottom sub-branch is too close to edge (accounting for padding and linewidth) if max_bottom_y > self.absolute_bottom - 0.1: - issues['bottom_cutoff'] = True - + issues["bottom_cutoff"] = True + return issues - + def plot(self) -> Tuple[plt.Figure, plt.Axes]: """Create the mindmap plot with iterative refinement.""" layout = self._calculate_layout() - + # Iterative refinement loop max_iterations = 5 for iteration in range(max_iterations): fig, ax = plt.subplots(figsize=(self.fig_width, self.fig_height)) - ax.axis('off') - + ax.axis("off") + # Use pre-calculated positions from layout - x_theme = layout['x_theme'] - x_main = layout['x_main'] - x_sub = layout['x_sub'] - + x_theme = layout["x_theme"] + x_main = layout["x_main"] + x_sub = layout["x_sub"] + # Main branch positions - total_h = layout['uniform_main_h'] * self.n_main + layout['main_spacing'] * (self.n_main - 1) + total_h = layout["uniform_main_h"] * self.n_main + layout[ + "main_spacing" + ] * (self.n_main - 1) y_start = (self.available_height - total_h) / 2 - + main_positions = {} y = y_start for main in self.main_branches: main_positions[main] = y - y += layout['uniform_main_h'] + layout['main_spacing'] - + y += layout["uniform_main_h"] + layout["main_spacing"] + # Theme position (centered with main branches) - theme_y = y_start + (total_h - layout['theme']['height']) / 2 - + theme_y = y_start + (total_h - layout["theme"]["height"]) / 2 + # Draw elements to check overflow - self._draw_elements(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) - + self._draw_elements( + ax, layout, x_theme, x_main, x_sub, main_positions, theme_y + ) + # Ensure figure is drawn for accurate measurement ax.figure.canvas.draw() - + # Check for overflow - issues = self._check_overflow(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) - + issues = self._check_overflow( + ax, layout, x_theme, x_main, x_sub, main_positions, theme_y + ) + # If no issues, we're done - if not issues['theme_overflow'] and not issues['main_overflows'] and not issues['sub_overflows'] and not issues['bottom_cutoff']: + if ( + not issues["theme_overflow"] + and not issues["main_overflows"] + and not issues["sub_overflows"] + and not issues["bottom_cutoff"] + ): plt.close(fig) break - + # Adjust layout based on issues - if issues['theme_overflow'] or issues['main_overflows'] or issues['bottom_cutoff']: + if ( + issues["theme_overflow"] + or issues["main_overflows"] + or issues["bottom_cutoff"] + ): # Reduce font sizes for theme and main branches layout = self._adjust_layout_for_overflow(layout, issues, iteration) plt.close(fig) continue - + plt.close(fig) break - + # Final render fig, ax = plt.subplots(figsize=(self.fig_width, self.fig_height)) - ax.axis('off') - + ax.axis("off") + # Use pre-calculated positions from layout - x_theme = layout['x_theme'] - x_main = layout['x_main'] - x_sub = layout['x_sub'] - + x_theme = layout["x_theme"] + x_main = layout["x_main"] + x_sub = layout["x_sub"] + # Main branch positions (accounting for top padding and linewidth) - total_h = layout['uniform_main_h'] * self.n_main + layout['main_spacing'] * (self.n_main - 1) - y_start = self.top_padding + self.linewidth_padding + (self.available_height - total_h) / 2 - + total_h = layout["uniform_main_h"] * self.n_main + layout["main_spacing"] * ( + self.n_main - 1 + ) + y_start = ( + self.top_padding + + self.linewidth_padding + + (self.available_height - total_h) / 2 + ) + main_positions = {} y = y_start for main in self.main_branches: main_positions[main] = y - y += layout['uniform_main_h'] + layout['main_spacing'] - + y += layout["uniform_main_h"] + layout["main_spacing"] + # Theme position (centered with main branches) - theme_y = y_start + (total_h - layout['theme']['height']) / 2 - + theme_y = y_start + (total_h - layout["theme"]["height"]) / 2 + # Draw all elements self._draw_elements(ax, layout, x_theme, x_main, x_sub, main_positions, theme_y) - + # Set strict limits to prevent anything from being drawn outside bounds # Account for linewidth extension - clip everything strictly ax.set_xlim(0, self.fig_width) ax.set_ylim(0, self.fig_height) # Clip all patches and text to axes limits ax.set_clip_on(True) - + return fig, ax - - def _draw_elements(self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y): + + def _draw_elements( + self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y + ): """Draw all mindmap elements.""" # Draw theme theme_rect = patches.Rectangle( - (x_theme, theme_y), layout['theme']['width'], layout['theme']['height'], - linewidth=2.5, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2 + (x_theme, theme_y), + layout["theme"]["width"], + layout["theme"]["height"], + linewidth=2.5, + edgecolor=self.colors["edge"], + facecolor=self.colors["background"], + zorder=2, ) ax.add_patch(theme_rect) ax.text( - x_theme + layout['theme']['width'] / 2, - theme_y + layout['theme']['height'] / 2, - layout['theme']['text'], - ha='center', va='center', fontsize=layout['theme']['fontsize'], - color=self.colors['text'], fontweight='bold', zorder=3 + x_theme + layout["theme"]["width"] / 2, + theme_y + layout["theme"]["height"] / 2, + layout["theme"]["text"], + ha="center", + va="center", + fontsize=layout["theme"]["fontsize"], + color=self.colors["text"], + fontweight="bold", + zorder=3, ) - + # Draw main branches and sub-branches for main in self.main_branches: main_y = main_positions[main] - main_info = layout['main_data'][main] - + main_info = layout["main_data"][main] + # Main branch rectangle main_rect = patches.Rectangle( - (x_main, main_y), layout['uniform_main_w'], layout['uniform_main_h'], - linewidth=2.5, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2 + (x_main, main_y), + layout["uniform_main_w"], + layout["uniform_main_h"], + linewidth=2.5, + edgecolor=self.colors["edge"], + facecolor=self.colors["background"], + zorder=2, ) ax.add_patch(main_rect) ax.text( - x_main + layout['uniform_main_w'] / 2, - main_y + layout['uniform_main_h'] / 2, - main_info['text'], - ha='center', va='center', fontsize=main_info['fontsize'], - color=self.colors['text'], fontweight='bold', zorder=3 + x_main + layout["uniform_main_w"] / 2, + main_y + layout["uniform_main_h"] / 2, + main_info["text"], + ha="center", + va="center", + fontsize=main_info["fontsize"], + color=self.colors["text"], + fontweight="bold", + zorder=3, ) - + # Connection theme to main ax.plot( - [x_theme + layout['theme']['width'], x_main], - [theme_y + layout['theme']['height'] / 2, main_y + layout['uniform_main_h'] / 2], - color=self.colors['link'], linewidth=3, alpha=0.6, zorder=1, solid_capstyle='round' + [x_theme + layout["theme"]["width"], x_main], + [ + theme_y + layout["theme"]["height"] / 2, + main_y + layout["uniform_main_h"] / 2, + ], + color=self.colors["link"], + linewidth=3, + alpha=0.6, + zorder=1, + solid_capstyle="round", ) - + # Sub-branches - subs = layout['sub_data'].get(main, []) + subs = layout["sub_data"].get(main, []) if subs: - spacing_extra = layout['main_spacing'] * 0.4 + spacing_extra = layout["main_spacing"] * 0.4 min_sub_y = main_y - spacing_extra - max_sub_y = main_y + layout['uniform_main_h'] + spacing_extra + max_sub_y = main_y + layout["uniform_main_h"] + spacing_extra # Ensure max_sub_y doesn't exceed absolute bottom max_sub_y = min(max_sub_y, self.absolute_bottom) available_sub_h = max_sub_y - min_sub_y - - sub_spacing = layout['sub_spacing'] - total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing - + + sub_spacing = layout["sub_spacing"] + total_sub_h = ( + layout["uniform_sub_h"] * len(subs) + (len(subs) - 1) * sub_spacing + ) + # Adjust spacing if needed if total_sub_h > available_sub_h: - max_sp = (available_sub_h - layout['uniform_sub_h'] * len(subs)) / max(1, len(subs) - 1) + max_sp = ( + available_sub_h - layout["uniform_sub_h"] * len(subs) + ) / max(1, len(subs) - 1) sub_spacing = max(0.08, min(sub_spacing, max_sp)) - total_sub_h = layout['uniform_sub_h'] * len(subs) + (len(subs) - 1) * sub_spacing - + total_sub_h = ( + layout["uniform_sub_h"] * len(subs) + + (len(subs) - 1) * sub_spacing + ) + # Center on main branch - calculate ideal center position - main_center = main_y + layout['uniform_main_h'] / 2 + main_center = main_y + layout["uniform_main_h"] / 2 ideal_sub_y_start = main_center - total_sub_h / 2 - + # Determine available space boundaries (use the most restrictive) absolute_min = self.top_padding + self.linewidth_padding absolute_max = self.absolute_bottom relative_min = min_sub_y relative_max = max_sub_y - + # Use the most restrictive boundaries min_top_y = max(absolute_min, relative_min) max_bottom_y = min(absolute_max, relative_max) - + # Start with ideal centered position sub_y_start = ideal_sub_y_start - + # If the entire block fits within available space, use centered position - if ideal_sub_y_start >= min_top_y and ideal_sub_y_start + total_sub_h <= max_bottom_y: + if ( + ideal_sub_y_start >= min_top_y + and ideal_sub_y_start + total_sub_h <= max_bottom_y + ): sub_y_start = ideal_sub_y_start else: # Block doesn't fit centered - adjust to fit while maintaining centering as much as possible @@ -819,126 +966,154 @@ def _draw_elements(self, ax, layout, x_theme, x_main, x_sub, main_positions, the # Ensure we don't go below minimum if sub_y_start < min_top_y: sub_y_start = min_top_y - + # Final safety check: ensure we don't exceed absolute bottom if sub_y_start + total_sub_h > absolute_max: sub_y_start = max(min_top_y, absolute_max - total_sub_h) - + # Draw sub-branches sub_y = sub_y_start # Use pre-calculated sub_linewidth_padding # Use absolute bottom boundary - no rectangle should exceed this for sub_info in subs: # STRICT check: rectangle bottom must not exceed absolute bottom - rect_bottom = sub_y + layout['uniform_sub_h'] - + rect_bottom = sub_y + layout["uniform_sub_h"] + # Don't draw if rectangle itself would exceed absolute bottom if rect_bottom > self.absolute_bottom: break # Don't draw this or any subsequent sub-branches - if sub_y + layout['uniform_sub_h'] > max_sub_y: + if sub_y + layout["uniform_sub_h"] > max_sub_y: break - + # Extra safety check - leave margin for linewidth extension # Linewidth extends by half outward, so ensure rect_bottom + half_linewidth <= absolute_bottom half_linewidth_extension = (self.sub_linewidth / 2.0) / 72.0 if rect_bottom + half_linewidth_extension > self.absolute_bottom: break - + # Sub-branch rectangle - clip to axes to prevent overflow sub_rect = patches.Rectangle( - (x_sub, sub_y), layout['uniform_sub_w'], layout['uniform_sub_h'], - linewidth=self.sub_linewidth, edgecolor=self.colors['edge'], facecolor=self.colors['background'], zorder=2, - clip_on=True + (x_sub, sub_y), + layout["uniform_sub_w"], + layout["uniform_sub_h"], + linewidth=self.sub_linewidth, + edgecolor=self.colors["edge"], + facecolor=self.colors["background"], + zorder=2, + clip_on=True, ) ax.add_patch(sub_rect) - + # Sub-branch text ax.text( - x_sub + layout['uniform_sub_w'] / 2, - sub_y + layout['uniform_sub_h'] / 2, - sub_info['text'], - ha='center', va='center', fontsize=sub_info['fontsize'], - color=self.colors['text'], fontweight='bold', zorder=3 + x_sub + layout["uniform_sub_w"] / 2, + sub_y + layout["uniform_sub_h"] / 2, + sub_info["text"], + ha="center", + va="center", + fontsize=sub_info["fontsize"], + color=self.colors["text"], + fontweight="bold", + zorder=3, ) - + # Connection main to sub ax.plot( - [x_main + layout['uniform_main_w'], x_sub], - [main_y + layout['uniform_main_h'] / 2, sub_y + layout['uniform_sub_h'] / 2], - color=self.colors['link'], linewidth=2.5, alpha=0.5, zorder=1, solid_capstyle='round' + [x_main + layout["uniform_main_w"], x_sub], + [ + main_y + layout["uniform_main_h"] / 2, + sub_y + layout["uniform_sub_h"] / 2, + ], + color=self.colors["link"], + linewidth=2.5, + alpha=0.5, + zorder=1, + solid_capstyle="round", ) - - sub_y += layout['uniform_sub_h'] + sub_spacing - - if sub_y + layout['uniform_sub_h'] > max_sub_y: + + sub_y += layout["uniform_sub_h"] + sub_spacing + + if sub_y + layout["uniform_sub_h"] > max_sub_y: break - + # Title removed to maximize space for mindmap - + def _adjust_layout_for_overflow(self, layout, issues, iteration): """Adjust layout to fix overflow issues.""" # Create temp figure for re-fitting fig_temp = plt.figure(figsize=(self.fig_width, self.fig_height)) ax_temp = fig_temp.add_subplot(111) - ax_temp.axis('off') - + ax_temp.axis("off") + # Reduce font sizes more aggressively reduction_factor = 0.92 - (iteration * 0.02) # More aggressive each iteration - + # Adjust theme - if issues['theme_overflow']: - current_font = layout['theme']['fontsize'] + if issues["theme_overflow"]: + current_font = layout["theme"]["fontsize"] new_font = max(9, current_font * reduction_factor) font, wrapped, _, _ = self._fit_text_main( - ax_temp, self.main_theme, layout['theme']['width'], layout['theme']['height'], - new_font, 9 + ax_temp, + self.main_theme, + layout["theme"]["width"], + layout["theme"]["height"], + new_font, + 9, ) - layout['theme']['fontsize'] = font - layout['theme']['text'] = wrapped - + layout["theme"]["fontsize"] = font + layout["theme"]["text"] = wrapped + # Adjust main branches - if issues['main_overflows']: - for main in issues['main_overflows']: - current_font = layout['main_data'][main]['fontsize'] + if issues["main_overflows"]: + for main in issues["main_overflows"]: + current_font = layout["main_data"][main]["fontsize"] new_font = max(8, current_font * reduction_factor) font, wrapped, _, _ = self._fit_text_main( - ax_temp, main, layout['uniform_main_w'], layout['uniform_main_h'], - new_font, 8 + ax_temp, + main, + layout["uniform_main_w"], + layout["uniform_main_h"], + new_font, + 8, ) - layout['main_data'][main]['fontsize'] = font - layout['main_data'][main]['text'] = wrapped - + layout["main_data"][main]["fontsize"] = font + layout["main_data"][main]["text"] = wrapped + # Adjust for bottom cutoff - reduce vertical spacing or sub-branch height - if issues['bottom_cutoff']: + if issues["bottom_cutoff"]: # Reduce sub-branch height slightly - layout['uniform_sub_h'] *= 0.95 - layout['sub_spacing'] *= 0.95 + layout["uniform_sub_h"] *= 0.95 + layout["sub_spacing"] *= 0.95 # Re-fit all sub-branches for main in self.main_branches: - subs = layout['sub_data'].get(main, []) + subs = layout["sub_data"].get(main, []) for i, sub_info in enumerate(subs): sub_text = self.sub_branches[main][i] font, wrapped, _, _ = self._fit_text_sub( - ax_temp, sub_text, layout['uniform_sub_w'] * 0.94, - layout['uniform_sub_h'] * 0.94, layout['sub_data'][main][i]['fontsize'], 8 + ax_temp, + sub_text, + layout["uniform_sub_w"] * 0.94, + layout["uniform_sub_h"] * 0.94, + layout["sub_data"][main][i]["fontsize"], + 8, ) - layout['sub_data'][main][i]['fontsize'] = font - layout['sub_data'][main][i]['text'] = wrapped - + layout["sub_data"][main][i]["fontsize"] = font + layout["sub_data"][main][i]["text"] = wrapped + plt.close(fig_temp) return layout - + def save(self, fig: plt.Figure): """Save the figure as PNG and SVG.""" - filename = self.title.replace(' ', '_') + filename = self.title.replace(" ", "_") png_path = os.path.join(self.output_dir, f"{filename}.png") svg_path = os.path.join(self.output_dir, f"{filename}.svg") - + # Don't use bbox_inches='tight' to ensure we stay within figure bounds # The axes limits are already set correctly in plot() fig.savefig(png_path, transparent=True, dpi=300) fig.savefig(svg_path, transparent=True) - + print(f"Saved: {png_path}") print(f"Saved: {svg_path}") @@ -949,11 +1124,12 @@ def plot_mindmap( title: str = "Mind Map", color_scheme: str = "gold", output_dir: str = "./outputs", - aspect_ratio: float = 8/9 + aspect_ratio: float = 8 / 9, ) -> Tuple[plt.Figure, plt.Axes]: - """Plot a mindmap from a DataFrame.""" - plotter = MindmapPlotter(mindmap, main_theme, title, color_scheme, output_dir, aspect_ratio) + plotter = MindmapPlotter( + mindmap, main_theme, title, color_scheme, output_dir, aspect_ratio + ) fig, ax = plotter.plot() plotter.save(fig) return fig, ax diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 9eef2ea..5ba8882 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -9,6 +9,10 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category from bigdata_research_tools.llm.base import LLMConfig +from bigdata_research_tools.mindmap.mindmap import ( + MindMap, + generate_risk_tree, +) from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -17,10 +21,6 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.mindmap.mindmap import ( - MindMap, - generate_risk_tree, -) from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index cbf62f8..f33b187 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -9,6 +9,7 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.llm.base import LLMConfig +from bigdata_research_tools.mindmap.mindmap import generate_theme_tree from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -17,7 +18,6 @@ WorkflowTraceEvent, send_trace, ) -from bigdata_research_tools.mindmap.mindmap import generate_theme_tree from bigdata_research_tools.workflows.base import Workflow from bigdata_research_tools.workflows.utils import get_scored_df From 77354250da5351acbe3f2192e07460877ea55d2c Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 18 Nov 2025 16:11:45 +0000 Subject: [PATCH 71/82] robust grounding search --- examples/grounded_mindmaps.py | 4 +-- .../mindmap/mindmap_generator.py | 25 +++++++++---------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index b3750bc..ce30dde 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -128,8 +128,8 @@ def test_dynamic_mindmap( ], month_names=["October_2025", "November_2025", "December_2025"], ) - logger.info("Results: %s", mindmap["base_mindmap"]) - logger.info("Results: %s", mindmap["October_2025"]) + logger.info("Results: %s", mindmap["base_mindmap"]['mindmap_json']) + logger.info("Results: %s", mindmap["October_2025"]['mindmap_json']) logger.info("") diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 7c18f2f..65a504f 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -14,6 +14,7 @@ from bigdata_client.query import ( Keyword, Similarity, + Entity ) from tqdm import tqdm @@ -798,27 +799,25 @@ def _run_and_collate_search( entity_objs = [] for entity_name in entities_list: try: - entity = self.bigdata_connection.knowledge_graph.autosuggest( - entity_name, limit=1 - )[0] - entity_objs.append(entity) + suggestions = self.bigdata_connection.knowledge_graph.autosuggest(entity_name, limit=1) + if suggestions: # Check if list is not empty + entity = suggestions[0] + entity_objs.append(entity) + else: + print(f"Warning: No autosuggest results for '{entity_name}'") except Exception as e: print(f"Warning: Autosuggest failed for '{entity_name}': {e}") continue - print( - f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}" - ) - confirmed_entities = [ - entity - for entity, orig_str in zip(entity_objs, entities_list) - if entity.name in orig_str or orig_str in entity.name - ] + + confirmed_entities = [entity.id for entity, orig_str in zip(entity_objs, entities_list) if entity.name.lower() in orig_str.lower() or orig_str.lower() in entity.name.lower()] if confirmed_entities: - entities = BigdataAny(confirmed_entities) + + entities = Any([Entity(entity) for entity in confirmed_entities]) else: entities = None else: entities = None + print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") if keywords_list: print(f"Searching with keywords: {keywords_list}") keywords = BigdataAny([Keyword(kw) for kw in keywords_list]) From 96c22173735c77e453ce2630f978acf3b5866478 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Tue, 18 Nov 2025 18:31:39 +0000 Subject: [PATCH 72/82] improved typing and robust argument passing --- examples/grounded_mindmaps.py | 11 +- .../mindmap/mindmap_generator.py | 419 +++++++----------- 2 files changed, 166 insertions(+), 264 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index ce30dde..ac66db1 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -60,7 +60,6 @@ def test_refined_mindmap( focus=focus, main_theme=main_theme, initial_mindmap=base_mindmap, - grounding_method="tool_call", output_dir="./refined_mindmaps", filename="refined_mindmap.json", map_type=map_type, @@ -92,7 +91,7 @@ def test_refined_mindmap2( focus=focus, main_theme=main_theme, initial_mindmap=base_mindmap, - grounding_method="tool_call", + date_range = ("2025-10-01", "2025-10-31"), output_dir="./refined_mindmaps", filename="refined_mindmap.json", map_type=map_type, @@ -122,11 +121,11 @@ def test_dynamic_mindmap( focus=focus, main_theme=main_theme, month_intervals=[ - ["2025-10-01", "2025-10-31"], - ["2025-11-01", "2025-11-30"], - ["2025-12-01", "2025-12-31"], + ("2025-09-01", "2025-09-30"), + ("2025-10-01", "2025-10-31"), + ("2025-11-01", "2025-11-30"), ], - month_names=["October_2025", "November_2025", "December_2025"], + month_names=["September_2025", "October_2025", "November_2025", ], ) logger.info("Results: %s", mindmap["base_mindmap"]['mindmap_json']) logger.info("Results: %s", mindmap["October_2025"]['mindmap_json']) diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 65a504f..41b3e2c 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -4,13 +4,14 @@ import re from concurrent.futures import ThreadPoolExecutor, as_completed from logging import Logger, getLogger -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Optional from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange from bigdata_client.models.search import DocumentType, SortBy from bigdata_client.query import ( Any as BigdataAny, ) +from bigdata_client.models.search import DocumentType, SortBy from bigdata_client.query import ( Keyword, Similarity, @@ -232,12 +233,17 @@ def compose_tool_call_message( focus: str, map_type: str, instructions: Optional[str], + date_range: Optional[tuple[str,str]], initial_mindmap: Optional[str], ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] + tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." + if initial_mindmap: - tool_prompt += f"Starting from the following mind map:\n{initial_mindmap}" + tool_prompt += f"\nStarting from the following mind map:\n{initial_mindmap}" + if date_range is not None: + tool_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" tool_prompt += f"\nReturn a list of searches you would like to perform to enhance it.\n{enforce_structure}" @@ -296,16 +302,22 @@ def compose_final_message( focus: str, map_type: str, instructions: Optional[str], + date_range: Optional[tuple[str,str]], tool_calls, tool_call_id, context, ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] + final_prompt = f"{instructions} {focus}. \nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}" + + if date_range is not None: + tool_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" + final_message = [ { "role": "system", - "content": f"{instructions} {focus}. IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant. \n{enforce_structure}", + "content": final_prompt, }, { "role": "user", @@ -325,6 +337,7 @@ def compose_refinement_message( focus: str, map_type: str, instructions: Optional[str], + date_range: Optional[tuple[str,str]], initial_mindmap: str, context: str, tool_calls, @@ -332,12 +345,11 @@ def compose_refinement_message( ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] - refine_prompt = ( - f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus} " - "Based on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text." - "IMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant." - f"{enforce_structure}." - ) + refine_prompt = f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus}.\nBased on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text.\nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}." + + if date_range is not None: + refine_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" + refinement_messages = [ {"role": "system", "content": refine_prompt}, {"role": "user", "content": initial_mindmap}, @@ -349,115 +361,73 @@ def compose_refinement_message( def generate_one_shot( self, - focus: str, main_theme: str, - instructions: Optional[str] = None, + focus: str, allow_grounding: bool = False, - grounding_method: str = "tool_call", - date_range: Optional[Tuple[str, str]] = None, + instructions: Optional[str] = None, + date_range: Optional[tuple[str, str]] = None, map_type: str = "risk", - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Generate a mind map in one LLM call, optionally allowing the LLM to request grounding. If allow_grounding is True, use the specified grounding_method ("tool_call" or "chat"). Optionally log intermediate steps to disk. """ - messages = self.compose_base_message(main_theme, focus, map_type, instructions) + messages = self.compose_base_message(main_theme=main_theme, focus=focus, map_type=map_type, instructions=instructions) llm_kwargs = self.llm_model_config_base.get_llm_kwargs( remove_max_tokens=True, remove_timeout=True ) if allow_grounding: - if grounding_method == "tool_call": - messages.append( - { - "role": "user", - "content": "You can use news search to find relevant information about the topic. " - "Use the Bigdata API to search for news articles related to the topic and use them to inform your response. You will need to specify a list of sentences, a list of entities, and a list of keywords.", - } - ) - tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( - self.send_tool_call(messages, self.llm_base, llm_kwargs) + messages = self.compose_tool_call_message( + main_theme=main_theme, focus=focus, map_type=map_type, instructions=instructions, date_range=date_range, + ) + tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( + self.send_tool_call(messages, self.llm_base, llm_kwargs) + ) + + if search_list and isinstance(search_list, list): + context = self._run_and_collate_search( + search_list, entities_list, keywords_list, date_range=date_range ) - if search_list and isinstance(search_list, list): - context = self._run_and_collate_search( - search_list, entities_list, keywords_list, date_range=date_range - ) + final_messages = self.compose_final_message( + main_theme=main_theme, + focus=focus, + map_type=map_type, + instructions=instructions, + date_range=date_range, + tool_calls=tool_calls, + tool_call_id=tool_call_id, + context=context, + ) - final_messages = self.compose_final_message( - main_theme, - focus, - map_type, - instructions, - tool_calls, - tool_call_id, - context, - ) + mindmap_text = self.llm_base.get_response(final_messages) - mindmap_text = self.llm_base.get_response(final_messages) - - theme_tree = self._parse_llm_to_themetree(mindmap_text) - df = self._themetree_to_dataframe(theme_tree) - return { - "mindmap_text": mindmap_text, - "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), ##where does this come from? - "grounded": True, - "search_queries": search_list, - "search_context": context, - } - else: - # decide if this fallback should be simplified - mindmap_text = search_list if isinstance(search_list, str) else "" - theme_tree = self._parse_llm_to_themetree( - mindmap_text - ) ## check if correct - df = format_mindmap_to_dataframe(mindmap_text) - return { - "mindmap_text": mindmap_text, - "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), - "grounded": False, - } + theme_tree = self._parse_llm_to_themetree(mindmap_text) + df = self._themetree_to_dataframe(theme_tree) + return { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), ##where does this come from? + "grounded": True, + "search_queries": search_list, + "search_context": context, + } else: # decide if this fallback should be simplified - messages[0]["content"] += ( - " You may request news search to ground your mind map. " - "If you want to search, return a list of queries." - ) - response = self.llm_base.get_response(messages) - - queries = self._parse_queries(response) - - if queries: - context = self._run_and_collate_search(queries, [], []) - - followup_messages = [ - {"role": "system", "content": f"{instructions} {focus}"}, - { - "role": "user", - "content": prompts_dict[map_type][ - "user_prompt_message" - ].format(main_theme=main_theme), - }, - { - "role": "assistant", - "content": "News search results:\n" + context, - }, - ] - mindmap_text = self.llm_base.get_response(followup_messages) - - df = format_mindmap_to_dataframe(mindmap_text) - return { - "mindmap_text": mindmap_text, - "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), - "grounded": True, - "search_queries": queries, - "search_context": context, - } + mindmap_text = search_list if isinstance(search_list, str) else "" + theme_tree = self._parse_llm_to_themetree( + mindmap_text + ) ## check if correct + df = format_mindmap_to_dataframe(mindmap_text) + return { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "grounded": False, + } # Default: just generate mind map mindmap_text = self.llm_base.get_response(messages) @@ -473,98 +443,61 @@ def generate_one_shot( def generate_refined( self, - focus: str, main_theme: str, + focus: str, initial_mindmap: str, - grounding_method: str = "tool_call", output_dir: str = "./refined_mindmaps", filename: str = "refined_mindmap.json", map_type: str = "risk", instructions: Optional[str] = None, - search_scope: Optional[Any] = None, - sortby: Optional[Any] = None, - date_range: Optional[Any] = None, + search_scope: Optional[DocumentType] = None, + sortby: Optional[SortBy] = None, + date_range: Optional[tuple[str, str]] = None, chunk_limit: Optional[int] = 20, **llm_kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Refine an initial mind map: LLM proposes searches, search is run, LLM refines mind map with search results. Optionally log intermediate steps to disk. """ + messages = self.compose_tool_call_message( - main_theme, focus, map_type, instructions, initial_mindmap + main_theme=main_theme, + focus=focus, + map_type=map_type, + instructions=instructions, + date_range=date_range, + initial_mindmap=initial_mindmap ) llm_kwargs = self.llm_model_config_reasoning.get_llm_kwargs( remove_max_tokens=True, remove_timeout=True ) - if grounding_method == "tool_call": - tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( - self.send_tool_call(messages, self.llm_reasoning, llm_kwargs=llm_kwargs) - ) - - if search_list and isinstance(search_list, list): - context = self._run_and_collate_search( - search_list, - entities_list, - keywords_list, - search_scope, - sortby, - date_range, - chunk_limit, - ) - - refinement_messages = self.compose_refinement_message( - main_theme, - focus, - map_type, - instructions, - initial_mindmap, - context, - tool_calls, - tool_call_id, - ) - mindmap_text = self.llm_reasoning.get_response(refinement_messages) - theme_tree = self._parse_llm_to_themetree(mindmap_text) - df = self._themetree_to_dataframe(theme_tree) - result_dict = { - "mindmap_text": mindmap_text, - "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), - "search_queries": search_list, - "search_context": context, - } - save_results_to_file(result_dict, output_dir, filename) - return result_dict - else: - mindmap_text = search_list if isinstance(search_list, str) else "" - df = format_mindmap_to_dataframe(mindmap_text) - result_dict = { - "mindmap_text": mindmap_text, - "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), - "search_queries": [], - "search_context": "", - } - save_results_to_file(result_dict, output_dir, filename) - return result_dict - else: - queries_json = self.llm_reasoning.get_response(messages) + tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( + self.send_tool_call(messages, self.llm_reasoning, llm_kwargs=llm_kwargs) + ) - search_queries = self._parse_queries(queries_json) + if search_list and isinstance(search_list, list): context = self._run_and_collate_search( - search_queries, [], [], search_scope, sortby, date_range, chunk_limit + search_list, + entities_list, + keywords_list, + search_scope, + sortby, + date_range, + chunk_limit, ) refinement_messages = self.compose_refinement_message( - main_theme, - focus, - map_type, - instructions, - initial_mindmap, - context, - tool_calls, - tool_call_id, + main_theme=main_theme, + focus=focus, + map_type=map_type, + instructions=instructions, + date_range=date_range, + initial_mindmap=initial_mindmap, + tool_calls=tool_calls, + tool_call_id=tool_call_id, + context=context ) mindmap_text = self.llm_reasoning.get_response(refinement_messages) @@ -574,26 +507,36 @@ def generate_refined( "mindmap_text": mindmap_text, "mindmap_df": df, "mindmap_json": theme_tree.to_json(), - "search_queries": search_queries, + "search_queries": search_list, "search_context": context, } save_results_to_file(result_dict, output_dir, filename) return result_dict + else: + mindmap_text = search_list if isinstance(search_list, str) else "" + df = format_mindmap_to_dataframe(mindmap_text) + result_dict = { + "mindmap_text": mindmap_text, + "mindmap_df": df, + "mindmap_json": theme_tree.to_json(), + "search_queries": [], + "search_context": "", + } + save_results_to_file(result_dict, output_dir, filename) + return result_dict + def generate_or_load_refined( self, - instructions: str, - focus: str, main_theme: str, + focus: str, map_type: str, initial_mindmap: str, - llm_model: str = "o3-mini", - reasoning_effort: str = "high", - search_scope: Any = None, - sortby: Any = None, - date_range: Any = None, - chunk_limit: int = 20, - grounding_method: str = "tool_call", + instructions: Optional[str], + search_scope: Optional[DocumentType] = None, + sortby: Optional[SortBy] = None, + date_range: Optional[tuple[str, str]] = None, + chunk_limit: Optional[int] = 20, output_dir: str = "./bootstrapped_mindmaps", filename: str = "refined_mindmap", i: int = 0, @@ -609,9 +552,10 @@ def generate_or_load_refined( main_theme=main_theme, map_type=map_type, initial_mindmap=initial_mindmap, - reasoning_effort=reasoning_effort, - grounding_method=grounding_method, date_range=date_range, + search_scope=search_scope, + sortby=sortby, + chunk_limit=chunk_limit, output_dir=output_dir, filename=f"{filename}_{i}.json", ) @@ -624,8 +568,6 @@ def generate_or_load_refined( main_theme=main_theme, map_type=map_type, initial_mindmap=initial_mindmap, - reasoning_effort=reasoning_effort, - grounding_method=grounding_method, date_range=date_range, output_dir=output_dir, filename=f"{filename}_{i}.json", @@ -635,16 +577,15 @@ def generate_or_load_refined( def bootstrap_refined( self, - instructions: str, - focus: str, main_theme: str, + focus: str, map_type: str, initial_mindmap: str, - search_scope: Any = None, - sortby: Any = None, - date_range: Any = None, + instructions: Optional[str], + search_scope: Optional[DocumentType] = None, + sortby: Optional[SortBy] = None, + date_range: Optional[tuple[str, str]] = None, chunk_limit: int = 20, - grounding_method: str = "tool_call", output_dir: str = "./bootstrapped_mindmaps", filename: str = "refined_mindmap", n_elements: int = 50, @@ -680,7 +621,6 @@ def bootstrap_refined( sortby=sortby, date_range=date_range, chunk_limit=chunk_limit, - grounding_method=grounding_method, output_dir=output_dir, filename=filename, i=i, @@ -704,19 +644,18 @@ def bootstrap_refined( def generate_dynamic( self, - instructions: str, - focus: str, main_theme: str, - month_intervals: List[Tuple[str, str]], - month_names: List[str], - search_scope: Any = None, - sortby: Any = None, - chunk_limit: int = 20, - grounding_method: str = "tool_call", + focus: str, + month_intervals: list[tuple[str, str]], + month_names: list[str], + instructions: Optional[str], + search_scope: Optional[DocumentType] = None, + sortby: Optional[SortBy] = None, + chunk_limit: Optional[int] = 20, map_type: str = "risk", output_dir: str = "./dynamic_mindmaps", **llm_kwargs, - ) -> List[Dict[str, Any]]: + ) -> list[dict[str, Any]]: """ Dynamic/iterative mind map generation over time intervals. Returns a list of dicts, one per interval. @@ -725,23 +664,28 @@ def generate_dynamic( results = {} # Step 1: Generate initial mind map for t0 one_shot = self.generate_one_shot( - instructions, focus, main_theme, map_type=map_type, **llm_kwargs + main_theme=main_theme, + focus=focus, + allow_grounding=False, + instructions=instructions, + map_type=map_type, + **llm_kwargs ) - prev_mindmap = one_shot["mindmap_text"] + prev_mindmap = one_shot["mindmap_json"] + print(prev_mindmap) results["base_mindmap"] = one_shot # Step 2: For each subsequent interval, refine using previous mind map and new search, including starting month - for i, (interval, month_name) in enumerate( + for i, (date_range, month_name) in enumerate( zip(month_intervals, month_names), start=0 ): - date_range = self._make_absolute_date_range(interval) + refined = self.generate_refined( - focus=focus, main_theme=main_theme, + focus=focus, initial_mindmap=prev_mindmap, - grounding_method=grounding_method, + map_type=map_type, output_dir=output_dir, filename=f"{month_name}.json", - map_type=map_type, instructions=instructions, search_scope=search_scope, sortby=sortby, @@ -751,18 +695,18 @@ def generate_dynamic( ) results[month_name] = refined - prev_mindmap = refined["mindmap_text"] + prev_mindmap = refined["mindmap_json"] return results def _run_and_collate_search( self, - search_list: List[str], - entities_list: List[str], - keywords_list: List[str], - search_scope: Any = None, - sortby: Any = None, - date_range: Any = None, - chunk_limit: int = 20, + search_list: list[str], + entities_list: Optional[list[str]], + keywords_list: Optional[list[str]], + search_scope: Optional[DocumentType] = None, + sortby: Optional[SortBy] = None, + date_range: Optional[tuple[str, str]] = None, + chunk_limit: Optional[int] = 20, ) -> str: """ Run Bigdata search for each query and collate results for LLM context. @@ -775,24 +719,10 @@ def _run_and_collate_search( scope = search_scope if search_scope is not None else DocumentType.NEWS sortby = sortby if sortby is not None else SortBy.RELEVANCE - # --- Robust date_range parsing --- - # If date_range is a list of one tuple, unpack it - if ( - isinstance(date_range, list) - and len(date_range) == 1 - and isinstance(date_range[0], (tuple, list)) - and len(date_range[0]) == 2 - ): - date_range = date_range[0] - # If date_range is a tuple/list of two strings, convert to AbsoluteDateRange - if ( - isinstance(date_range, (tuple, list)) - and len(date_range) == 2 - and all(isinstance(x, str) for x in date_range) - ): - date_range = AbsoluteDateRange(start=date_range[0], end=date_range[1]) - elif date_range is None: + if date_range is None: date_range = RollingDateRange.LAST_THIRTY_DAYS + else: + date_range = AbsoluteDateRange(start=date_range[0], end=date_range[1]) if entities_list: print(f"Entities List: {entities_list}") @@ -812,7 +742,7 @@ def _run_and_collate_search( confirmed_entities = [entity.id for entity, orig_str in zip(entity_objs, entities_list) if entity.name.lower() in orig_str.lower() or orig_str.lower() in entity.name.lower()] if confirmed_entities: - entities = Any([Entity(entity) for entity in confirmed_entities]) + entities = BigdataAny([Entity(entity) for entity in confirmed_entities]) else: entities = None else: @@ -842,7 +772,7 @@ def _run_and_collate_search( return self.collate_results(all_results) - def collate_results(self, results: List[Tuple[str, Any]]) -> str: + def collate_results(self, results: dict[tuple[str, str], list]) -> str: """ Collate a list of (query, result) tuples into a single string for LLM context. @@ -868,30 +798,3 @@ def collate_results(self, results: List[Tuple[str, Any]]) -> str: docstr += f"{chunk.text}\n" doctexts.append(docstr) return "\n".join(doctexts) - - @staticmethod - def _parse_queries(self, queries_json: str) -> List[str]: - """ - Parse LLM output (JSON or text) into a list of search queries. - """ - import json - - try: - queries = json.loads(queries_json) - if isinstance(queries, list): - return queries - elif isinstance(queries, dict) and "search_list" in queries: - return queries["search_list"] - elif isinstance(queries, dict) and "queries" in queries: - return queries["queries"] - except Exception: - # Fallback: split by lines - return [q.strip() for q in queries_json.splitlines() if q.strip()] - return [] - - @staticmethod - def _make_absolute_date_range(interval: Tuple[str, str]) -> Any: - """ - Helper to create an AbsoluteDateRange object from a (start, end) tuple. - """ - return AbsoluteDateRange(start=interval[0], end=interval[1]) From c08938a6b0d7950cac5d88b2fda85f8f077b61d1 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 19 Nov 2025 09:47:51 +0100 Subject: [PATCH 73/82] Improve typing --- examples/grounded_mindmaps.py | 10 ++++---- src/bigdata_research_tools/llm/openai.py | 2 +- .../mindmap/mindmap_generator.py | 24 ++++++++----------- src/bigdata_research_tools/search/search.py | 2 +- .../visuals/mindmap_visuals.py | 3 ++- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index ac66db1..f9adf78 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -22,7 +22,7 @@ def test_one_shot_mindmap( map_type, instructions, llm_base_config: str = "openai::gpt-4o-mini", -) -> MindMap: +): """Test one-shot mind map generation with base LLM.""" logger.info("=" * 60) logger.info("TEST 1: One-Shot Mind Map Generation with Base LLM") @@ -48,7 +48,7 @@ def test_refined_mindmap( instructions, base_mindmap: str, llm_base_config: str = "openai::o3-mini", -) -> MindMap: +): """Test refined mindmap generation with reasoning LLM sent in the base config.""" logger.info("=" * 60) logger.info("TEST 2: Refined MindMap Generation with Reasoning LLM in Base Config") @@ -74,9 +74,9 @@ def test_refined_mindmap2( map_type, instructions, base_mindmap: str, - llm_base_config: str | None = None, + llm_base_config: str, llm_reasoning_config: str = "openai::o3-mini", -) -> MindMap: +): """Test refined mindmap generation with reasoning LLM sent in the reasoning config.""" logger.info("=" * 60) logger.info( @@ -107,7 +107,7 @@ def test_dynamic_mindmap( instructions, llm_base_config: str = "openai::gpt-4o-mini", llm_reasoning_config: str = "openai::o3-mini", -) -> MindMap: +): """Test dynamic mindmap generation with two LLMs.""" logger.info("=" * 60) logger.info("TEST 4: Dynamic MindMap Generation with Two LLMs") diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 7ec6fe6..61a54b8 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -169,7 +169,7 @@ def get_tools_response( chat_history: list[dict[str, str]], tools: list[dict[str, str]], **kwargs, - ) -> dict[str, list[dict] | str]: + ) -> dict[str, list[dict] | str | list[str]]: """ Get the response from an LLM model from OpenAI with tools. Args: diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 41b3e2c..947aab8 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -29,14 +29,10 @@ prompts_dict, save_results_to_file, ) +from bigdata_client.models.advanced_search_query import QueryComponent +from bigdata_research_tools.search.search import SEARCH_QUERY_RESULTS_TYPE -# from bigdata_research_tools.search.query_builder import ( -# EntitiesToSearch, -# build_batched_query, -# create_date_ranges, -# ) -# cannot use query builder because it is to error-prone to build EntitiesToSearch based on the LLM output -from bigdata_research_tools.search.search import run_search +from bigdata_research_tools.search.search import run_search, INPUT_DATE_RANGE logger: Logger = getLogger(__name__) @@ -261,7 +257,7 @@ def compose_tool_call_message( def send_tool_call( self, messages: list, llm_client: LLMEngine, llm_kwargs: dict - ) -> list: + ) -> tuple[str | None, dict | None, list | None, list | None, list | None]: llm_kwargs.update( { "tool_choice": { @@ -341,7 +337,7 @@ def compose_refinement_message( initial_mindmap: str, context: str, tool_calls, - tool_call_id, + tool_call_id: str | None, ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] @@ -754,7 +750,7 @@ def _run_and_collate_search( else: keywords = None - queries = [Similarity(sentence) for sentence in search_list] + queries: list[QueryComponent] = [Similarity(sentence) for sentence in search_list] if entities: queries = [query & entities for query in queries] if keywords: @@ -784,10 +780,10 @@ def collate_results(self, results: dict[tuple[str, str], list]) -> str: """ doctexts = [] for (text_query, date_range), result in results.items(): - for item in text_query.items: - dictitem = item.to_dict() - if dictitem["type"] == "similarity": - sentence = dictitem["value"] + + dictitem = text_query.to_dict() + if dictitem["type"] == "similarity": + sentence = dictitem["value"] docstr = f"###Query: {sentence}\n ### Results:\n" for doc in result: headline = getattr(doc, "headline", "No headline") diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 2e545f1..9aefaf1 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -48,7 +48,7 @@ ) SEARCH_QUERY_RESULTS_TYPE = dict[ - tuple[QueryComponent, AbsoluteDateRange | RollingDateRange], list[Document] + tuple[QueryComponent, str], list[Document] ] REQUESTS_PER_MINUTE_LIMIT = 300 diff --git a/src/bigdata_research_tools/visuals/mindmap_visuals.py b/src/bigdata_research_tools/visuals/mindmap_visuals.py index 3d9ab0a..c09b468 100644 --- a/src/bigdata_research_tools/visuals/mindmap_visuals.py +++ b/src/bigdata_research_tools/visuals/mindmap_visuals.py @@ -38,6 +38,7 @@ """ import matplotlib +from typing import Any matplotlib.use("Agg") # Use non-interactive backend import math @@ -609,7 +610,7 @@ def _check_overflow( self, ax, layout, x_theme, x_main, x_sub, main_positions, theme_y ) -> Dict: """Check for text and rectangle overflow, return adjustments needed.""" - issues = { + issues: dict[str, Any] = { "theme_overflow": False, "main_overflows": {}, "sub_overflows": {}, From ed42c464f58de3f985d1028cb62ba118c3119edd Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 19 Nov 2025 10:36:54 +0100 Subject: [PATCH 74/82] Remove all type errors --- src/bigdata_research_tools/llm/azure.py | 4 +-- src/bigdata_research_tools/llm/base.py | 8 ++--- src/bigdata_research_tools/llm/bedrock.py | 4 +-- src/bigdata_research_tools/llm/openai.py | 6 ++-- .../mindmap/mindmap_generator.py | 34 +++++++++---------- src/bigdata_research_tools/search/search.py | 22 +++++------- 6 files changed, 36 insertions(+), 42 deletions(-) diff --git a/src/bigdata_research_tools/llm/azure.py b/src/bigdata_research_tools/llm/azure.py index 7929568..37ac3b9 100644 --- a/src/bigdata_research_tools/llm/azure.py +++ b/src/bigdata_research_tools/llm/azure.py @@ -97,7 +97,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st async def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: @@ -234,7 +234,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 413f420..4979064 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -112,7 +112,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st async def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: @@ -202,7 +202,7 @@ async def get_stream_response( async def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: @@ -244,7 +244,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], **kwargs, ) -> dict[str, list[dict] | str]: """ @@ -330,7 +330,7 @@ def get_stream_response( def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], **kwargs, ) -> dict[str, list[dict] | str]: """ diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index e50456e..2107722 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -113,7 +113,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st async def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: @@ -274,7 +274,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], temperature: float = 0, **kwargs, ) -> dict[str, list[dict] | str]: diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 61a54b8..54b7116 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -58,7 +58,7 @@ async def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> st async def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], **kwargs, ) -> dict[str, list[dict] | str]: """ @@ -167,9 +167,9 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: def get_tools_response( self, chat_history: list[dict[str, str]], - tools: list[dict[str, str]], + tools: list[dict], **kwargs, - ) -> dict[str, list[dict] | str | list[str]]: + ) -> dict: """ Get the response from an LLM model from OpenAI with tools. Args: diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 947aab8..40ac243 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -257,7 +257,7 @@ def compose_tool_call_message( def send_tool_call( self, messages: list, llm_client: LLMEngine, llm_kwargs: dict - ) -> tuple[str | None, dict | None, list | None, list | None, list | None]: + ) -> tuple: llm_kwargs.update( { "tool_choice": { @@ -275,9 +275,9 @@ def send_tool_call( if response_dict["tool_calls"] is not None: tool_call_id = response_dict["id"][0] arguments = response_dict["arguments"][0] - search_list = arguments.get("search_list", []) - entities_list = arguments.get("entities_list", []) - keywords_list = arguments.get("keywords_list", []) + search_list = arguments.get("search_list", []) # ty: ignore[possibly-missing-attribute] + entities_list = arguments.get("entities_list", []) # ty: ignore[possibly-missing-attribute] + keywords_list = arguments.get("keywords_list", []) # ty: ignore[possibly-missing-attribute] return ( tool_call_id, response_dict["tool_calls"], @@ -307,9 +307,6 @@ def compose_final_message( final_prompt = f"{instructions} {focus}. \nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}" - if date_range is not None: - tool_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" - final_message = [ { "role": "system", @@ -378,6 +375,7 @@ def generate_one_shot( if allow_grounding: messages = self.compose_tool_call_message( main_theme=main_theme, focus=focus, map_type=map_type, instructions=instructions, date_range=date_range, + initial_mindmap=None ) tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( self.send_tool_call(messages, self.llm_base, llm_kwargs) @@ -449,7 +447,7 @@ def generate_refined( search_scope: Optional[DocumentType] = None, sortby: Optional[SortBy] = None, date_range: Optional[tuple[str, str]] = None, - chunk_limit: Optional[int] = 20, + chunk_limit: int = 20, **llm_kwargs, ) -> dict[str, Any]: """ @@ -514,7 +512,7 @@ def generate_refined( result_dict = { "mindmap_text": mindmap_text, "mindmap_df": df, - "mindmap_json": theme_tree.to_json(), + "mindmap_json": "", "search_queries": [], "search_context": "", } @@ -532,7 +530,7 @@ def generate_or_load_refined( search_scope: Optional[DocumentType] = None, sortby: Optional[SortBy] = None, date_range: Optional[tuple[str, str]] = None, - chunk_limit: Optional[int] = 20, + chunk_limit: int = 20, output_dir: str = "./bootstrapped_mindmaps", filename: str = "refined_mindmap", i: int = 0, @@ -647,11 +645,11 @@ def generate_dynamic( instructions: Optional[str], search_scope: Optional[DocumentType] = None, sortby: Optional[SortBy] = None, - chunk_limit: Optional[int] = 20, + chunk_limit: int = 20, map_type: str = "risk", output_dir: str = "./dynamic_mindmaps", **llm_kwargs, - ) -> list[dict[str, Any]]: + ) -> dict[str, dict[str, Any]]: """ Dynamic/iterative mind map generation over time intervals. Returns a list of dicts, one per interval. @@ -702,7 +700,7 @@ def _run_and_collate_search( search_scope: Optional[DocumentType] = None, sortby: Optional[SortBy] = None, date_range: Optional[tuple[str, str]] = None, - chunk_limit: Optional[int] = 20, + chunk_limit: int = 20, ) -> str: """ Run Bigdata search for each query and collate results for LLM context. @@ -716,9 +714,9 @@ def _run_and_collate_search( sortby = sortby if sortby is not None else SortBy.RELEVANCE if date_range is None: - date_range = RollingDateRange.LAST_THIRTY_DAYS + date_range_filter = RollingDateRange.LAST_THIRTY_DAYS else: - date_range = AbsoluteDateRange(start=date_range[0], end=date_range[1]) + date_range_filter = AbsoluteDateRange(start=date_range[0], end=date_range[1]) if entities_list: print(f"Entities List: {entities_list}") @@ -741,9 +739,9 @@ def _run_and_collate_search( entities = BigdataAny([Entity(entity) for entity in confirmed_entities]) else: entities = None + print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") else: entities = None - print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") if keywords_list: print(f"Searching with keywords: {keywords_list}") keywords = BigdataAny([Keyword(kw) for kw in keywords_list]) @@ -758,7 +756,7 @@ def _run_and_collate_search( all_results = run_search( queries=queries, - date_ranges=date_range, + date_ranges=date_range_filter, sortby=sortby, scope=scope, limit=chunk_limit, @@ -768,7 +766,7 @@ def _run_and_collate_search( return self.collate_results(all_results) - def collate_results(self, results: dict[tuple[str, str], list]) -> str: + def collate_results(self, results: dict) -> str: """ Collate a list of (query, result) tuples into a single string for LLM context. diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 9aefaf1..48e20ab 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -13,13 +13,14 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime -from typing import Literal, overload +from typing import Literal, overload, cast from bigdata_client import Bigdata from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange from bigdata_client.document import Document from bigdata_client.models.advanced_search_query import QueryComponent from bigdata_client.models.search import DocumentType, SortBy +from matplotlib.pylab import Sequence from tqdm import tqdm from bigdata_research_tools.client import bigdata_connection, init_bigdata_client @@ -30,15 +31,7 @@ send_trace, ) -NORMALIZED_DATE_RANGE = ( - list[tuple[datetime, datetime]] - | list[RollingDateRange] - | list[AbsoluteDateRange] - | list[tuple[datetime, datetime] | RollingDateRange] - | list[tuple[datetime, datetime] | AbsoluteDateRange] - | list[AbsoluteDateRange | RollingDateRange] - | list[tuple[datetime, datetime] | AbsoluteDateRange | RollingDateRange] -) +NORMALIZED_DATE_RANGE = Sequence[tuple[datetime, datetime] | AbsoluteDateRange | RollingDateRange] INPUT_DATE_RANGE = ( tuple[datetime, datetime] @@ -285,8 +278,11 @@ def get_quota_consumed(self) -> float: def normalize_date_range( date_ranges: INPUT_DATE_RANGE, ) -> NORMALIZED_DATE_RANGE: - if not isinstance(date_ranges, list): - date_ranges = [date_ranges] + if isinstance(date_ranges, (AbsoluteDateRange, RollingDateRange, tuple)): + return cast(NORMALIZED_DATE_RANGE, [date_ranges]) + if isinstance(date_ranges, Sequence): + if all(isinstance(dr, (tuple, AbsoluteDateRange, RollingDateRange)) for dr in date_ranges): + return list(date_ranges) return date_ranges @@ -357,7 +353,7 @@ def run_search( If `only_results` is False, returns a mapping of the tuple of search query and date range to the list of the corresponding search results. """ - date_ranges = normalize_date_range(date_ranges) + date_ranges = list(normalize_date_range(date_ranges)) if isinstance(date_ranges[0], tuple) or isinstance(date_ranges[0], list): date_ranges.sort(key=lambda x: x[0]) From 559ac3a8d6f4ff91ca8fc9c14c944d880b15b9f4 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 19 Nov 2025 10:40:24 +0100 Subject: [PATCH 75/82] Format and linting --- examples/grounded_mindmaps.py | 13 +-- src/bigdata_research_tools/llm/bedrock.py | 5 +- .../mindmap/mindmap_generator.py | 89 +++++++++++-------- src/bigdata_research_tools/search/search.py | 16 ++-- .../visuals/mindmap_visuals.py | 3 +- 5 files changed, 71 insertions(+), 55 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index f9adf78..a600f39 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -2,7 +2,6 @@ from dotenv import load_dotenv -from bigdata_research_tools.mindmap.mindmap import MindMap from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.visuals.mindmap_visuals import plot_mindmap @@ -91,7 +90,7 @@ def test_refined_mindmap2( focus=focus, main_theme=main_theme, initial_mindmap=base_mindmap, - date_range = ("2025-10-01", "2025-10-31"), + date_range=("2025-10-01", "2025-10-31"), output_dir="./refined_mindmaps", filename="refined_mindmap.json", map_type=map_type, @@ -125,10 +124,14 @@ def test_dynamic_mindmap( ("2025-10-01", "2025-10-31"), ("2025-11-01", "2025-11-30"), ], - month_names=["September_2025", "October_2025", "November_2025", ], + month_names=[ + "September_2025", + "October_2025", + "November_2025", + ], ) - logger.info("Results: %s", mindmap["base_mindmap"]['mindmap_json']) - logger.info("Results: %s", mindmap["October_2025"]['mindmap_json']) + logger.info("Results: %s", mindmap["base_mindmap"]["mindmap_json"]) + logger.info("Results: %s", mindmap["October_2025"]["mindmap_json"]) logger.info("") diff --git a/src/bigdata_research_tools/llm/bedrock.py b/src/bigdata_research_tools/llm/bedrock.py index 2107722..f099586 100644 --- a/src/bigdata_research_tools/llm/bedrock.py +++ b/src/bigdata_research_tools/llm/bedrock.py @@ -2,7 +2,6 @@ try: from boto3 import Session # ty: ignore[unresolved-import] - from botocore import BaseClient # type: ignore[unresolved-import] except ImportError: raise ImportError( "Missing optional dependency for LLM Bedrock provider, " @@ -34,7 +33,7 @@ def configure_bedrock_client(self) -> None: if not self._client: self._client = Session(**self.connection_config) - def _get_bedrock_client(self) -> BaseClient: + def _get_bedrock_client(self): if not self._client: raise NotInitializedLLMProviderError(self) return self._client.client("bedrock-runtime") @@ -195,7 +194,7 @@ def configure_bedrock_client(self) -> None: if not self._client: self._client = Session(**self.connection_config) - def _get_bedrock_client(self) -> BaseClient: + def _get_bedrock_client(self): if not self._client: raise NotInitializedLLMProviderError(self) return self._client.client("bedrock-runtime") diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 40ac243..cd82129 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -7,16 +7,12 @@ from typing import Any, Optional from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange +from bigdata_client.models.advanced_search_query import QueryComponent from bigdata_client.models.search import DocumentType, SortBy from bigdata_client.query import ( Any as BigdataAny, ) -from bigdata_client.models.search import DocumentType, SortBy -from bigdata_client.query import ( - Keyword, - Similarity, - Entity -) +from bigdata_client.query import Entity, Keyword, Similarity from tqdm import tqdm from bigdata_research_tools.client import bigdata_connection @@ -29,10 +25,7 @@ prompts_dict, save_results_to_file, ) -from bigdata_client.models.advanced_search_query import QueryComponent -from bigdata_research_tools.search.search import SEARCH_QUERY_RESULTS_TYPE - -from bigdata_research_tools.search.search import run_search, INPUT_DATE_RANGE +from bigdata_research_tools.search.search import run_search logger: Logger = getLogger(__name__) @@ -229,13 +222,13 @@ def compose_tool_call_message( focus: str, map_type: str, instructions: Optional[str], - date_range: Optional[tuple[str,str]], + date_range: Optional[tuple[str, str]], initial_mindmap: Optional[str], ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] - + tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." - + if initial_mindmap: tool_prompt += f"\nStarting from the following mind map:\n{initial_mindmap}" if date_range is not None: @@ -275,9 +268,9 @@ def send_tool_call( if response_dict["tool_calls"] is not None: tool_call_id = response_dict["id"][0] arguments = response_dict["arguments"][0] - search_list = arguments.get("search_list", []) # ty: ignore[possibly-missing-attribute] - entities_list = arguments.get("entities_list", []) # ty: ignore[possibly-missing-attribute] - keywords_list = arguments.get("keywords_list", []) # ty: ignore[possibly-missing-attribute] + search_list = arguments.get("search_list", []) # ty: ignore[possibly-missing-attribute] + entities_list = arguments.get("entities_list", []) # ty: ignore[possibly-missing-attribute] + keywords_list = arguments.get("keywords_list", []) # ty: ignore[possibly-missing-attribute] return ( tool_call_id, response_dict["tool_calls"], @@ -298,7 +291,7 @@ def compose_final_message( focus: str, map_type: str, instructions: Optional[str], - date_range: Optional[tuple[str,str]], + date_range: Optional[tuple[str, str]], tool_calls, tool_call_id, context, @@ -330,7 +323,7 @@ def compose_refinement_message( focus: str, map_type: str, instructions: Optional[str], - date_range: Optional[tuple[str,str]], + date_range: Optional[tuple[str, str]], initial_mindmap: str, context: str, tool_calls, @@ -339,7 +332,7 @@ def compose_refinement_message( enforce_structure = prompts_dict[map_type]["enforce_structure_string"] refine_prompt = f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus}.\nBased on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text.\nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}." - + if date_range is not None: refine_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" @@ -367,16 +360,25 @@ def generate_one_shot( Optionally log intermediate steps to disk. """ - messages = self.compose_base_message(main_theme=main_theme, focus=focus, map_type=map_type, instructions=instructions) + messages = self.compose_base_message( + main_theme=main_theme, + focus=focus, + map_type=map_type, + instructions=instructions, + ) llm_kwargs = self.llm_model_config_base.get_llm_kwargs( remove_max_tokens=True, remove_timeout=True ) if allow_grounding: messages = self.compose_tool_call_message( - main_theme=main_theme, focus=focus, map_type=map_type, instructions=instructions, date_range=date_range, - initial_mindmap=None - ) + main_theme=main_theme, + focus=focus, + map_type=map_type, + instructions=instructions, + date_range=date_range, + initial_mindmap=None, + ) tool_call_id, tool_calls, search_list, entities_list, keywords_list = ( self.send_tool_call(messages, self.llm_base, llm_kwargs) ) @@ -461,7 +463,7 @@ def generate_refined( map_type=map_type, instructions=instructions, date_range=date_range, - initial_mindmap=initial_mindmap + initial_mindmap=initial_mindmap, ) llm_kwargs = self.llm_model_config_reasoning.get_llm_kwargs( remove_max_tokens=True, remove_timeout=True @@ -491,7 +493,7 @@ def generate_refined( initial_mindmap=initial_mindmap, tool_calls=tool_calls, tool_call_id=tool_call_id, - context=context + context=context, ) mindmap_text = self.llm_reasoning.get_response(refinement_messages) @@ -518,7 +520,6 @@ def generate_refined( } save_results_to_file(result_dict, output_dir, filename) return result_dict - def generate_or_load_refined( self, @@ -660,10 +661,10 @@ def generate_dynamic( one_shot = self.generate_one_shot( main_theme=main_theme, focus=focus, - allow_grounding=False, - instructions=instructions, - map_type=map_type, - **llm_kwargs + allow_grounding=False, + instructions=instructions, + map_type=map_type, + **llm_kwargs, ) prev_mindmap = one_shot["mindmap_json"] print(prev_mindmap) @@ -672,7 +673,6 @@ def generate_dynamic( for i, (date_range, month_name) in enumerate( zip(month_intervals, month_names), start=0 ): - refined = self.generate_refined( main_theme=main_theme, focus=focus, @@ -716,14 +716,18 @@ def _run_and_collate_search( if date_range is None: date_range_filter = RollingDateRange.LAST_THIRTY_DAYS else: - date_range_filter = AbsoluteDateRange(start=date_range[0], end=date_range[1]) + date_range_filter = AbsoluteDateRange( + start=date_range[0], end=date_range[1] + ) if entities_list: print(f"Entities List: {entities_list}") entity_objs = [] for entity_name in entities_list: try: - suggestions = self.bigdata_connection.knowledge_graph.autosuggest(entity_name, limit=1) + suggestions = self.bigdata_connection.knowledge_graph.autosuggest( + entity_name, limit=1 + ) if suggestions: # Check if list is not empty entity = suggestions[0] entity_objs.append(entity) @@ -732,14 +736,20 @@ def _run_and_collate_search( except Exception as e: print(f"Warning: Autosuggest failed for '{entity_name}': {e}") continue - - confirmed_entities = [entity.id for entity, orig_str in zip(entity_objs, entities_list) if entity.name.lower() in orig_str.lower() or orig_str.lower() in entity.name.lower()] + + confirmed_entities = [ + entity.id + for entity, orig_str in zip(entity_objs, entities_list) + if entity.name.lower() in orig_str.lower() + or orig_str.lower() in entity.name.lower() + ] if confirmed_entities: - entities = BigdataAny([Entity(entity) for entity in confirmed_entities]) else: entities = None - print(f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}") + print( + f"Searching with entities: {[entity.name for entity, orig_str in zip(entity_objs, entities_list) if entity.name in orig_str or orig_str in entity.name]}" + ) else: entities = None if keywords_list: @@ -748,7 +758,9 @@ def _run_and_collate_search( else: keywords = None - queries: list[QueryComponent] = [Similarity(sentence) for sentence in search_list] + queries: list[QueryComponent] = [ + Similarity(sentence) for sentence in search_list + ] if entities: queries = [query & entities for query in queries] if keywords: @@ -778,7 +790,6 @@ def collate_results(self, results: dict) -> str: """ doctexts = [] for (text_query, date_range), result in results.items(): - dictitem = text_query.to_dict() if dictitem["type"] == "similarity": sentence = dictitem["value"] diff --git a/src/bigdata_research_tools/search/search.py b/src/bigdata_research_tools/search/search.py index 48e20ab..941f354 100644 --- a/src/bigdata_research_tools/search/search.py +++ b/src/bigdata_research_tools/search/search.py @@ -13,14 +13,13 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime -from typing import Literal, overload, cast +from typing import Literal, Sequence, cast, overload from bigdata_client import Bigdata from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange from bigdata_client.document import Document from bigdata_client.models.advanced_search_query import QueryComponent from bigdata_client.models.search import DocumentType, SortBy -from matplotlib.pylab import Sequence from tqdm import tqdm from bigdata_research_tools.client import bigdata_connection, init_bigdata_client @@ -31,7 +30,9 @@ send_trace, ) -NORMALIZED_DATE_RANGE = Sequence[tuple[datetime, datetime] | AbsoluteDateRange | RollingDateRange] +NORMALIZED_DATE_RANGE = Sequence[ + tuple[datetime, datetime] | AbsoluteDateRange | RollingDateRange +] INPUT_DATE_RANGE = ( tuple[datetime, datetime] @@ -40,9 +41,7 @@ | NORMALIZED_DATE_RANGE ) -SEARCH_QUERY_RESULTS_TYPE = dict[ - tuple[QueryComponent, str], list[Document] -] +SEARCH_QUERY_RESULTS_TYPE = dict[tuple[QueryComponent, str], list[Document]] REQUESTS_PER_MINUTE_LIMIT = 300 MAX_WORKERS = 4 @@ -281,7 +280,10 @@ def normalize_date_range( if isinstance(date_ranges, (AbsoluteDateRange, RollingDateRange, tuple)): return cast(NORMALIZED_DATE_RANGE, [date_ranges]) if isinstance(date_ranges, Sequence): - if all(isinstance(dr, (tuple, AbsoluteDateRange, RollingDateRange)) for dr in date_ranges): + if all( + isinstance(dr, (tuple, AbsoluteDateRange, RollingDateRange)) + for dr in date_ranges + ): return list(date_ranges) return date_ranges diff --git a/src/bigdata_research_tools/visuals/mindmap_visuals.py b/src/bigdata_research_tools/visuals/mindmap_visuals.py index c09b468..4b14227 100644 --- a/src/bigdata_research_tools/visuals/mindmap_visuals.py +++ b/src/bigdata_research_tools/visuals/mindmap_visuals.py @@ -37,9 +37,10 @@ maintaining visual hierarchy and readability. """ -import matplotlib from typing import Any +import matplotlib + matplotlib.use("Agg") # Use non-interactive backend import math import os From c4a8e97bd71aa2408520d9c2822f3137c9ee7ef5 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 19 Nov 2025 15:18:03 +0000 Subject: [PATCH 76/82] grounding for workflows and motivation issues --- examples/risk_analyzer.py | 3 +- examples/thematic_screener.py | 3 +- src/bigdata_research_tools/llm/base.py | 1 + src/bigdata_research_tools/llm/openai.py | 4 +- src/bigdata_research_tools/mindmap/mindmap.py | 2 - .../mindmap/mindmap_generator.py | 63 ++++++++++++----- .../mindmap/mindmap_utils.py | 70 +++++++++++++++++++ .../portfolio/motivation.py | 7 +- .../workflows/risk_analyzer.py | 26 +++++-- .../workflows/thematic_screener.py | 21 ++++-- 10 files changed, 165 insertions(+), 35 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 7edb2f3..575260b 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -26,7 +26,7 @@ def risk_analyzer_example( analyzer = RiskAnalyzer( main_theme=risk_scenario, - companies=companies, + companies=companies[:3], start_date="2025-01-01", end_date="2025-01-31", keywords=keywords, @@ -34,6 +34,7 @@ def risk_analyzer_example( control_entities=control_entities, focus=focus, # Optional focus to narrow the theme, llm_model_config=llm_model_config, + ground_mindmap=False ) class PrintObserver(Observer): diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index dacc07d..4ed41e7 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -30,6 +30,7 @@ def thematic_screener_example( end_date="2024-02-28", document_type=DocumentType.TRANSCRIPTS, fiscal_year=2024, + ground_mindmap=True ) class PrintObserver(Observer): @@ -59,7 +60,7 @@ def update(self, message: OberserverNotification): x = thematic_screener_example( "Chip Manufacturers", export_path=str(output_path), - llm_model_config="openai::gpt-5-mini", + llm_model_config="openai::gpt-4o-mini", ) custom_config = { "company_column": "Company", diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 4979064..92649f3 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -59,6 +59,7 @@ def validate_reasoning_config(self): self.reasoning_effort = ( self.reasoning_effort if self.reasoning_effort is not None else "high" ) + self.max_completion_tokens = 1000 if self.temperature is not None: warnings.warn( "The selected model does not support temperature settings. " diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index 54b7116..e893921 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -16,7 +16,8 @@ LLMProvider, NotInitializedLLMProviderError, ) - +from logging import Logger, getLogger +logger: Logger = getLogger(__name__) class AsyncOpenAIProvider(AsyncLLMProvider): def __init__(self, model: str, **connection_config): @@ -161,6 +162,7 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: chat_completion = self._client.chat.completions.create( messages=chat_history, model=self.model, **kwargs ) + logger.info(f"Chat Completion {chat_completion}") return chat_completion.choices[0].message.content diff --git a/src/bigdata_research_tools/mindmap/mindmap.py b/src/bigdata_research_tools/mindmap/mindmap.py index f466196..2dc2e65 100644 --- a/src/bigdata_research_tools/mindmap/mindmap.py +++ b/src/bigdata_research_tools/mindmap/mindmap.py @@ -526,7 +526,6 @@ def get_default_tree_config(llm_model: str) -> LLMConfig: model=llm_model, reasoning_effort="high", seed=42, - max_completion_tokens=300, response_format={"type": "json_object"}, ) else: @@ -536,7 +535,6 @@ def get_default_tree_config(llm_model: str) -> LLMConfig: top_p=1, frequency_penalty=0, presence_penalty=0, - max_completion_tokens=300, seed=42, response_format={"type": "json_object"}, ) diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index cd82129..0f642cf 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -92,6 +92,8 @@ def __init__( self.llm_model_config_base = LLMConfig(**llm_model_config_base) elif isinstance(llm_model_config_base, str): self.llm_model_config_base = get_default_tree_config(llm_model_config_base) + else: + self.llm_model_config_base = llm_model_config_base if isinstance(llm_model_config_reasoning, dict): self.llm_model_config_reasoning = LLMConfig(**llm_model_config_reasoning) @@ -99,13 +101,14 @@ def __init__( self.llm_model_config_reasoning = get_default_tree_config( llm_model_config_reasoning ) + else: + self.llm_model_config_reasoning = llm_model_config_reasoning - print(self.llm_model_config_base) self.llm_base = LLMEngine( model=self.llm_model_config_base.model, **self.llm_model_config_base.connection_config, ) - print(self.llm_model_config_reasoning) + self.llm_reasoning = LLMEngine( model=self.llm_model_config_reasoning.model, **self.llm_model_config_reasoning.connection_config, @@ -201,6 +204,9 @@ def compose_base_message( self, main_theme: str, focus: str, map_type: str, instructions: Optional[str] ) -> list: # Explicit, step-by-step prompt (robust, as in working repo, minus Keywords) + if instructions is None: + instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + enforce_structure = prompts_dict[map_type]["enforce_structure_string"] messages = [ { @@ -214,6 +220,7 @@ def compose_base_message( ), }, ] + return messages def compose_tool_call_message( @@ -227,6 +234,9 @@ def compose_tool_call_message( ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] + if instructions is None: + instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." if initial_mindmap: @@ -298,8 +308,14 @@ def compose_final_message( ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] + if instructions is None: + instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + final_prompt = f"{instructions} {focus}. \nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}" + if date_range is not None: + final_prompt += f"\nYour search will be conducted over the range: {date_range[0]} - {date_range[1]}" + final_message = [ { "role": "system", @@ -331,6 +347,9 @@ def compose_refinement_message( ) -> list: enforce_structure = prompts_dict[map_type]["enforce_structure_string"] + if instructions is None: + instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + refine_prompt = f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus}.\nBased on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text.\nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}." if date_range is not None: @@ -353,7 +372,7 @@ def generate_one_shot( instructions: Optional[str] = None, date_range: Optional[tuple[str, str]] = None, map_type: str = "risk", - ) -> dict[str, Any]: + ) -> tuple[MindMap, dict]: """ Generate a mind map in one LLM call, optionally allowing the LLM to request grounding. If allow_grounding is True, use the specified grounding_method ("tool_call" or "chat"). @@ -403,7 +422,7 @@ def generate_one_shot( theme_tree = self._parse_llm_to_themetree(mindmap_text) df = self._themetree_to_dataframe(theme_tree) - return { + return theme_tree, { "mindmap_text": mindmap_text, "mindmap_df": df, "mindmap_json": theme_tree.to_json(), ##where does this come from? @@ -418,7 +437,7 @@ def generate_one_shot( mindmap_text ) ## check if correct df = format_mindmap_to_dataframe(mindmap_text) - return { + return None, { "mindmap_text": mindmap_text, "mindmap_df": df, "mindmap_json": theme_tree.to_json(), @@ -429,7 +448,7 @@ def generate_one_shot( theme_tree = self._parse_llm_to_themetree(mindmap_text) df = self._themetree_to_dataframe(theme_tree) - return { + return theme_tree, { "mindmap_text": mindmap_text, "mindmap_tree": theme_tree, "mindmap_json": theme_tree.to_json(), @@ -451,7 +470,7 @@ def generate_refined( date_range: Optional[tuple[str, str]] = None, chunk_limit: int = 20, **llm_kwargs, - ) -> dict[str, Any]: + ) -> tuple[MindMap, dict]: """ Refine an initial mind map: LLM proposes searches, search is run, LLM refines mind map with search results. Optionally log intermediate steps to disk. @@ -507,7 +526,7 @@ def generate_refined( "search_context": context, } save_results_to_file(result_dict, output_dir, filename) - return result_dict + return theme_tree, result_dict else: mindmap_text = search_list if isinstance(search_list, str) else "" df = format_mindmap_to_dataframe(mindmap_text) @@ -519,7 +538,7 @@ def generate_refined( "search_context": "", } save_results_to_file(result_dict, output_dir, filename) - return result_dict + return None, result_dict def generate_or_load_refined( self, @@ -535,7 +554,7 @@ def generate_or_load_refined( output_dir: str = "./bootstrapped_mindmaps", filename: str = "refined_mindmap", i: int = 0, - ): + ) -> dict: if f"{filename}_{i}.json" in os.listdir(output_dir): result = load_results_from_file(output_dir, f"{filename}_{i}.json") print(f"Loaded existing result for {filename}_{i}.json") @@ -557,7 +576,7 @@ def generate_or_load_refined( # save_results_to_file(result, output_dir, ) except Exception as e: print(e) - result = self.generate_refined( + _, result = self.generate_refined( instructions=instructions, focus=focus, main_theme=main_theme, @@ -585,7 +604,7 @@ def bootstrap_refined( filename: str = "refined_mindmap", n_elements: int = 50, max_workers: int = 10, - ): + ) -> dict: """ Generate multiple refined mindmaps in parallel using ThreadPoolExecutor. @@ -650,15 +669,16 @@ def generate_dynamic( map_type: str = "risk", output_dir: str = "./dynamic_mindmaps", **llm_kwargs, - ) -> dict[str, dict[str, Any]]: + ) -> tuple[dict[str,MindMap], dict]: """ Dynamic/iterative mind map generation over time intervals. Returns a list of dicts, one per interval. Each step: generate/refine mind map for the given interval, grounded in search results for that period. """ results = {} + mind_map_objs = {} # Step 1: Generate initial mind map for t0 - one_shot = self.generate_one_shot( + one_shot_map, one_shot_dict = self.generate_one_shot( main_theme=main_theme, focus=focus, allow_grounding=False, @@ -666,14 +686,14 @@ def generate_dynamic( map_type=map_type, **llm_kwargs, ) - prev_mindmap = one_shot["mindmap_json"] - print(prev_mindmap) - results["base_mindmap"] = one_shot + prev_mindmap = one_shot_dict["mindmap_json"] + mind_map_objs["base_mindmap"] = one_shot_map + results["base_mindmap"] = one_shot_dict # Step 2: For each subsequent interval, refine using previous mind map and new search, including starting month for i, (date_range, month_name) in enumerate( zip(month_intervals, month_names), start=0 ): - refined = self.generate_refined( + refined_map, refined = self.generate_refined( main_theme=main_theme, focus=focus, initial_mindmap=prev_mindmap, @@ -689,8 +709,9 @@ def generate_dynamic( ) results[month_name] = refined + mind_map_objs[month_name] = refined_map prev_mindmap = refined["mindmap_json"] - return results + return mind_map_objs, results def _run_and_collate_search( self, @@ -758,6 +779,8 @@ def _run_and_collate_search( else: keywords = None + print(f"Searching with sentences: {search_list}") + queries: list[QueryComponent] = [ Similarity(sentence) for sentence in search_list ] @@ -793,6 +816,8 @@ def collate_results(self, results: dict) -> str: dictitem = text_query.to_dict() if dictitem["type"] == "similarity": sentence = dictitem["value"] + else: + sentence = "" docstr = f"###Query: {sentence}\n ### Results:\n" for doc in result: headline = getattr(doc, "headline", "No headline") diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py index 4f09b07..af436aa 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_utils.py +++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py @@ -8,6 +8,28 @@ "theme": { "qualifier": "Main Theme", "user_prompt_message": "Your given Theme is: {main_theme}", + "default_instructions": ("Forget all previous prompts." + "You are assisting a professional analyst tasked with creating a screener to measure the impact of the theme {main_theme} on companies." + "Your objective is to generate a comprehensive tree structure of distinct sub-themes that will guide the analyst's research process." + "Follow these steps strictly:" + "1. **Understand the Core Theme {main_theme}**:" + " - The theme {main_theme} is a central concept. All components are essential for a thorough understanding." + "2. **Create a Taxonomy of Sub-themes for {main_theme}**:" + " - Decompose the main theme {main_theme} into concise, focused, and self-contained sub-themes." + " - Each sub-theme should represent a singular, concise, informative, and clear aspect of the main theme." + " - Expand the sub-theme to be relevant for the {main_theme}: a single word is not informative enough." + " - Prioritize clarity and specificity in your sub-themes." + " - Avoid repetition and strive for diverse angles of exploration." + " - Provide a comprehensive list of potential sub-themes." + "3. **Iterate Based on the Analyst's Focus {analyst_focus}**:" + " - If no specific {analyst_focus} is provided, transition directly to formatting the JSON response." + "4. **Format Your Response as a JSON Object**:" + " - Each node in the JSON object must include:" + " - `node`: an integer representing the unique identifier for the node." + " - `label`: a string for the name of the sub-theme." + " - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme {main_theme}." + " - For the node referring to the first node {main_theme}, just define briefly in maximum 15 words the theme {main_theme}." + " - `children`: an array of child nodes."), "enforce_structure_string": ( """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" "- `node`: an integer representing the unique identifier for the node.\n" @@ -43,6 +65,54 @@ "risk": { "qualifier": "Risk Scenario", "user_prompt_message": "Your given Risk Scenario is: {main_theme}", + "default_instructions": ("Forget all previous prompts." + "You are assisting a professional risk analyst tasked with creating a taxonomy to classify the impact of the Risk Scenario '**{main_theme}**' on companies." + "Your objective is to generate a **comprehensive tree structure** that maps the **risk spillovers** stemming from the Risk Scenario '**{main_theme}**', and generates related sub-scenarios." + "Key Instructions:" + "1. **Understand the Risk Scenario: '{main_theme}'**:" + " - The Risk Scenario '**{main_theme}**' represents a central, multifaceted concept that may be harmful or beneficial to firms." + " - Your task is to identify how the Risk Scenario impacts firms through various **risk spillovers** and transmission channels." + " - Summarize the Risk Scenario '**{main_theme}**' in a **short list of essential keywords**." + " - The keyword list should be short (1-2 keywords). Avoid unnecessary, unmentioned, indirectly inferred, or redundant keywords." + "2. **Create a Tree Structure for Risk Spillovers and Sub-Scenarios**:" + " - Decompose the Risk Scenario into **distinct, focused, and self-contained risk spillovers**." + " - Each risk spillover must represent a **specific risk channel** through which firms are exposed to as a consequence of the Risk Scenario." + " - Label each **primary node** in the tree explicitly as a 'Risk' in the `Label` field. For example:" + " - Use 'Cost Risk' instead of 'Cost Impacts'." + " - Use 'Supply Chain Risk' instead of 'Supply Chain Disruptions'." + " - Risk spillovers must:" + " - Cover a wide range of potential impacts on firms' operations, business, performance, strategy, profits, and long-term success." + " - Explore both macroeconomic and microeconomic dimensions of the Risk Scenario '**{main_theme}**' and analyze their impact on firms when relevant." + " - Microeconomic effects, such as cost of inputs, directly affect firms' operations" + " - Macroeconomic effects may affect firms revenues directly (e.g. currency fluctuations) or indirectly (e.g. economic downturns triggering lower demand)." + " - Include **direct and indirect consequences** of the main scenario." + " - Represent **dimensions of risk** that firms must monitor or mitigate." + " - NOT overlap." + " - Independently identify the most relevant spillovers based on the Risk Scenario '**{main_theme}**', without limiting to predefined categories." + "3. **Generate Sub-Scenarios for Each Risk Spillover**:" + " - For each risk spillover, identify **specific sub-scenarios** that will arise as a consequence of the Risk Scenario '**{main_theme}**'." + " - All sub-scenarios must:" + " - Be **concise and descriptive sentences**, clearly stating how the sub-scenario is an event caused by the main scenario." + " - **Explicitly include ALL core concepts and keywords** from the main scenario, including specific geographical locations or temporal details, in every sentence in order to ensure clarity and relevance towards the main scenario." + " - Integrate the Risk Scenario in a natural way, avoiding repetitive or mechanical structures." + " - Not exceed 15 words." + " - Sub-scenarios MUST be mutually exclusive: they CANNOT overlap neither within nor across branches of the tree." + " - Do NOT combine multiple sub-scenarios in a single label." + " - Sub-Scenarios have to be consistent with the parent Risk Spillover (e.g. Market Access related sub-scenarios have to belong to the Market Access Risk node)." + " - Generate 3 OR MORE sub-scenarios for each risk spillover." + " - Generate a short label for each subscenario." + "4. **Iterate Based on the Analyst's Focus: '{analyst_focus}'**:" + " - After generating the initial tree structure, use the analyst's focus ('{analyst_focus}') to:" + " - Identify **missing branches** or underexplored areas of the tree." + " - Add new risk spillovers or sub-scenarios that align with the analyst's focus." + " - Ensure that sub-scenarios ALWAYS include ALL core components of the Risk Scenario and are formulated as natural sentences." + " - Ensure that sub-scenarios DO NOT overlap within and across risk spillovers." + " - Ensure that sub-scenarios belong to the correct Risk Spillover." + " - If the analyst focus is empty, skip this step." + " - If you don't understand the analyst focus ('{analyst_focus}'), ask an open-ended question to the analyst." + "5. **Review and Expand the Tree for Missing Risks**:" + " - After incorporating the analyst's focus, review the tree structure to ensure it includes a **broad range of risks** and sub-scenarios." + " - Add any missing risks or sub-scenarios to the tree."), "enforce_structure_string": ( """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" " - `node`: an integer representing the unique identifier for the node.\n" diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index cd05b1f..f8de4fa 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -36,6 +36,8 @@ def __init__( else: self.llm_model_config = llm_model_config + logger.info(f"LLM Config {self.llm_model_config}") + self.llm_engine = LLMEngine( model=self.llm_model_config.model, **self.llm_model_config.connection_config ) @@ -43,8 +45,9 @@ def __init__( def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" if any(rm in model for rm in REASONING_MODELS): + print(model) return LLMConfig( - model=model, reasoning_effort="high", seed=42, max_completion_tokens=300 + model=model, reasoning_effort="high", seed=42, max_completion_tokens=1000, ) else: return LLMConfig( @@ -141,6 +144,8 @@ def query_llm_for_motivation(self, prompt: str) -> str: ), ) + logger.info(f"Motivation {motivation}") + return motivation.strip() def generate_company_motivations( diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 5ba8882..9ea06c4 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -13,6 +13,7 @@ MindMap, generate_risk_tree, ) +from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -44,6 +45,7 @@ def __init__( sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str = "", + ground_mindmap: bool = False, ): """ This class will screen a universe's (specified in 'companies') exposure to a given theme ('main_theme'). @@ -79,6 +81,7 @@ def __init__( self.sources = sources self.rerank_threshold = rerank_threshold self.focus = focus + self.ground_mindmap = ground_mindmap if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) @@ -87,6 +90,9 @@ def __init__( elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config + logger.info(f"LLM Config {self.llm_model_config}") + logger.info(f"LLM Config {type(self.llm_model_config)}") + def create_taxonomy(self): """Create a risk taxonomy based on the main theme and focus. Returns: @@ -95,11 +101,19 @@ def create_taxonomy(self): List[str]: A list of terminal labels for the risk categories. """ - risk_tree = generate_risk_tree( - main_theme=self.main_theme, - focus=self.focus, - llm_model_config=self.llm_model_config, - ) + # risk_tree = generate_risk_tree( + # main_theme=self.main_theme, + # focus=self.focus, + # llm_model_config=self.llm_model_config, + # ) + + mindmap_generator = MindMapGenerator(llm_model_config_base=self.llm_model_config) + risk_tree, _ = mindmap_generator.generate_one_shot(main_theme = self.main_theme, + focus = self.focus, + allow_grounding = self.ground_mindmap, + instructions = None, + date_range = None, + map_type = "risk") risk_summaries = risk_tree.get_terminal_summaries() terminal_labels = risk_tree.get_terminal_labels() @@ -257,6 +271,7 @@ def generate_results( df_industry = get_scored_df( df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario" ) + logger.info(f"LLM CONFIG {self.llm_model_config}") motivation_generator = Motivation(llm_model_config=self.llm_model_config) motivation_df = motivation_generator.generate_company_motivations( df=df_labeled.rename(columns={"Sub-Scenario": "Theme"}), @@ -264,6 +279,7 @@ def generate_results( word_range=word_range, use_case=MotivationType.RISK_ANALYZER, ) + print(motivation_df) return df_company, df_industry, motivation_df diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index f33b187..e813c19 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -10,6 +10,7 @@ from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.mindmap.mindmap import generate_theme_tree +from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType from bigdata_research_tools.search.screener_search import search_by_companies @@ -39,6 +40,7 @@ def __init__( sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str | None = None, + ground_mindmap: bool = None ): """ This class will screen a universe's (specified in 'companies') exposure to a given theme ('main_theme'). @@ -84,6 +86,7 @@ def __init__( self.sources = sources self.rerank_threshold = rerank_threshold self.focus = focus or "" + self.ground_mindmap = ground_mindmap if isinstance(llm_model_config, dict): self.llm_model_config = LLMConfig(**llm_model_config) elif isinstance(llm_model_config, str): @@ -135,11 +138,19 @@ def screen_companies( try: self.notify_observers("Generating thematic tree") - theme_tree = generate_theme_tree( - main_theme=self.main_theme, - focus=self.focus, - llm_model_config=self.llm_model_config, - ) + # theme_tree = generate_theme_tree( + # main_theme=self.main_theme, + # focus=self.focus, + # llm_model_config=self.llm_model_config, + # ) + + mindmap_generator = MindMapGenerator(llm_model_config_base=self.llm_model_config) + theme_tree, _ = mindmap_generator.generate_one_shot(main_theme = self.main_theme, + focus = self.focus, + allow_grounding = self.ground_mindmap, + instructions = None, + date_range = None, + map_type = "theme") theme_summaries = theme_tree.get_terminal_summaries() terminal_labels = theme_tree.get_terminal_labels() From d35ff8512b23062ac2b43998f712d99262980722 Mon Sep 17 00:00:00 2001 From: Alessandro Bouchs Date: Wed, 19 Nov 2025 16:17:41 +0000 Subject: [PATCH 77/82] removed max tokens for reasoning models --- examples/risk_analyzer.py | 2 +- src/bigdata_research_tools/llm/base.py | 2 +- src/bigdata_research_tools/portfolio/motivation.py | 7 +------ src/bigdata_research_tools/workflows/risk_analyzer.py | 5 +---- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index 575260b..ac44ff7 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -26,7 +26,7 @@ def risk_analyzer_example( analyzer = RiskAnalyzer( main_theme=risk_scenario, - companies=companies[:3], + companies=companies, start_date="2025-01-01", end_date="2025-01-31", keywords=keywords, diff --git a/src/bigdata_research_tools/llm/base.py b/src/bigdata_research_tools/llm/base.py index 92649f3..d919ae1 100644 --- a/src/bigdata_research_tools/llm/base.py +++ b/src/bigdata_research_tools/llm/base.py @@ -59,7 +59,7 @@ def validate_reasoning_config(self): self.reasoning_effort = ( self.reasoning_effort if self.reasoning_effort is not None else "high" ) - self.max_completion_tokens = 1000 + self.max_completion_tokens = None if self.temperature is not None: warnings.warn( "The selected model does not support temperature settings. " diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index f8de4fa..28c4a2c 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -36,8 +36,6 @@ def __init__( else: self.llm_model_config = llm_model_config - logger.info(f"LLM Config {self.llm_model_config}") - self.llm_engine = LLMEngine( model=self.llm_model_config.model, **self.llm_model_config.connection_config ) @@ -45,9 +43,8 @@ def __init__( def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" if any(rm in model for rm in REASONING_MODELS): - print(model) return LLMConfig( - model=model, reasoning_effort="high", seed=42, max_completion_tokens=1000, + model=model, reasoning_effort="high", seed=42, max_completion_tokens=None, ) else: return LLMConfig( @@ -144,8 +141,6 @@ def query_llm_for_motivation(self, prompt: str) -> str: ), ) - logger.info(f"Motivation {motivation}") - return motivation.strip() def generate_company_motivations( diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 9ea06c4..2af4d86 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -90,9 +90,6 @@ def __init__( elif isinstance(llm_model_config, LLMConfig): self.llm_model_config = llm_model_config - logger.info(f"LLM Config {self.llm_model_config}") - logger.info(f"LLM Config {type(self.llm_model_config)}") - def create_taxonomy(self): """Create a risk taxonomy based on the main theme and focus. Returns: @@ -271,7 +268,7 @@ def generate_results( df_industry = get_scored_df( df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario" ) - logger.info(f"LLM CONFIG {self.llm_model_config}") + motivation_generator = Motivation(llm_model_config=self.llm_model_config) motivation_df = motivation_generator.generate_company_motivations( df=df_labeled.rename(columns={"Sub-Scenario": "Theme"}), From eda63c91ba5b9a455d5f51d502d2aa408fd51cc4 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 19 Nov 2025 17:26:34 +0100 Subject: [PATCH 78/82] Fix type errors and formatting --- examples/grounded_mindmaps.py | 8 +-- examples/risk_analyzer.py | 2 +- examples/thematic_screener.py | 2 +- src/bigdata_research_tools/llm/openai.py | 4 +- .../mindmap/mindmap_generator.py | 33 +++++++----- .../mindmap/mindmap_utils.py | 52 ++++++++++--------- .../portfolio/motivation.py | 5 +- .../workflows/risk_analyzer.py | 19 ++++--- .../workflows/thematic_screener.py | 21 ++++---- 9 files changed, 82 insertions(+), 64 deletions(-) diff --git a/examples/grounded_mindmaps.py b/examples/grounded_mindmaps.py index a600f39..91c8e4e 100644 --- a/examples/grounded_mindmaps.py +++ b/examples/grounded_mindmaps.py @@ -29,7 +29,7 @@ def test_one_shot_mindmap( mindmap_generator = MindMapGenerator( llm_model_config_base=llm_base_config, ) - mindmap = mindmap_generator.generate_one_shot( + _, mindmap = mindmap_generator.generate_one_shot( instructions=instructions, focus=focus, main_theme=main_theme, @@ -55,7 +55,7 @@ def test_refined_mindmap( mindmap_generator = MindMapGenerator( llm_model_config_base=llm_base_config, ) - mindmap = mindmap_generator.generate_refined( + _, mindmap = mindmap_generator.generate_refined( focus=focus, main_theme=main_theme, initial_mindmap=base_mindmap, @@ -86,7 +86,7 @@ def test_refined_mindmap2( llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config, ) - mindmap = mindmap_generator.generate_refined( + _, mindmap = mindmap_generator.generate_refined( focus=focus, main_theme=main_theme, initial_mindmap=base_mindmap, @@ -115,7 +115,7 @@ def test_dynamic_mindmap( llm_model_config_base=llm_base_config, llm_model_config_reasoning=llm_reasoning_config, ) - mindmap = mindmap_generator.generate_dynamic( + _, mindmap = mindmap_generator.generate_dynamic( instructions=instructions, focus=focus, main_theme=main_theme, diff --git a/examples/risk_analyzer.py b/examples/risk_analyzer.py index ac44ff7..c14e2f6 100644 --- a/examples/risk_analyzer.py +++ b/examples/risk_analyzer.py @@ -34,7 +34,7 @@ def risk_analyzer_example( control_entities=control_entities, focus=focus, # Optional focus to narrow the theme, llm_model_config=llm_model_config, - ground_mindmap=False + ground_mindmap=False, ) class PrintObserver(Observer): diff --git a/examples/thematic_screener.py b/examples/thematic_screener.py index 4ed41e7..67d34f6 100644 --- a/examples/thematic_screener.py +++ b/examples/thematic_screener.py @@ -30,7 +30,7 @@ def thematic_screener_example( end_date="2024-02-28", document_type=DocumentType.TRANSCRIPTS, fiscal_year=2024, - ground_mindmap=True + ground_mindmap=True, ) class PrintObserver(Observer): diff --git a/src/bigdata_research_tools/llm/openai.py b/src/bigdata_research_tools/llm/openai.py index e893921..54b7116 100644 --- a/src/bigdata_research_tools/llm/openai.py +++ b/src/bigdata_research_tools/llm/openai.py @@ -16,8 +16,7 @@ LLMProvider, NotInitializedLLMProviderError, ) -from logging import Logger, getLogger -logger: Logger = getLogger(__name__) + class AsyncOpenAIProvider(AsyncLLMProvider): def __init__(self, model: str, **connection_config): @@ -162,7 +161,6 @@ def get_response(self, chat_history: list[dict[str, str]], **kwargs) -> str: chat_completion = self._client.chat.completions.create( messages=chat_history, model=self.model, **kwargs ) - logger.info(f"Chat Completion {chat_completion}") return chat_completion.choices[0].message.content diff --git a/src/bigdata_research_tools/mindmap/mindmap_generator.py b/src/bigdata_research_tools/mindmap/mindmap_generator.py index 0f642cf..8490290 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_generator.py +++ b/src/bigdata_research_tools/mindmap/mindmap_generator.py @@ -4,7 +4,7 @@ import re from concurrent.futures import ThreadPoolExecutor, as_completed from logging import Logger, getLogger -from typing import Any, Optional +from typing import Optional from bigdata_client.daterange import AbsoluteDateRange, RollingDateRange from bigdata_client.models.advanced_search_query import QueryComponent @@ -205,7 +205,9 @@ def compose_base_message( ) -> list: # Explicit, step-by-step prompt (robust, as in working repo, minus Keywords) if instructions is None: - instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + instructions = prompts_dict[map_type]["default_instructions"].format( + main_theme=main_theme, analyst_focus=focus + ) enforce_structure = prompts_dict[map_type]["enforce_structure_string"] messages = [ @@ -235,7 +237,9 @@ def compose_tool_call_message( enforce_structure = prompts_dict[map_type]["enforce_structure_string"] if instructions is None: - instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + instructions = prompts_dict[map_type]["default_instructions"].format( + main_theme=main_theme, analyst_focus=focus + ) tool_prompt = f"{instructions} {focus} You can use news search to find relevant information about the topic. \nUse the Bigdata API to search for news articles related to the topic and use them to inform your response." @@ -309,7 +313,9 @@ def compose_final_message( enforce_structure = prompts_dict[map_type]["enforce_structure_string"] if instructions is None: - instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + instructions = prompts_dict[map_type]["default_instructions"].format( + main_theme=main_theme, analyst_focus=focus + ) final_prompt = f"{instructions} {focus}. \nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}" @@ -348,7 +354,9 @@ def compose_refinement_message( enforce_structure = prompts_dict[map_type]["enforce_structure_string"] if instructions is None: - instructions = prompts_dict[map_type]["default_instructions"].format(main_theme=main_theme, analyst_focus=focus) + instructions = prompts_dict[map_type]["default_instructions"].format( + main_theme=main_theme, analyst_focus=focus + ) refine_prompt = f"{instructions} {prompts_dict[map_type]['qualifier']}: {main_theme} {focus}.\nBased on these instructions, enhance the given mindmap with the information below. Only return the mindmap without extra text.\nIMPORTANT: Only create additional branches if the tool call results contain explicit information suggesting that new branches would be relevant.\n{enforce_structure}." @@ -437,7 +445,7 @@ def generate_one_shot( mindmap_text ) ## check if correct df = format_mindmap_to_dataframe(mindmap_text) - return None, { + return MindMap("", 0), { "mindmap_text": mindmap_text, "mindmap_df": df, "mindmap_json": theme_tree.to_json(), @@ -470,7 +478,7 @@ def generate_refined( date_range: Optional[tuple[str, str]] = None, chunk_limit: int = 20, **llm_kwargs, - ) -> tuple[MindMap, dict]: + ) -> tuple[MindMap | None, dict]: """ Refine an initial mind map: LLM proposes searches, search is run, LLM refines mind map with search results. Optionally log intermediate steps to disk. @@ -560,7 +568,7 @@ def generate_or_load_refined( print(f"Loaded existing result for {filename}_{i}.json") else: try: - result = self.generate_refined( + _, result = self.generate_refined( instructions=instructions, focus=focus, main_theme=main_theme, @@ -574,8 +582,7 @@ def generate_or_load_refined( filename=f"{filename}_{i}.json", ) # save_results_to_file(result, output_dir, ) - except Exception as e: - print(e) + except Exception: _, result = self.generate_refined( instructions=instructions, focus=focus, @@ -586,7 +593,7 @@ def generate_or_load_refined( output_dir=output_dir, filename=f"{filename}_{i}.json", ) - # save_results_to_file(result, output_dir, f"{filename}_{i}.json") + return result def bootstrap_refined( @@ -604,7 +611,7 @@ def bootstrap_refined( filename: str = "refined_mindmap", n_elements: int = 50, max_workers: int = 10, - ) -> dict: + ) -> list[dict]: """ Generate multiple refined mindmaps in parallel using ThreadPoolExecutor. @@ -669,7 +676,7 @@ def generate_dynamic( map_type: str = "risk", output_dir: str = "./dynamic_mindmaps", **llm_kwargs, - ) -> tuple[dict[str,MindMap], dict]: + ) -> tuple[dict[str, MindMap], dict]: """ Dynamic/iterative mind map generation over time intervals. Returns a list of dicts, one per interval. diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py index af436aa..0f8e25b 100644 --- a/src/bigdata_research_tools/mindmap/mindmap_utils.py +++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py @@ -8,28 +8,30 @@ "theme": { "qualifier": "Main Theme", "user_prompt_message": "Your given Theme is: {main_theme}", - "default_instructions": ("Forget all previous prompts." - "You are assisting a professional analyst tasked with creating a screener to measure the impact of the theme {main_theme} on companies." - "Your objective is to generate a comprehensive tree structure of distinct sub-themes that will guide the analyst's research process." - "Follow these steps strictly:" - "1. **Understand the Core Theme {main_theme}**:" - " - The theme {main_theme} is a central concept. All components are essential for a thorough understanding." - "2. **Create a Taxonomy of Sub-themes for {main_theme}**:" - " - Decompose the main theme {main_theme} into concise, focused, and self-contained sub-themes." - " - Each sub-theme should represent a singular, concise, informative, and clear aspect of the main theme." - " - Expand the sub-theme to be relevant for the {main_theme}: a single word is not informative enough." - " - Prioritize clarity and specificity in your sub-themes." - " - Avoid repetition and strive for diverse angles of exploration." - " - Provide a comprehensive list of potential sub-themes." - "3. **Iterate Based on the Analyst's Focus {analyst_focus}**:" - " - If no specific {analyst_focus} is provided, transition directly to formatting the JSON response." - "4. **Format Your Response as a JSON Object**:" - " - Each node in the JSON object must include:" - " - `node`: an integer representing the unique identifier for the node." - " - `label`: a string for the name of the sub-theme." - " - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme {main_theme}." - " - For the node referring to the first node {main_theme}, just define briefly in maximum 15 words the theme {main_theme}." - " - `children`: an array of child nodes."), + "default_instructions": ( + "Forget all previous prompts." + "You are assisting a professional analyst tasked with creating a screener to measure the impact of the theme {main_theme} on companies." + "Your objective is to generate a comprehensive tree structure of distinct sub-themes that will guide the analyst's research process." + "Follow these steps strictly:" + "1. **Understand the Core Theme {main_theme}**:" + " - The theme {main_theme} is a central concept. All components are essential for a thorough understanding." + "2. **Create a Taxonomy of Sub-themes for {main_theme}**:" + " - Decompose the main theme {main_theme} into concise, focused, and self-contained sub-themes." + " - Each sub-theme should represent a singular, concise, informative, and clear aspect of the main theme." + " - Expand the sub-theme to be relevant for the {main_theme}: a single word is not informative enough." + " - Prioritize clarity and specificity in your sub-themes." + " - Avoid repetition and strive for diverse angles of exploration." + " - Provide a comprehensive list of potential sub-themes." + "3. **Iterate Based on the Analyst's Focus {analyst_focus}**:" + " - If no specific {analyst_focus} is provided, transition directly to formatting the JSON response." + "4. **Format Your Response as a JSON Object**:" + " - Each node in the JSON object must include:" + " - `node`: an integer representing the unique identifier for the node." + " - `label`: a string for the name of the sub-theme." + " - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme {main_theme}." + " - For the node referring to the first node {main_theme}, just define briefly in maximum 15 words the theme {main_theme}." + " - `children`: an array of child nodes." + ), "enforce_structure_string": ( """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" "- `node`: an integer representing the unique identifier for the node.\n" @@ -65,7 +67,8 @@ "risk": { "qualifier": "Risk Scenario", "user_prompt_message": "Your given Risk Scenario is: {main_theme}", - "default_instructions": ("Forget all previous prompts." + "default_instructions": ( + "Forget all previous prompts." "You are assisting a professional risk analyst tasked with creating a taxonomy to classify the impact of the Risk Scenario '**{main_theme}**' on companies." "Your objective is to generate a **comprehensive tree structure** that maps the **risk spillovers** stemming from the Risk Scenario '**{main_theme}**', and generates related sub-scenarios." "Key Instructions:" @@ -112,7 +115,8 @@ " - If you don't understand the analyst focus ('{analyst_focus}'), ask an open-ended question to the analyst." "5. **Review and Expand the Tree for Missing Risks**:" " - After incorporating the analyst's focus, review the tree structure to ensure it includes a **broad range of risks** and sub-scenarios." - " - Add any missing risks or sub-scenarios to the tree."), + " - Add any missing risks or sub-scenarios to the tree." + ), "enforce_structure_string": ( """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n" " - `node`: an integer representing the unique identifier for the node.\n" diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py index 28c4a2c..4f9555e 100644 --- a/src/bigdata_research_tools/portfolio/motivation.py +++ b/src/bigdata_research_tools/portfolio/motivation.py @@ -44,7 +44,10 @@ def _get_default_model_config(self, model: str) -> LLMConfig: """Get default LLM model configuration.""" if any(rm in model for rm in REASONING_MODELS): return LLMConfig( - model=model, reasoning_effort="high", seed=42, max_completion_tokens=None, + model=model, + reasoning_effort="high", + seed=42, + max_completion_tokens=None, ) else: return LLMConfig( diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py index 2af4d86..592af0e 100644 --- a/src/bigdata_research_tools/workflows/risk_analyzer.py +++ b/src/bigdata_research_tools/workflows/risk_analyzer.py @@ -11,7 +11,6 @@ from bigdata_research_tools.llm.base import LLMConfig from bigdata_research_tools.mindmap.mindmap import ( MindMap, - generate_risk_tree, ) from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.portfolio.motivation import Motivation @@ -104,13 +103,17 @@ def create_taxonomy(self): # llm_model_config=self.llm_model_config, # ) - mindmap_generator = MindMapGenerator(llm_model_config_base=self.llm_model_config) - risk_tree, _ = mindmap_generator.generate_one_shot(main_theme = self.main_theme, - focus = self.focus, - allow_grounding = self.ground_mindmap, - instructions = None, - date_range = None, - map_type = "risk") + mindmap_generator = MindMapGenerator( + llm_model_config_base=self.llm_model_config + ) + risk_tree, _ = mindmap_generator.generate_one_shot( + main_theme=self.main_theme, + focus=self.focus, + allow_grounding=self.ground_mindmap, + instructions=None, + date_range=None, + map_type="risk", + ) risk_summaries = risk_tree.get_terminal_summaries() terminal_labels = risk_tree.get_terminal_labels() diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index e813c19..5eeed88 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -9,7 +9,6 @@ from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel from bigdata_research_tools.labeler.screener_labeler import ScreenerLabeler from bigdata_research_tools.llm.base import LLMConfig -from bigdata_research_tools.mindmap.mindmap import generate_theme_tree from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator from bigdata_research_tools.portfolio.motivation import Motivation from bigdata_research_tools.prompts.motivation import MotivationType @@ -40,7 +39,7 @@ def __init__( sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str | None = None, - ground_mindmap: bool = None + ground_mindmap: bool | None = None, ): """ This class will screen a universe's (specified in 'companies') exposure to a given theme ('main_theme'). @@ -144,13 +143,17 @@ def screen_companies( # llm_model_config=self.llm_model_config, # ) - mindmap_generator = MindMapGenerator(llm_model_config_base=self.llm_model_config) - theme_tree, _ = mindmap_generator.generate_one_shot(main_theme = self.main_theme, - focus = self.focus, - allow_grounding = self.ground_mindmap, - instructions = None, - date_range = None, - map_type = "theme") + mindmap_generator = MindMapGenerator( + llm_model_config_base=self.llm_model_config + ) + theme_tree, _ = mindmap_generator.generate_one_shot( + main_theme=self.main_theme, + focus=self.focus, + allow_grounding=self.ground_mindmap if self.ground_mindmap else False, + instructions=None, + date_range=None, + map_type="theme", + ) theme_summaries = theme_tree.get_terminal_summaries() terminal_labels = theme_tree.get_terminal_labels() From 9743bf37a8e53acbb9be21117ee8cb9008a53ee4 Mon Sep 17 00:00:00 2001 From: jaldana Date: Wed, 19 Nov 2025 17:30:57 +0100 Subject: [PATCH 79/82] Simplify default values --- src/bigdata_research_tools/workflows/thematic_screener.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bigdata_research_tools/workflows/thematic_screener.py b/src/bigdata_research_tools/workflows/thematic_screener.py index 5eeed88..4573f8f 100644 --- a/src/bigdata_research_tools/workflows/thematic_screener.py +++ b/src/bigdata_research_tools/workflows/thematic_screener.py @@ -39,7 +39,7 @@ def __init__( sources: list[str] | None = None, rerank_threshold: float | None = None, focus: str | None = None, - ground_mindmap: bool | None = None, + ground_mindmap: bool = False, ): """ This class will screen a universe's (specified in 'companies') exposure to a given theme ('main_theme'). @@ -149,7 +149,7 @@ def screen_companies( theme_tree, _ = mindmap_generator.generate_one_shot( main_theme=self.main_theme, focus=self.focus, - allow_grounding=self.ground_mindmap if self.ground_mindmap else False, + allow_grounding=self.ground_mindmap, instructions=None, date_range=None, map_type="theme", From 4517458e983e9b123b8d9fc52ea63bd8f0db0067 Mon Sep 17 00:00:00 2001 From: jaldana Date: Thu, 20 Nov 2025 10:22:37 +0100 Subject: [PATCH 80/82] Fix issue with rare, but existing deleted entities which cant be retrieved any more --- src/bigdata_research_tools/search/search_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/search/search_utils.py b/src/bigdata_research_tools/search/search_utils.py index cb0f936..95aabbb 100644 --- a/src/bigdata_research_tools/search/search_utils.py +++ b/src/bigdata_research_tools/search/search_utils.py @@ -68,7 +68,7 @@ def depth_first_search(batch: list[str]) -> None: try: batch_lookup = bigdata.knowledge_graph.get_entities(batch) - entities.extend([BigdataEntity.from_sdk(ent) for ent in batch_lookup]) + entities.extend([BigdataEntity.from_sdk(ent) for ent in batch_lookup if ent is not None]) except ValidationError as e: non_entities_found = findall(non_entity_key_pattern, str(e)) non_entities.extend(non_entities_found) From 510b2786c1719bd36e15377f416e0f9ddf91332e Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 21 Nov 2025 09:21:40 +0100 Subject: [PATCH 81/82] Add common logic to many cookbooks back --- src/bigdata_research_tools/labeler/labeler.py | 15 +++++++++++++++ src/bigdata_research_tools/search/search_utils.py | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/bigdata_research_tools/labeler/labeler.py b/src/bigdata_research_tools/labeler/labeler.py index adbb382..f0b9b38 100644 --- a/src/bigdata_research_tools/labeler/labeler.py +++ b/src/bigdata_research_tools/labeler/labeler.py @@ -120,6 +120,21 @@ def _deserialize_label_response(self, response: str) -> str: } return json.dumps(response_mapping) + def deserialize_label_responses_as_df(self, responses: list[str]) -> DataFrame: + responses_dict = [ + json.loads( + self._deserialize_label_response(self.parse_labeling_response(response)) + ) + for response in responses + ] + # merge a list of dicts into a single dict + merged_responses = {k: v for d in responses_dict for k, v in d.items()} + # Deserialize the responses + df_labeled = DataFrame.from_dict(merged_responses, orient="index") + df_labeled.index = df_labeled.index.astype(int) + + return df_labeled + def _run_labeling_prompts( self, prompts: list[str], diff --git a/src/bigdata_research_tools/search/search_utils.py b/src/bigdata_research_tools/search/search_utils.py index 95aabbb..a538aa7 100644 --- a/src/bigdata_research_tools/search/search_utils.py +++ b/src/bigdata_research_tools/search/search_utils.py @@ -68,7 +68,9 @@ def depth_first_search(batch: list[str]) -> None: try: batch_lookup = bigdata.knowledge_graph.get_entities(batch) - entities.extend([BigdataEntity.from_sdk(ent) for ent in batch_lookup if ent is not None]) + entities.extend( + [BigdataEntity.from_sdk(ent) for ent in batch_lookup if ent is not None] + ) except ValidationError as e: non_entities_found = findall(non_entity_key_pattern, str(e)) non_entities.extend(non_entities_found) From 3a6fe41dc6f3a5dbac390b7785f2547672c7a26d Mon Sep 17 00:00:00 2001 From: jaldana Date: Fri, 21 Nov 2025 14:46:09 +0100 Subject: [PATCH 82/82] Bump version --- CHANGELOG.md | 2 +- pyproject.toml | 2 +- uv.lock | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 441b067..6a024b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.99.0] - Unreleased +## [1.0.0] - 2025-11-21 Preparation for a first stable release. ## Added diff --git a/pyproject.toml b/pyproject.toml index 3db1bf8..8ec8813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bigdata-research-tools" -version = "1.0.0-beta-1" +version = "1.0.0" description = "Bigdata.com API High-Efficiency Tools at Scale" readme = "README.md" authors = [{ name = "Bigdata.com", email = "support@ravenpack.com" }] diff --git a/uv.lock b/uv.lock index ba80bc4..2f97d20 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.10, <4.0" resolution-markers = [ "python_full_version >= '3.12'", @@ -216,7 +216,7 @@ wheels = [ [[package]] name = "bigdata-research-tools" -version = "1.0.0b1" +version = "1.0.0" source = { editable = "." } dependencies = [ { name = "bigdata-client" },