From 12f33878c8f96f58e0802685c3d224b90b65841f Mon Sep 17 00:00:00 2001 From: whuwkl Date: Sat, 26 Jul 2025 17:02:00 +0800 Subject: [PATCH 1/2] fix: fix the accidental creation of folders during unit testing --- examples/config/config_example.py | 8 +++---- examples/config/default_config.yaml | 5 +---- examples/config/sample_config.yaml | 2 -- examples/config_example.py | 5 +---- quantmind/config/settings.py | 34 +++++------------------------ tests/config/test_settings.py | 19 ++++++++-------- 6 files changed, 19 insertions(+), 54 deletions(-) diff --git a/examples/config/config_example.py b/examples/config/config_example.py index 2bf3877..e07625f 100644 --- a/examples/config/config_example.py +++ b/examples/config/config_example.py @@ -23,7 +23,7 @@ def main(): default_setting = create_default_config() print(f" Source type: {type(default_setting.source).__name__}") print(f" Parser type: {type(default_setting.parser).__name__}") - print(f" Data directory: {default_setting.data_dir}") + print(f" Storage directory: {default_setting.storage.storage_dir}") # Example 2: Save default configuration to YAML print("\n2. Saving default configuration to YAML:") @@ -64,9 +64,8 @@ def main(): print(" - ARXIV_MAX_RESULTS=50") print(" - OPENAI_MODEL=gpt-3.5-turbo") print(" - LOG_LEVEL=DEBUG") - print(" - DATA_DIR=./custom_data") - env_vars = ["ARXIV_MAX_RESULTS", "OPENAI_MODEL", "LOG_LEVEL", "DATA_DIR"] + env_vars = ["ARXIV_MAX_RESULTS", "OPENAI_MODEL", "LOG_LEVEL"] for var in env_vars: value = os.getenv(var) status = "✅ Set" if value else "❌ Not set" @@ -74,7 +73,7 @@ def main(): # Example 5: Direct configuration creation print("\n5. Creating configuration programmatically:") - from quantmind.config import ArxivSourceConfig, PDFParserConfig, LLMConfig + from quantmind.config import ArxivSourceConfig, LLMConfig, PDFParserConfig custom_setting = Setting( source=ArxivSourceConfig( @@ -83,7 +82,6 @@ def main(): parser=PDFParserConfig(method="pymupdf", extract_tables=True), llm=LLMConfig(model="gpt-4o", temperature=0.3), log_level="DEBUG", - data_dir="./custom_data", ) print(f" ✅ Created custom configuration") diff --git a/examples/config/default_config.yaml b/examples/config/default_config.yaml index 9f5f8d1..6e13bcb 100644 --- a/examples/config/default_config.yaml +++ b/examples/config/default_config.yaml @@ -1,4 +1,3 @@ -data_dir: ./data llm: api_version: null base_url: null @@ -37,7 +36,5 @@ source: type: arxiv storage: config: - base_dir: /Users/wenkeli/work/LLMQuant/quant-mind/data - storage_dir: ./data + storage_dir: /Users/wenkeli/work/LLMQuant/quant-mind/data type: local -temp_dir: /tmp diff --git a/examples/config/sample_config.yaml b/examples/config/sample_config.yaml index f08275a..2038e36 100644 --- a/examples/config/sample_config.yaml +++ b/examples/config/sample_config.yaml @@ -59,5 +59,3 @@ llm: # Global settings log_level: ${LOG_LEVEL:INFO} -data_dir: ${DATA_DIR:./data} -temp_dir: ${TEMP_DIR:/tmp} diff --git a/examples/config_example.py b/examples/config_example.py index 57bd4d2..34ea86b 100644 --- a/examples/config_example.py +++ b/examples/config_example.py @@ -146,8 +146,6 @@ def create_sample_config(): # Set global settings settings.log_level = "INFO" - settings.data_dir = "./data" - settings.temp_dir = "./tmp" settings.arxiv_max_results = 500 return settings @@ -237,8 +235,7 @@ def show_configuration_details(settings): print(f"\nGlobal Settings:") print(f" Log level: {settings.log_level}") - print(f" Data directory: {settings.data_dir}") - print(f" Temp directory: {settings.temp_dir}") + print(f" Storage directory: {settings.storage.storage_dir}") print(f" ArXiv max results: {settings.arxiv_max_results}") print(f"\nSources ({len(settings.sources)}):") diff --git a/quantmind/config/settings.py b/quantmind/config/settings.py index 310e70e..6defbcb 100644 --- a/quantmind/config/settings.py +++ b/quantmind/config/settings.py @@ -9,7 +9,7 @@ from typing import Any, Dict, Optional, Union import yaml -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field from quantmind.config.flows import ( AnalyzerFlowConfig, @@ -56,8 +56,6 @@ class Setting(BaseModel): log_level: str = Field( default="INFO", pattern=r"^(DEBUG|INFO|WARNING|ERROR|CRITICAL)$" ) - data_dir: str = "./data" - temp_dir: str = "/tmp" class Config: """Pydantic model configuration.""" @@ -65,21 +63,6 @@ class Config: validate_assignment = True extra = "forbid" - @field_validator("data_dir", "temp_dir") - @classmethod - def validate_directories(cls, v: str) -> str: - """Validate and create directories if they don't exist.""" - path = Path(v).expanduser() - # Keep the original style for relative paths. - if not path.is_absolute(): - resolved_path = path.resolve() - resolved_path.mkdir(parents=True, exist_ok=True) - return v - else: - path = path.resolve() - path.mkdir(parents=True, exist_ok=True) - return str(path) - @classmethod def load_dotenv(cls, dotenv_path: Optional[str] = None) -> bool: """Load environment variables from .env file. @@ -246,9 +229,8 @@ def _parse_config(cls, config_dict: Dict[str, Any]) -> "Setting": parsed["llm"] = LLMConfig(**config_dict["llm"]) # Copy simple fields - for key in ["log_level", "data_dir", "temp_dir"]: - if key in config_dict: - parsed[key] = config_dict[key] + if "log_level" in config_dict: + parsed["log_level"] = config_dict["log_level"] return cls(**parsed) @@ -266,7 +248,7 @@ def create_default(cls) -> "Setting": download_pdfs=True, extract_tables=True, ), - storage=LocalStorageConfig(base_dir=Path("./data")), + storage=LocalStorageConfig(), ) def save_to_yaml(self, config_path: Union[str, Path]) -> None: @@ -357,13 +339,7 @@ def serialize_component(component, component_type_map): config_dict["llm"] = self.llm.model_dump(exclude={"api_key"}) # Export simple fields - config_dict.update( - { - "log_level": self.log_level, - "data_dir": self.data_dir, - "temp_dir": self.temp_dir, - } - ) + config_dict["log_level"] = self.log_level return config_dict diff --git a/tests/config/test_settings.py b/tests/config/test_settings.py index 6fcef48..0c5bdc4 100644 --- a/tests/config/test_settings.py +++ b/tests/config/test_settings.py @@ -1,6 +1,7 @@ """Unit tests for settings configuration system.""" import os +import shutil import tempfile import unittest from unittest.mock import patch @@ -25,8 +26,6 @@ def test_default_setting(self): setting = Setting() self.assertEqual(setting.log_level, "INFO") - self.assertEqual(setting.data_dir, "./data") - self.assertEqual(setting.temp_dir, "/tmp") self.assertIsNone(setting.source) self.assertIsNone(setting.parser) self.assertIsNone(setting.tagger) @@ -73,7 +72,6 @@ def test_parse_config_with_components(self): "config": {"max_tags": 8, "model": "gpt-4o"}, }, "log_level": "DEBUG", - "data_dir": "./test_data", } setting = Setting._parse_config(config_dict) @@ -96,7 +94,9 @@ def test_parse_config_with_components(self): # Test simple fields self.assertEqual(setting.log_level, "DEBUG") - self.assertEqual(setting.data_dir, "./test_data") + + if setting.storage.storage_dir.exists(): + shutil.rmtree(setting.storage.storage_dir) def test_parse_config_unknown_types(self): """Test parsing configuration with unknown component types.""" @@ -200,8 +200,6 @@ def test_substitute_env_vars(self): "max_results": "${MAX_RESULTS:50}", # with default }, }, - "data_dir": "${DATA_DIR:./default_data}", - "temp_dir": "${TEST_VAR}/temp", } result = Setting.substitute_env_vars(config_dict) @@ -211,8 +209,6 @@ def test_substitute_env_vars(self): self.assertEqual( result["source"]["config"]["max_results"], "50" ) # default used - self.assertEqual(result["data_dir"], "./default_data") # default used - self.assertEqual(result["temp_dir"], "test_value/temp") # Clean up del os.environ["TEST_VAR"] @@ -271,7 +267,6 @@ def test_export_config(self): parser=PDFParserConfig(method="pdfplumber", download_pdfs=True), tagger=LLMTaggerConfig(max_tags=5), log_level="DEBUG", - data_dir="./test_data", ) config_dict = setting._export_config() @@ -291,11 +286,15 @@ def test_export_config(self): # Test simple fields self.assertEqual(config_dict["log_level"], "DEBUG") - self.assertEqual(config_dict["data_dir"], "./test_data") # Test sensitive data exclusion self.assertNotIn("api_key", config_dict["llm"]) + assert setting.storage.storage_dir.exists() + if setting.storage.storage_dir.exists(): + shutil.rmtree(setting.storage.storage_dir) + assert not setting.storage.storage_dir.exists() + def test_save_to_yaml(self): """Test saving configuration to YAML file.""" setting = Setting( From 6366db411f1b8816a4c5132c7a2597ec63c55f90 Mon Sep 17 00:00:00 2001 From: whuwkl Date: Sun, 27 Jul 2025 13:54:56 +0800 Subject: [PATCH 2/2] chore(config): remove default configuration file --- examples/config/default_config.yaml | 40 ----------------------------- 1 file changed, 40 deletions(-) delete mode 100644 examples/config/default_config.yaml diff --git a/examples/config/default_config.yaml b/examples/config/default_config.yaml deleted file mode 100644 index 6e13bcb..0000000 --- a/examples/config/default_config.yaml +++ /dev/null @@ -1,40 +0,0 @@ -llm: - api_version: null - base_url: null - custom_instructions: null - extra_params: {} - max_tokens: 4000 - model: gpt-4o - retry_attempts: 3 - retry_delay: 1.0 - system_prompt: null - temperature: 0.0 - timeout: 60 - top_p: 1.0 -log_level: INFO -parser: - config: - download_pdfs: true - enable_caching: true - extract_images: false - extract_tables: true - max_file_size_mb: 50 - method: pymupdf - retry_attempts: 3 - timeout: 120 - type: pdf -source: - config: - download_pdfs: false - max_results: 100 - min_abstract_length: 50 - requests_per_second: 1.0 - retry_attempts: 3 - sort_by: submittedDate - sort_order: descending - timeout: 30 - type: arxiv -storage: - config: - storage_dir: /Users/wenkeli/work/LLMQuant/quant-mind/data - type: local