# --- src/basic_open_agent_tools/helpers.py (reconstructed from corrupted patch) ---
#
# NOTE(review): the same patch also edits src/basic_open_agent_tools/__init__.py,
# annotating ``__all__: List[str]`` while *deleting* ``from typing import List``.
# That hunk raises NameError on package import — restore the import (or drop the
# annotation) when applying it.
"""Helper functions for loading and managing tool collections."""

import inspect
from typing import Any, Callable, List, Union


def _load_public_callables(module: Any) -> List[Callable[..., Any]]:
    """Return every callable exported through *module*'s ``__all__``.

    Shared implementation for the per-module loaders below; non-callable
    exports (constants, type aliases) are silently skipped.
    """
    tools: List[Callable[..., Any]] = []
    for name in module.__all__:
        candidate = getattr(module, name)
        if callable(candidate):
            tools.append(candidate)
    return tools


def load_all_filesystem_tools() -> List[Callable[..., Any]]:
    """Load all file system tools as a list of callable functions.

    Returns:
        List of all file system tool functions

    Example:
        >>> fs_tools = load_all_filesystem_tools()
        >>> len(fs_tools) > 0
        True
    """
    # Imported lazily: this module is itself imported from the package
    # __init__, so a module-level ``from . import file_system`` would create
    # a circular import.
    from . import file_system

    return _load_public_callables(file_system)


def load_all_text_tools() -> List[Callable[..., Any]]:
    """Load all text processing tools as a list of callable functions.

    Returns:
        List of all text processing tool functions

    Example:
        >>> text_tools = load_all_text_tools()
        >>> len(text_tools) > 0
        True
    """
    # Lazy import for the same circular-import reason as above.
    from . import text

    return _load_public_callables(text)


def merge_tool_lists(
    *args: Union[List[Callable[..., Any]], Callable[..., Any]],
) -> List[Callable[..., Any]]:
    """Merge multiple tool lists and individual functions into a single list.

    Args:
        *args: Tool lists (List[Callable]) and/or individual functions (Callable)

    Returns:
        Combined list of all tools, in the order given

    Raises:
        TypeError: If any argument is not a list of callables or a callable

    Example:
        >>> def custom_tool(x): return x
        >>> fs_tools = load_all_filesystem_tools()
        >>> all_tools = merge_tool_lists(fs_tools, custom_tool)
        >>> custom_tool in all_tools
        True
    """
    merged: List[Callable[..., Any]] = []

    for arg in args:
        if callable(arg):
            # A single bare function.
            merged.append(arg)
        elif isinstance(arg, list):
            # A list of functions; validate every element.
            for item in arg:
                if not callable(item):
                    raise TypeError(
                        f"All items in tool lists must be callable, got {type(item)}"
                    )
                merged.append(item)
        else:
            raise TypeError(
                f"Arguments must be callable or list of callables, got {type(arg)}"
            )

    return merged


def get_tool_info(tool: Callable[..., Any]) -> dict:
    """Get information about a tool function.

    Args:
        tool: The tool function to inspect

    Returns:
        Dictionary with keys ``name``, ``docstring``, ``signature``,
        ``module`` and ``parameters``

    Raises:
        TypeError: If *tool* is not callable

    Example:
        >>> def clean(text): "Clean text."
        >>> get_tool_info(clean)['name']
        'clean'
    """
    if not callable(tool):
        raise TypeError("Tool must be callable")

    sig = inspect.signature(tool)

    return {
        "name": tool.__name__,
        "docstring": tool.__doc__ or "",
        "signature": str(sig),
        "module": getattr(tool, "__module__", "unknown"),
        "parameters": list(sig.parameters.keys()),
    }


def list_all_available_tools() -> dict:
    """List all available tools organized by category.

    Returns:
        Dictionary with tool categories as keys and lists of tool-info
        dictionaries (see :func:`get_tool_info`) as values

    Example:
        >>> tools = list_all_available_tools()
        >>> 'file_system' in tools and 'text' in tools
        True
    """
    return {
        "file_system": [get_tool_info(t) for t in load_all_filesystem_tools()],
        "text": [get_tool_info(t) for t in load_all_text_tools()],
    }
# --- src/basic_open_agent_tools/text/processing.py (reconstructed from corrupted patch) ---
"""Core text processing utilities for AI agents."""

import re
import textwrap
import unicodedata
from typing import List, Literal


def clean_whitespace(text: str) -> str:
    """Clean and normalize whitespace in text.

    Args:
        text: Input text to clean

    Returns:
        Text with normalized whitespace (single spaces, no leading/trailing)

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> clean_whitespace("  hello   world \\n\\t ")
        'hello world'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Collapse every whitespace run (spaces, tabs, newlines) to one space,
    # then strip the ends.
    return re.sub(r"\s+", " ", text).strip()


def normalize_line_endings(text: str, style: str = "unix") -> str:
    """Normalize line endings in text.

    Args:
        text: Input text to normalize
        style: Line ending style ("unix", "windows", "mac")

    Returns:
        Text with normalized line endings

    Raises:
        TypeError: If *text* is not a string
        ValueError: If style is not supported

    Example:
        >>> normalize_line_endings("line1\\r\\nline2\\rline3\\n", "unix")
        'line1\\nline2\\nline3\\n'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    line_endings = {"unix": "\n", "windows": "\r\n", "mac": "\r"}

    if style not in line_endings:
        raise ValueError(f"Unsupported line ending style: {style}")

    # Normalize everything to LF first so mixed inputs collapse cleanly.
    normalized = re.sub(r"\r\n|\r|\n", "\n", text)

    # Then expand to the target style if it is not LF.
    if style != "unix":
        normalized = normalized.replace("\n", line_endings[style])

    return normalized


def strip_html_tags(text: str) -> str:
    """Remove HTML tags from text.

    Args:
        text: Input text containing HTML tags

    Returns:
        Text with HTML tags removed and whitespace normalized

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> strip_html_tags("<p>Hello <b>world</b>!</p>")
        'Hello world!'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Two-pass removal keeps punctuation tight against the preceding word:
    # 1) tags immediately followed by punctuation are deleted outright,
    cleaned = re.sub(r"<[^>]+>(?=[^\w\s])", "", text)
    # 2) all remaining tags become a single space,
    cleaned = re.sub(r"<[^>]+>", " ", cleaned)
    # 3) and the resulting extra whitespace is collapsed.
    return clean_whitespace(cleaned)


def normalize_unicode(
    text: str, form: Literal["NFC", "NFD", "NFKC", "NFKD"] = "NFC"
) -> str:
    """Normalize Unicode text.

    Args:
        text: Input text to normalize
        form: Unicode normalization form ("NFC", "NFD", "NFKC", "NFKD")

    Returns:
        Unicode-normalized text

    Raises:
        TypeError: If *text* is not a string
        ValueError: If normalization form is not supported

    Example:
        >>> normalize_unicode("cafe\\u0301")  # decomposed accent -> composed
        'café'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    valid_forms = ["NFC", "NFD", "NFKC", "NFKD"]
    if form not in valid_forms:
        raise ValueError(f"Unsupported normalization form: {form}")

    return unicodedata.normalize(form, text)


def to_snake_case(text: str) -> str:
    """Convert text to snake_case.

    Args:
        text: Input text to convert

    Returns:
        Text converted to snake_case

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> to_snake_case("HelloWorld")
        'hello_world'
        >>> to_snake_case("hello-world test")
        'hello_world_test'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Spaces and hyphens become underscores.
    text = re.sub(r"[-\s]+", "_", text)
    # Acronym boundary: "XMLHttp" -> "XML_Http".
    text = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", text)
    # camelCase boundary: "aB" -> "a_B".
    text = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", text)
    return text.lower()


def to_camel_case(text: str, upper_first: bool = False) -> str:
    """Convert text to camelCase or PascalCase.

    Args:
        text: Input text to convert
        upper_first: If True, use PascalCase (first letter uppercase)

    Returns:
        Text converted to camelCase or PascalCase

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> to_camel_case("hello_world")
        'helloWorld'
        >>> to_camel_case("hello-world", upper_first=True)
        'HelloWorld'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Split on hyphens, underscores, and whitespace; drop empty pieces.
    words = [w for w in re.split(r"[-_\s]+", text.lower()) if w]

    if not words:
        return ""

    if upper_first:
        return "".join(word.capitalize() for word in words)
    return words[0] + "".join(word.capitalize() for word in words[1:])


def to_title_case(text: str) -> str:
    """Convert text to Title Case, preserving the original separators.

    Each word is capitalized in place; spaces, hyphens, and underscores
    are kept exactly as they appear in the input.

    Args:
        text: Input text to convert

    Returns:
        Text converted to Title Case

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> to_title_case("hello world")
        'Hello World'
        >>> to_title_case("the-quick_brown fox")
        'The-Quick_Brown Fox'
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Split on word separators (spaces, hyphens, underscores) but keep them
    # (the capturing group makes re.split return the separators too).
    parts = re.split(r"([\s\-_]+)", text)
    return "".join(
        part if re.fullmatch(r"[\s\-_]+", part) else part.capitalize()
        for part in parts
    )


def smart_split_lines(
    text: str, max_length: int, preserve_words: bool = True
) -> List[str]:
    """Split text into lines with maximum length.

    Args:
        text: Input text to split
        max_length: Maximum characters per line
        preserve_words: If True, avoid breaking words

    Returns:
        List of text lines

    Raises:
        TypeError: If *text* is not a string
        ValueError: If max_length is less than 1

    Example:
        >>> smart_split_lines("This is a long line that needs splitting", 10)
        ['This is a', 'long line', 'that needs', 'splitting']
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    if preserve_words:
        # textwrap handles word-preserving wrapping, including tab expansion
        # and whitespace collapsing.
        wrapper = textwrap.TextWrapper(
            width=max_length,
            break_long_words=False,
            break_on_hyphens=True,
            expand_tabs=True,
            replace_whitespace=True,
            drop_whitespace=True,
        )
        return wrapper.wrap(text)

    # Plain fixed-width chunking, words may be cut.
    return [text[i : i + max_length] for i in range(0, len(text), max_length)]


def extract_sentences(text: str) -> List[str]:
    """Extract sentences from text using simple punctuation rules.

    Args:
        text: Input text to extract sentences from

    Returns:
        List of sentences with their terminating punctuation attached;
        a trailing fragment without a terminator is returned as-is

    Raises:
        TypeError: If *text* is not a string

    Example:
        >>> extract_sentences("Hello world. How are you? Fine!")
        ['Hello world.', 'How are you?', 'Fine!']
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string")

    # Split on runs of . ! ? followed by whitespace or end of string, keeping
    # the punctuation via a capture group so it can be re-attached.  (The
    # previous approach located each sentence with str.find() to recover its
    # terminator, which attached the wrong punctuation whenever identical
    # sentence text occurred more than once.)
    parts = re.split(r"([.!?]+)(?:\s+|$)", text)

    # parts alternates [body, punctuation, body, punctuation, ..., tail].
    sentences = []
    for i in range(0, len(parts) - 1, 2):
        body = parts[i].strip()
        if body:
            sentences.append(body + parts[i + 1])

    # A tail with no terminating punctuation still counts as a sentence.
    tail = parts[-1].strip() if len(parts) % 2 == 1 else ""
    if tail:
        sentences.append(tail)

    return sentences


def join_with_oxford_comma(items: List[str], conjunction: str = "and") -> str:
    """Join a list of items with Oxford comma.

    Args:
        items: List of items to join
        conjunction: Word to use before the last item

    Returns:
        Items joined with Oxford comma (two items get no comma)

    Raises:
        TypeError: If *items* is not a list

    Example:
        >>> join_with_oxford_comma(["apples", "bananas", "oranges"])
        'apples, bananas, and oranges'
        >>> join_with_oxford_comma(["Alice", "Bob"], "or")
        'Alice or Bob'
    """
    if not isinstance(items, list):
        raise TypeError("Items must be a list")

    if not items:
        return ""

    if len(items) == 1:
        return str(items[0])

    if len(items) == 2:
        return f"{items[0]} {conjunction} {items[1]}"

    # Three or more items - use the Oxford comma before the conjunction.
    return f"{', '.join(items[:-1])}, {conjunction} {items[-1]}"
# --- tests/test_helpers.py (reconstructed from corrupted patch) ---
"""Tests for helper functions module."""

import pytest

import basic_open_agent_tools as boat
from basic_open_agent_tools.helpers import (
    get_tool_info,
    list_all_available_tools,
    load_all_filesystem_tools,
    load_all_text_tools,
    merge_tool_lists,
)


class TestHelperFunctions:
    """Unit tests for the individual helper functions."""

    def test_load_all_filesystem_tools(self):
        """File system loader returns a non-empty list of callables."""
        fs_tools = load_all_filesystem_tools()

        assert isinstance(fs_tools, list)
        assert len(fs_tools) > 0
        assert all(callable(tool) for tool in fs_tools)

        # Key file system functions must be present.
        names = {tool.__name__ for tool in fs_tools}
        assert {"read_file_to_string", "write_file_from_string", "file_exists"} <= names

    def test_load_all_text_tools(self):
        """Text loader returns a non-empty list of callables."""
        text_tools = load_all_text_tools()

        assert isinstance(text_tools, list)
        assert len(text_tools) > 0
        assert all(callable(tool) for tool in text_tools)

        # Key text processing functions must be present.
        names = {tool.__name__ for tool in text_tools}
        assert {"clean_whitespace", "to_snake_case", "strip_html_tags"} <= names

    def test_merge_tool_lists_with_lists(self):
        """Merging two lists yields their concatenation."""
        fs_tools = load_all_filesystem_tools()
        text_tools = load_all_text_tools()

        merged = merge_tool_lists(fs_tools, text_tools)

        assert isinstance(merged, list)
        assert len(merged) == len(fs_tools) + len(text_tools)
        assert all(callable(tool) for tool in merged)

    def test_merge_tool_lists_with_individual_functions(self):
        """Bare functions can be merged alongside a list."""

        def custom_tool_1(x: str) -> str:
            return x.upper()

        def custom_tool_2(x: str) -> str:
            return x.lower()

        fs_tools = load_all_filesystem_tools()
        merged = merge_tool_lists(fs_tools, custom_tool_1, custom_tool_2)

        assert len(merged) == len(fs_tools) + 2
        assert custom_tool_1 in merged
        assert custom_tool_2 in merged

    def test_merge_tool_lists_mixed_args(self):
        """Lists and bare functions can be interleaved."""

        def custom_tool(x: str) -> str:
            return x + "_custom"

        fs_tools = load_all_filesystem_tools()
        text_tools = load_all_text_tools()

        merged = merge_tool_lists(fs_tools, custom_tool, text_tools)

        assert len(merged) == len(fs_tools) + 1 + len(text_tools)
        assert custom_tool in merged

    def test_merge_tool_lists_empty_lists(self):
        """Empty lists contribute nothing to the merge."""

        def custom_tool(x: str) -> str:
            return x

        merged = merge_tool_lists([], custom_tool, [])

        assert len(merged) == 1
        assert custom_tool in merged

    def test_merge_tool_lists_invalid_arguments(self):
        """Non-callable arguments are rejected with TypeError."""
        with pytest.raises(TypeError):
            merge_tool_lists(["not_callable"])

        with pytest.raises(TypeError):
            merge_tool_lists("not_a_list_or_function")

        def valid_tool():
            pass

        # One bad item inside an otherwise valid list still fails.
        with pytest.raises(TypeError):
            merge_tool_lists([valid_tool, "invalid"])

    def test_get_tool_info(self):
        """Tool info contains name, docstring, signature, and parameters."""
        from basic_open_agent_tools.text import clean_whitespace

        info = get_tool_info(clean_whitespace)

        assert isinstance(info, dict)
        for key in ("name", "docstring", "signature", "module", "parameters"):
            assert key in info

        assert info["name"] == "clean_whitespace"
        assert len(info["docstring"]) > 0
        assert "text" in info["parameters"]

    def test_get_tool_info_invalid_input(self):
        """Non-callables are rejected with TypeError."""
        for bad in ("not_a_function", 123):
            with pytest.raises(TypeError):
                get_tool_info(bad)

    def test_list_all_available_tools(self):
        """Catalog is a dict of category -> list of tool-info dicts."""
        tools = list_all_available_tools()

        assert isinstance(tools, dict)
        assert "file_system" in tools
        assert "text" in tools

        for category_tools in tools.values():
            assert isinstance(category_tools, list)
            for tool_info in category_tools:
                assert isinstance(tool_info, dict)
                for key in ("name", "docstring", "signature"):
                    assert key in tool_info

    def test_top_level_imports(self):
        """Helper functions are re-exported at the package top level."""
        for name in (
            "load_all_filesystem_tools",
            "load_all_text_tools",
            "merge_tool_lists",
            "get_tool_info",
            "list_all_available_tools",
        ):
            assert hasattr(boat, name)

        assert callable(boat.load_all_filesystem_tools)
        assert callable(boat.load_all_text_tools)
        assert callable(boat.merge_tool_lists)


class TestHelperFunctionsIntegration:
    """End-to-end tests combining loading, merging, and inspection."""

    def test_complete_workflow(self):
        """Load both collections, add a custom tool, and merge them all."""
        fs_tools = boat.load_all_filesystem_tools()
        text_tools = boat.load_all_text_tools()

        def my_custom_tool(some_var: str) -> str:
            return some_var + some_var

        agent_tools = boat.merge_tool_lists(fs_tools, text_tools, my_custom_tool)

        assert isinstance(agent_tools, list)
        assert len(agent_tools) == len(fs_tools) + len(text_tools) + 1
        assert my_custom_tool in agent_tools
        assert all(callable(tool) for tool in agent_tools)

    def test_tool_discovery(self):
        """The catalog covers every category with complete info entries."""
        all_tools = boat.list_all_available_tools()

        assert len(all_tools) >= 2

        for tools in all_tools.values():
            assert len(tools) > 0
            for tool_info in tools:
                assert tool_info["name"]
                assert "signature" in tool_info
                assert "parameters" in tool_info

    def test_tool_inspection(self):
        """get_tool_info works on a real loaded tool."""
        sample_tool = boat.load_all_filesystem_tools()[0]
        info = boat.get_tool_info(sample_tool)

        assert info["name"]
        assert info["signature"]
        assert isinstance(info["parameters"], list)
        assert "basic_open_agent_tools" in info["module"]
# --- tests/test_text_processing.py (reconstructed from corrupted patch) ---
#
# NOTE(review): the hunks between the HTML-stripping tests and the integration
# tests were destroyed in the corrupted patch and are not reproduced here —
# recover them from the original commit.
"""Tests for text processing module."""

import pytest

from basic_open_agent_tools.text.processing import (
    clean_whitespace,
    extract_sentences,
    join_with_oxford_comma,
    normalize_line_endings,
    normalize_unicode,
    smart_split_lines,
    strip_html_tags,
    to_camel_case,
    to_snake_case,
    to_title_case,
)


class TestTextProcessing:
    """Test cases for text processing functions."""

    def test_clean_whitespace(self):
        """Whitespace runs collapse to single spaces; ends are stripped."""
        assert clean_whitespace("  hello   world  ") == "hello world"
        assert clean_whitespace("hello\t\n\r world") == "hello world"
        assert clean_whitespace("") == ""
        assert clean_whitespace(" \t\n ") == ""
        assert clean_whitespace("hello world") == "hello world"

        with pytest.raises(TypeError):
            clean_whitespace(123)

    def test_normalize_line_endings(self):
        """Mixed endings normalize to the requested style."""
        assert (
            normalize_line_endings("line1\r\nline2\rline3\n")
            == "line1\nline2\nline3\n"
        )
        assert normalize_line_endings("line1\nline2", "windows") == "line1\r\nline2"
        assert normalize_line_endings("line1\nline2", "mac") == "line1\rline2"

        with pytest.raises(ValueError):
            normalize_line_endings("text", "invalid")
        with pytest.raises(TypeError):
            normalize_line_endings(123)

    def test_strip_html_tags(self):
        """Tags are removed without leaving stray spaces before punctuation."""
        # NOTE(review): the exact markup literals were destroyed in the patch;
        # these inputs are plausible reconstructions — verify against the
        # original file.
        assert strip_html_tags("<p>Hello world</p>") == "Hello world"
        assert (
            strip_html_tags("<div><p>Hello <b>world</b>!</p></div>")
            == "Hello world!"
        )


class TestTextProcessingIntegration:
    """Tests that chain several processing functions together."""

    def test_clean_html_then_whitespace(self):
        """Stripping HTML and cleaning whitespace compose cleanly."""
        # NOTE(review): reconstructed input literal — the original markup was
        # lost in the corrupted patch.
        messy_text = "  <h1>Hello</h1>   <p>World!</p>  \r\n\t  "

        cleaned = strip_html_tags(messy_text)
        normalized = clean_whitespace(cleaned)

        assert normalized == "Hello World!"

    def test_case_conversion_roundtrip(self):
        """snake -> camel -> snake is the identity for simple names."""
        original = "hello_world_test"
        assert to_snake_case(to_camel_case(original)) == original

    def test_text_splitting_and_joining(self):
        """Splitting into short lines and rejoining reproduces the input."""
        original = "The quick brown fox jumps over the lazy dog"
        lines = smart_split_lines(original, 15)
        assert " ".join(lines) == original