From 8eee452beb755d812b4c7bf297d5508430815160 Mon Sep 17 00:00:00 2001
From: Wes Etheredge
Date: Tue, 24 Jun 2025 17:30:22 -0500
Subject: [PATCH] Implement JSON and CSV data processing tools (Phase 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added comprehensive data module with JSON and CSV processing capabilities:

Infrastructure:
- New exception classes: DataError, ValidationError, SerializationError
- Data-specific type definitions: DataDict, NestedData, ValidationResult
- Helper functions for loading tools by category

JSON Tools (json_tools.py):
- safe_json_serialize/deserialize with error handling
- validate_json_string for validation without parsing
- compress/decompress_json_data for efficient storage
- Full Unicode support and comprehensive error handling

CSV Tools (csv_tools.py):
- read/write_csv_file with flexible delimiter and header options
- csv_to_dict_list and dict_list_to_csv for string conversion
- detect_csv_delimiter for auto-detection
- validate_csv_structure for file validation
- clean_csv_data with configurable cleaning rules

Testing:
- 71 comprehensive tests covering all functions
- 91% coverage for CSV tools, 100% for JSON tools
- Edge cases, error conditions, and round-trip validation

Integration:
- Updated main package to export data module
- Added helper functions for selective tool loading
- Maintains project's zero runtime dependencies

πŸ€– Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 src/basic_open_agent_tools/__init__.py        |  10 +-
 src/basic_open_agent_tools/data/__init__.py   |  41 +-
 src/basic_open_agent_tools/data/csv_tools.py  | 376 +++++++++++++++
 src/basic_open_agent_tools/data/json_tools.py | 144 ++++++
 src/basic_open_agent_tools/exceptions.py      |  18 +
 src/basic_open_agent_tools/helpers.py         |  87 +++-
 src/basic_open_agent_tools/types.py           |   7 +-
 tests/test_data_csv_tools.py                  | 454 ++++++++++++++++++
 tests/test_data_json_tools.py                 | 245 ++++++++++
 tests/test_helpers.py                         |  21 +-
 10 files changed, 1388 insertions(+), 15 deletions(-)
 create mode 100644 src/basic_open_agent_tools/data/csv_tools.py
 create mode 100644 src/basic_open_agent_tools/data/json_tools.py
 create mode 100644 tests/test_data_csv_tools.py
 create mode 100644 tests/test_data_json_tools.py
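Usage sketch (reviewer note, not part of the diff): the new helpers are exported
from the package root, so tool loading looks like the following. The `boat`
alias is illustrative only.

    import basic_open_agent_tools as boat

    # Load data tools as plain callables for agent frameworks
    all_data = boat.load_all_data_tools()      # JSON + CSV tools
    json_only = boat.load_data_json_tools()    # 5 functions
    csv_only = boat.load_data_csv_tools()      # 7 functions

    # Combine with other categories; merge_tool_lists deduplicates
    toolkit = boat.merge_tool_lists(boat.load_all_filesystem_tools(), all_data)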
diff --git a/src/basic_open_agent_tools/__init__.py b/src/basic_open_agent_tools/__init__.py
index 6ce766b..549ea29 100644
--- a/src/basic_open_agent_tools/__init__.py
+++ b/src/basic_open_agent_tools/__init__.py
@@ -9,14 +9,17 @@
 __version__ = "0.2.0"

 # Modular structure
-from . import exceptions, file_system, text, types
+from . import data, exceptions, file_system, text, types

 # Helper functions for tool management
 from .helpers import (
     get_tool_info,
     list_all_available_tools,
+    load_all_data_tools,
     load_all_filesystem_tools,
     load_all_text_tools,
+    load_data_csv_tools,
+    load_data_json_tools,
     merge_tool_lists,
 )
@@ -31,10 +34,10 @@
     # Implemented modules
     "file_system",
     "text",
+    "data",
     # Future modules (uncomment when implemented)
     # "system",
     # "network",
-    # "data",
     # "crypto",
     # "utilities",
     # Common infrastructure
@@ -43,6 +46,9 @@
     # Helper functions
     "load_all_filesystem_tools",
     "load_all_text_tools",
+    "load_all_data_tools",
+    "load_data_json_tools",
+    "load_data_csv_tools",
     "merge_tool_lists",
     "get_tool_info",
     "list_all_available_tools",
diff --git a/src/basic_open_agent_tools/data/__init__.py b/src/basic_open_agent_tools/data/__init__.py
index 56feeb7..6246349 100644
--- a/src/basic_open_agent_tools/data/__init__.py
+++ b/src/basic_open_agent_tools/data/__init__.py
@@ -1,8 +1,45 @@
 """Data tools for AI agents.

-This module is not yet implemented. See TODO.md for planned functionality.
+This module provides data processing and manipulation tools organized into logical submodules:
+
+- json_tools: JSON serialization, compression, and validation
+- csv_tools: CSV file processing, parsing, and cleaning
 """

 from typing import List

-__all__: List[str] = []  # No functions available yet
+# Import all functions from submodules
+from .csv_tools import (
+    clean_csv_data,
+    csv_to_dict_list,
+    detect_csv_delimiter,
+    dict_list_to_csv,
+    read_csv_file,
+    validate_csv_structure,
+    write_csv_file,
+)
+from .json_tools import (
+    compress_json_data,
+    decompress_json_data,
+    safe_json_deserialize,
+    safe_json_serialize,
+    validate_json_string,
+)
+
+# Re-export all functions at module level for convenience
+__all__: List[str] = [
+    # JSON processing
+    "safe_json_serialize",
+    "safe_json_deserialize",
+    "validate_json_string",
+    "compress_json_data",
+    "decompress_json_data",
+    # CSV processing
+    "read_csv_file",
+    "write_csv_file",
+    "csv_to_dict_list",
+    "dict_list_to_csv",
+    "detect_csv_delimiter",
+    "validate_csv_structure",
+    "clean_csv_data",
+]
diff --git a/src/basic_open_agent_tools/data/csv_tools.py b/src/basic_open_agent_tools/data/csv_tools.py
new file mode 100644
index 0000000..76bc4a8
--- /dev/null
+++ b/src/basic_open_agent_tools/data/csv_tools.py
@@ -0,0 +1,376 @@
+"""CSV processing utilities for AI agents."""
+
+import csv
+import io
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from ..exceptions import DataError
+from ..types import DataDict, PathLike
+
+
+def read_csv_file(
+    file_path: PathLike, delimiter: str = ",", headers: bool = True
+) -> List[DataDict]:
+    """Read CSV file and return as list of dictionaries.
+
+    Args:
+        file_path: Path to the CSV file
+        delimiter: CSV field delimiter
+        headers: Whether first row contains headers
+
+    Returns:
+        List of dictionaries representing CSV rows
+
+    Raises:
+        DataError: If file cannot be read or parsed
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> # Assuming file contains: name,age\\nAlice,25\\nBob,30
+        >>> data = read_csv_file("people.csv")
+        >>> data
+        [{'name': 'Alice', 'age': '25'}, {'name': 'Bob', 'age': '30'}]
+    """
+    if not isinstance(file_path, (str, Path)):
+        raise TypeError("file_path must be a string or Path")
+    if not isinstance(delimiter, str):
+        raise TypeError("delimiter must be a string")
+    if not isinstance(headers, bool):
+        raise TypeError("headers must be a boolean")
+
+    file_path = Path(file_path)
+
+    try:
+        with open(file_path, encoding="utf-8", newline="") as csvfile:
+            if headers:
+                reader = csv.DictReader(csvfile, delimiter=delimiter)
+                return [dict(row) for row in reader]
+            else:
+                reader = csv.reader(csvfile, delimiter=delimiter)  # type: ignore[assignment]
+                rows = list(reader)
+                if not rows:
+                    return []
+                # Create numeric headers for headerless CSV
+                num_cols = len(rows[0]) if rows else 0
+                headers_list = [f"col_{i}" for i in range(num_cols)]
+                return [dict(zip(headers_list, row)) for row in rows]
+    except FileNotFoundError:
+        raise DataError(f"CSV file not found: {file_path}")
+    except UnicodeDecodeError as e:
+        raise DataError(f"Failed to decode CSV file {file_path}: {e}")
+    except csv.Error as e:
+        raise DataError(f"Failed to parse CSV file {file_path}: {e}")
+
+
+def write_csv_file(
+    data: List[DataDict],
+    file_path: PathLike,
+    delimiter: str = ",",
+    headers: bool = True,
+) -> None:
+    """Write list of dictionaries to CSV file.
+
+    Args:
+        data: List of dictionaries to write
+        file_path: Path where CSV file will be created
+        delimiter: CSV field delimiter
+        headers: Whether to write headers as first row
+
+    Raises:
+        DataError: If file cannot be written
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> data = [{'name': 'Alice', 'age': 25}, {'name': 'Bob', 'age': 30}]
+        >>> write_csv_file(data, "output.csv")
+    """
+    if not isinstance(data, list):
+        raise TypeError("data must be a list")
+    if not isinstance(file_path, (str, Path)):
+        raise TypeError("file_path must be a string or Path")
+    if not isinstance(delimiter, str):
+        raise TypeError("delimiter must be a string")
+    if not isinstance(headers, bool):
+        raise TypeError("headers must be a boolean")
+
+    if not data:
+        # Write empty file for empty data
+        Path(file_path).write_text("", encoding="utf-8")
+        return
+
+    # Validate all items are dictionaries
+    if not all(isinstance(item, dict) for item in data):
+        raise TypeError("All items in data must be dictionaries")
+
+    file_path = Path(file_path)
+
+    try:
+        # Get all unique fieldnames from all dictionaries
+        fieldnames = []
+        for item in data:
+            for key in item.keys():
+                if key not in fieldnames:
+                    fieldnames.append(key)
+
+        with open(file_path, "w", encoding="utf-8", newline="") as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=delimiter)
+            if headers:
+                writer.writeheader()
+            writer.writerows(data)
+    except OSError as e:
+        raise DataError(f"Failed to write CSV file {file_path}: {e}")
+
+
+def csv_to_dict_list(csv_data: str, delimiter: str = ",") -> List[DataDict]:
+    """Convert CSV string to list of dictionaries.
+
+    Args:
+        csv_data: CSV data as string
+        delimiter: CSV field delimiter
+
+    Returns:
+        List of dictionaries representing CSV rows
+
+    Raises:
+        DataError: If CSV data cannot be parsed
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> csv_str = "name,age\\nAlice,25\\nBob,30"
+        >>> csv_to_dict_list(csv_str)
+        [{'name': 'Alice', 'age': '25'}, {'name': 'Bob', 'age': '30'}]
+    """
+    if not isinstance(csv_data, str):
+        raise TypeError("csv_data must be a string")
+    if not isinstance(delimiter, str):
+        raise TypeError("delimiter must be a string")
+
+    try:
+        reader = csv.DictReader(io.StringIO(csv_data), delimiter=delimiter)
+        return [dict(row) for row in reader]
+    except csv.Error as e:
+        raise DataError(f"Failed to parse CSV data: {e}")
+
+
+def dict_list_to_csv(data: List[DataDict], delimiter: str = ",") -> str:
+    """Convert list of dictionaries to CSV string.
+
+    Args:
+        data: List of dictionaries to convert
+        delimiter: CSV field delimiter
+
+    Returns:
+        CSV data as string (rows end in \\r\\n, the csv module's default
+        line terminator)
+
+    Raises:
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> data = [{'name': 'Alice', 'age': 25}, {'name': 'Bob', 'age': 30}]
+        >>> dict_list_to_csv(data)
+        'name,age\\r\\nAlice,25\\r\\nBob,30\\r\\n'
+    """
+    if not isinstance(data, list):
+        raise TypeError("data must be a list")
+    if not isinstance(delimiter, str):
+        raise TypeError("delimiter must be a string")
+
+    if not data:
+        return ""
+
+    # Validate all items are dictionaries
+    if not all(isinstance(item, dict) for item in data):
+        raise TypeError("All items in data must be dictionaries")
+
+    # Get all unique fieldnames
+    fieldnames = []
+    for item in data:
+        for key in item.keys():
+            if key not in fieldnames:
+                fieldnames.append(key)
+
+    output = io.StringIO()
+    writer = csv.DictWriter(output, fieldnames=fieldnames, delimiter=delimiter)
+    writer.writeheader()
+    writer.writerows(data)
+    return output.getvalue()
+
+
+def detect_csv_delimiter(file_path: PathLike, sample_size: int = 1024) -> str:
+    """Auto-detect CSV delimiter by analyzing file content.
+
+    Args:
+        file_path: Path to the CSV file
+        sample_size: Number of characters to sample for detection
+
+    Returns:
+        Detected delimiter character
+
+    Raises:
+        DataError: If file cannot be read or delimiter cannot be detected
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> detect_csv_delimiter("data.csv")
+        ','
+        >>> detect_csv_delimiter("data.tsv")
+        '\\t'
+    """
+    if not isinstance(file_path, (str, Path)):
+        raise TypeError("file_path must be a string or Path")
+    if not isinstance(sample_size, int) or sample_size <= 0:
+        raise TypeError("sample_size must be a positive integer")
+
+    file_path = Path(file_path)
+
+    try:
+        with open(file_path, encoding="utf-8") as csvfile:
+            sample = csvfile.read(sample_size)
+
+        if not sample:
+            raise DataError("File is empty, cannot detect delimiter")
+
+        sniffer = csv.Sniffer()
+        delimiter = sniffer.sniff(sample).delimiter
+        return delimiter
+    except FileNotFoundError:
+        raise DataError(f"CSV file not found: {file_path}")
+    except UnicodeDecodeError as e:
+        raise DataError(f"Failed to decode CSV file {file_path}: {e}")
+    except csv.Error as e:
+        raise DataError(f"Failed to detect delimiter in {file_path}: {e}")
+
+
+def validate_csv_structure(
+    file_path: PathLike, expected_columns: Optional[List[str]] = None
+) -> bool:
+    """Validate CSV file structure and column headers.
+
+    Args:
+        file_path: Path to the CSV file
+        expected_columns: List of expected column names (None to skip check)
+
+    Returns:
+        True if CSV structure is valid (invalid files raise DataError rather
+        than returning False)
+
+    Raises:
+        DataError: If file cannot be read or structure is invalid
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> validate_csv_structure("data.csv", ["name", "age", "email"])
+        True
+    """
+    if not isinstance(file_path, (str, Path)):
+        raise TypeError("file_path must be a string or Path")
+    if expected_columns is not None and not isinstance(expected_columns, list):
+        raise TypeError("expected_columns must be a list or None")
+
+    try:
+        # Check if file is empty first
+        file_path = Path(file_path)
+        if file_path.stat().st_size == 0:
+            return True  # Empty file is considered valid
+
+        # Try to detect delimiter first
+        delimiter = detect_csv_delimiter(file_path)
+
+        # Read first few rows to validate structure
+        data = read_csv_file(file_path, delimiter=delimiter, headers=True)
+
+        if not data:
+            return True  # Empty file is considered valid
+
+        # Check if expected columns are present
+        if expected_columns is not None:
+            first_row = data[0]
+            actual_columns = set(first_row.keys())
+            expected_set = set(expected_columns)
+
+            if not expected_set.issubset(actual_columns):
+                missing = expected_set - actual_columns
+                raise DataError(f"Missing expected columns: {missing}")
+
+        return True
+    except DataError:
+        # Re-raise DataError as-is
+        raise
+    except Exception as e:
+        raise DataError(f"Invalid CSV structure in {file_path}: {e}")
+
+
+def clean_csv_data(
+    data: List[DataDict], rules: Optional[Dict[str, Any]] = None
+) -> List[DataDict]:
+    """Clean CSV data according to specified rules.
+
+    Args:
+        data: List of dictionaries to clean
+        rules: Dictionary of cleaning rules (None for default cleaning)
+
+    Returns:
+        Cleaned list of dictionaries
+
+    Raises:
+        TypeError: If arguments have wrong types
+
+    Example:
+        >>> data = [{'name': ' Alice ', 'age': '', 'score': 'N/A'}]
+        >>> rules = {'strip_whitespace': True, 'remove_empty': True, 'na_values': ['N/A']}
+        >>> clean_csv_data(data, rules)
+        [{'name': 'Alice'}]
+    """
+    if not isinstance(data, list):
+        raise TypeError("data must be a list")
+    if rules is not None and not isinstance(rules, dict):
+        raise TypeError("rules must be a dictionary or None")
+
+    if not data:
+        return data
+
+    # Default cleaning rules
+    default_rules = {
+        "strip_whitespace": True,
+        "remove_empty": False,
+        "na_values": ["N/A", "n/a", "NA", "null", "NULL", "None"],
+    }
+
+    # Merge with provided rules
+    if rules:
+        default_rules.update(rules)
+
+    cleaned_data = []
+
+    for row in data:
+        if not isinstance(row, dict):
+            continue  # Skip non-dictionary items
+
+        cleaned_row = {}
+
+        for key, value in row.items():
+            # Convert to string for processing
+            if not isinstance(value, str):
+                value = str(value) if value is not None else ""
+
+            # Strip whitespace
+            if default_rules.get("strip_whitespace", False):
+                value = value.strip()
+
+            # Handle NA values
+            na_values = default_rules.get("na_values", [])
+            if isinstance(na_values, list) and value in na_values:
+                value = None
+
+            # Remove empty fields if requested
+            if default_rules.get("remove_empty", False):
+                if value == "" or value is None:
+                    continue
+
+            cleaned_row[key] = value
+
+        cleaned_data.append(cleaned_row)
+
+    return cleaned_data
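Reviewer note (not part of the patch): a minimal round-trip sketch of the CSV
helpers above; "people.csv" is a hypothetical path.

    from basic_open_agent_tools.data.csv_tools import (
        clean_csv_data,
        detect_csv_delimiter,
        read_csv_file,
        write_csv_file,
    )

    rows = [{"name": " Alice ", "age": "25"}, {"name": "Bob", "age": "N/A"}]
    write_csv_file(rows, "people.csv")  # header row written by default
    delim = detect_csv_delimiter("people.csv")  # expected: ","
    data = read_csv_file("people.csv", delimiter=delim)  # values come back as strings
    cleaned = clean_csv_data(data)  # default rules strip whitespace, map "N/A" -> None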
diff --git a/src/basic_open_agent_tools/data/json_tools.py b/src/basic_open_agent_tools/data/json_tools.py
new file mode 100644
index 0000000..7e404ca
--- /dev/null
+++ b/src/basic_open_agent_tools/data/json_tools.py
@@ -0,0 +1,144 @@
+"""JSON processing utilities for AI agents."""
+
+import gzip
+import json
+from typing import Any, Optional
+
+from ..exceptions import SerializationError
+
+
+def safe_json_serialize(data: Any, indent: Optional[int] = None) -> str:
+    """Safely serialize data to JSON string with error handling.
+
+    Args:
+        data: Data to serialize to JSON
+        indent: Number of spaces for indentation (None for compact)
+
+    Returns:
+        JSON string representation of the data
+
+    Raises:
+        SerializationError: If data cannot be serialized to JSON
+        TypeError: If indent is not an integer or None
+
+    Example:
+        >>> safe_json_serialize({"name": "test", "value": 42})
+        '{"name": "test", "value": 42}'
+        >>> safe_json_serialize({"a": 1, "b": 2}, indent=2)
+        '{\\n  "a": 1,\\n  "b": 2\\n}'
+    """
+    if not isinstance(indent, (int, type(None))):
+        raise TypeError("indent must be an integer or None")
+
+    try:
+        return json.dumps(data, indent=indent, ensure_ascii=False)
+    except (TypeError, ValueError) as e:
+        raise SerializationError(f"Failed to serialize data to JSON: {e}")
+
+
+def safe_json_deserialize(json_str: str) -> Any:
+    """Safely deserialize JSON string to Python object with error handling.
+
+    Args:
+        json_str: JSON string to deserialize
+
+    Returns:
+        Deserialized Python object
+
+    Raises:
+        SerializationError: If JSON string cannot be parsed
+        TypeError: If input is not a string
+
+    Example:
+        >>> safe_json_deserialize('{"name": "test", "value": 42}')
+        {'name': 'test', 'value': 42}
+        >>> safe_json_deserialize('[1, 2, 3]')
+        [1, 2, 3]
+    """
+    if not isinstance(json_str, str):
+        raise TypeError("Input must be a string")
+
+    try:
+        return json.loads(json_str)
+    except (json.JSONDecodeError, ValueError) as e:
+        raise SerializationError(f"Failed to deserialize JSON string: {e}")
+
+
+def validate_json_string(json_str: str) -> bool:
+    """Validate JSON string without deserializing.
+
+    Args:
+        json_str: JSON string to validate
+
+    Returns:
+        True if valid JSON, False otherwise
+
+    Example:
+        >>> validate_json_string('{"valid": true}')
+        True
+        >>> validate_json_string('{"invalid": }')
+        False
+    """
+    if not isinstance(json_str, str):
+        return False
+
+    try:
+        json.loads(json_str)
+        return True
+    except (json.JSONDecodeError, ValueError):
+        return False
+
+
+def compress_json_data(data: Any) -> bytes:
+    """Compress JSON data for storage or transmission.
+
+    Args:
+        data: Data to serialize and compress
+
+    Returns:
+        Compressed JSON data as bytes
+
+    Raises:
+        SerializationError: If data cannot be serialized or compressed
+
+    Example:
+        >>> compressed = compress_json_data({"test": "data"})
+        >>> isinstance(compressed, bytes)
+        True
+    """
+    try:
+        json_str = safe_json_serialize(data)
+        return gzip.compress(json_str.encode("utf-8"))
+    except Exception as e:
+        raise SerializationError(f"Failed to compress JSON data: {e}")
+
+
+def decompress_json_data(compressed_data: bytes) -> Any:
+    """Decompress and deserialize JSON data.
+ + Args: + compressed_data: Compressed JSON data as bytes + + Returns: + Deserialized Python object + + Raises: + SerializationError: If data cannot be decompressed or deserialized + TypeError: If input is not bytes + + Example: + >>> original = {"test": "data"} + >>> compressed = compress_json_data(original) + >>> decompressed = decompress_json_data(compressed) + >>> decompressed == original + True + """ + if not isinstance(compressed_data, bytes): + raise TypeError("Input must be bytes") + + try: + json_str = gzip.decompress(compressed_data).decode("utf-8") + return safe_json_deserialize(json_str) + except Exception as e: + raise SerializationError(f"Failed to decompress JSON data: {e}") diff --git a/src/basic_open_agent_tools/exceptions.py b/src/basic_open_agent_tools/exceptions.py index eca7cd1..1d55731 100644 --- a/src/basic_open_agent_tools/exceptions.py +++ b/src/basic_open_agent_tools/exceptions.py @@ -11,3 +11,21 @@ class FileSystemError(BasicAgentToolsError): """Exception for file system operations.""" pass + + +class DataError(BasicAgentToolsError): + """Exception for data operations.""" + + pass + + +class ValidationError(DataError): + """Exception for data validation operations.""" + + pass + + +class SerializationError(DataError): + """Exception for data serialization/deserialization operations.""" + + pass diff --git a/src/basic_open_agent_tools/helpers.py b/src/basic_open_agent_tools/helpers.py index ddab5a5..b0939ba 100644 --- a/src/basic_open_agent_tools/helpers.py +++ b/src/basic_open_agent_tools/helpers.py @@ -3,7 +3,7 @@ import inspect from typing import Any, Callable, Dict, List, Union -from . import file_system, text +from . import data, file_system, text def load_all_filesystem_tools() -> List[Callable[..., Any]]: @@ -50,6 +50,90 @@ def load_all_text_tools() -> List[Callable[..., Any]]: return tools +def load_all_data_tools() -> List[Callable[..., Any]]: + """Load all data processing tools as a list of callable functions. + + Returns: + List of all data processing tool functions + + Example: + >>> data_tools = load_all_data_tools() + >>> len(data_tools) > 0 + True + """ + tools = [] + + # Get all functions from data module + for name in data.__all__: + func = getattr(data, name) + if callable(func): + tools.append(func) + + return tools + + +def load_data_json_tools() -> List[Callable[..., Any]]: + """Load JSON processing tools as a list of callable functions. + + Returns: + List of JSON processing tool functions + + Example: + >>> json_tools = load_data_json_tools() + >>> len(json_tools) == 5 + True + """ + from .data import json_tools + + tools = [] + json_function_names = [ + "safe_json_serialize", + "safe_json_deserialize", + "validate_json_string", + "compress_json_data", + "decompress_json_data", + ] + + for name in json_function_names: + func = getattr(json_tools, name) + if callable(func): + tools.append(func) + + return tools + + +def load_data_csv_tools() -> List[Callable[..., Any]]: + """Load CSV processing tools as a list of callable functions. 
+ + Returns: + List of CSV processing tool functions + + Example: + >>> csv_tools = load_data_csv_tools() + >>> len(csv_tools) == 7 + True + """ + from .data import csv_tools + + tools = [] + csv_function_names = [ + "read_csv_file", + "write_csv_file", + "csv_to_dict_list", + "dict_list_to_csv", + "detect_csv_delimiter", + "validate_csv_structure", + "clean_csv_data", + ] + + for name in csv_function_names: + func = getattr(csv_tools, name) + if callable(func): + tools.append(func) + + return tools + + def merge_tool_lists( *args: Union[List[Callable[..., Any]], Callable[..., Any]], ) -> List[Callable[..., Any]]: @@ -149,4 +233,5 @@ def list_all_available_tools() -> Dict[str, List[Dict[str, Any]]]: return { "file_system": [get_tool_info(tool) for tool in load_all_filesystem_tools()], "text": [get_tool_info(tool) for tool in load_all_text_tools()], + "data": [get_tool_info(tool) for tool in load_all_data_tools()], } diff --git a/src/basic_open_agent_tools/types.py b/src/basic_open_agent_tools/types.py index 0e21180..e01742a 100644 --- a/src/basic_open_agent_tools/types.py +++ b/src/basic_open_agent_tools/types.py @@ -1,9 +1,14 @@ """Common type definitions for basic-open-agent-tools.""" from pathlib import Path -from typing import Union +from typing import Any, Dict, List, Union # Common type aliases currently in use PathLike = Union[str, Path] +# Data-related type aliases +DataDict = Dict[str, Any] +NestedData = Union[Dict[str, Any], List[Any], str, int, float, bool, None] +ValidationResult = Dict[str, Union[bool, str, List[str]]] + # Additional types will be added as modules are implemented diff --git a/tests/test_data_csv_tools.py b/tests/test_data_csv_tools.py new file mode 100644 index 0000000..a261423 --- /dev/null +++ b/tests/test_data_csv_tools.py @@ -0,0 +1,454 @@ +"""Tests for CSV processing tools.""" + + +import pytest + +from basic_open_agent_tools.data.csv_tools import ( + clean_csv_data, + csv_to_dict_list, + detect_csv_delimiter, + dict_list_to_csv, + read_csv_file, + validate_csv_structure, + write_csv_file, +) +from basic_open_agent_tools.exceptions import DataError + + +class TestReadCsvFile: + """Test read_csv_file function.""" + + def test_read_simple_csv(self, tmp_path): + """Test reading a simple CSV file.""" + csv_content = "name,age\nAlice,25\nBob,30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = read_csv_file(csv_file) + expected = [{"name": "Alice", "age": "25"}, {"name": "Bob", "age": "30"}] + assert result == expected + + def test_read_csv_without_headers(self, tmp_path): + """Test reading CSV without headers.""" + csv_content = "Alice,25\nBob,30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = read_csv_file(csv_file, headers=False) + expected = [{"col_0": "Alice", "col_1": "25"}, {"col_0": "Bob", "col_1": "30"}] + assert result == expected + + def test_read_csv_custom_delimiter(self, tmp_path): + """Test reading CSV with custom delimiter.""" + csv_content = "name;age\nAlice;25\nBob;30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = read_csv_file(csv_file, delimiter=";") + expected = [{"name": "Alice", "age": "25"}, {"name": "Bob", "age": "30"}] + assert result == expected + + def test_read_empty_csv(self, tmp_path): + """Test reading empty CSV file.""" + csv_file = tmp_path / "empty.csv" + csv_file.write_text("") + + result = read_csv_file(csv_file) + assert result == [] + + def test_read_csv_headers_only(self, tmp_path): + """Test reading CSV with 
headers only.""" + csv_content = "name,age" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = read_csv_file(csv_file) + assert result == [] + + def test_read_nonexistent_file(self, tmp_path): + """Test reading non-existent file.""" + nonexistent = tmp_path / "nonexistent.csv" + with pytest.raises(DataError, match="CSV file not found"): + read_csv_file(nonexistent) + + def test_read_csv_invalid_types(self): + """Test with invalid argument types.""" + with pytest.raises(TypeError, match="file_path must be a string or Path"): + read_csv_file(123) + + with pytest.raises(TypeError, match="delimiter must be a string"): + read_csv_file("test.csv", delimiter=123) + + with pytest.raises(TypeError, match="headers must be a boolean"): + read_csv_file("test.csv", headers="yes") + + +class TestWriteCsvFile: + """Test write_csv_file function.""" + + def test_write_simple_csv(self, tmp_path): + """Test writing a simple CSV file.""" + data = [{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}] + csv_file = tmp_path / "output.csv" + + write_csv_file(data, csv_file) + + # Verify content + content = csv_file.read_text() + assert "name,age" in content + assert "Alice,25" in content + assert "Bob,30" in content + + def test_write_csv_without_headers(self, tmp_path): + """Test writing CSV without headers.""" + data = [{"name": "Alice", "age": 25}] + csv_file = tmp_path / "output.csv" + + write_csv_file(data, csv_file, headers=False) + + content = csv_file.read_text() + assert "name,age" not in content + assert "Alice,25" in content + + def test_write_csv_custom_delimiter(self, tmp_path): + """Test writing CSV with custom delimiter.""" + data = [{"name": "Alice", "age": 25}] + csv_file = tmp_path / "output.csv" + + write_csv_file(data, csv_file, delimiter=";") + + content = csv_file.read_text() + assert "name;age" in content + assert "Alice;25" in content + + def test_write_empty_data(self, tmp_path): + """Test writing empty data.""" + csv_file = tmp_path / "empty.csv" + write_csv_file([], csv_file) + + assert csv_file.read_text() == "" + + def test_write_csv_mixed_fields(self, tmp_path): + """Test writing CSV with mixed fields across rows.""" + data = [ + {"name": "Alice", "age": 25}, + {"name": "Bob", "city": "NYC"}, + {"age": 30, "country": "USA"}, + ] + csv_file = tmp_path / "output.csv" + + write_csv_file(data, csv_file) + + # Should include all unique fields + content = csv_file.read_text() + assert "name" in content + assert "age" in content + assert "city" in content + assert "country" in content + + def test_write_csv_invalid_types(self, tmp_path): + """Test with invalid argument types.""" + csv_file = tmp_path / "test.csv" + + with pytest.raises(TypeError, match="data must be a list"): + write_csv_file("not a list", csv_file) + + with pytest.raises(TypeError, match="file_path must be a string or Path"): + write_csv_file([], 123) + + with pytest.raises(TypeError, match="All items in data must be dictionaries"): + write_csv_file(["not", "dicts"], csv_file) + + +class TestCsvToDictList: + """Test csv_to_dict_list function.""" + + def test_convert_simple_csv(self): + """Test converting simple CSV string.""" + csv_str = "name,age\nAlice,25\nBob,30" + result = csv_to_dict_list(csv_str) + expected = [{"name": "Alice", "age": "25"}, {"name": "Bob", "age": "30"}] + assert result == expected + + def test_convert_custom_delimiter(self): + """Test converting CSV with custom delimiter.""" + csv_str = "name;age\nAlice;25\nBob;30" + result = csv_to_dict_list(csv_str, 
delimiter=";") + expected = [{"name": "Alice", "age": "25"}, {"name": "Bob", "age": "30"}] + assert result == expected + + def test_convert_empty_csv(self): + """Test converting empty CSV.""" + result = csv_to_dict_list("") + assert result == [] + + def test_convert_headers_only(self): + """Test converting CSV with headers only.""" + result = csv_to_dict_list("name,age") + assert result == [] + + def test_convert_invalid_types(self): + """Test with invalid argument types.""" + with pytest.raises(TypeError, match="csv_data must be a string"): + csv_to_dict_list(123) + + with pytest.raises(TypeError, match="delimiter must be a string"): + csv_to_dict_list("name,age", delimiter=123) + + +class TestDictListToCsv: + """Test dict_list_to_csv function.""" + + def test_convert_simple_data(self): + """Test converting simple data to CSV.""" + data = [{"name": "Alice", "age": 25}, {"name": "Bob", "age": 30}] + result = dict_list_to_csv(data) + + assert "name,age" in result + assert "Alice,25" in result + assert "Bob,30" in result + + def test_convert_custom_delimiter(self): + """Test converting with custom delimiter.""" + data = [{"name": "Alice", "age": 25}] + result = dict_list_to_csv(data, delimiter=";") + + assert "name;age" in result + assert "Alice;25" in result + + def test_convert_empty_data(self): + """Test converting empty data.""" + result = dict_list_to_csv([]) + assert result == "" + + def test_convert_mixed_fields(self): + """Test converting data with mixed fields.""" + data = [{"name": "Alice", "age": 25}, {"name": "Bob", "city": "NYC"}] + result = dict_list_to_csv(data) + + lines = result.strip().split("\n") + header = lines[0] + assert "name" in header + assert "age" in header + assert "city" in header + + def test_convert_invalid_types(self): + """Test with invalid argument types.""" + with pytest.raises(TypeError, match="data must be a list"): + dict_list_to_csv("not a list") + + with pytest.raises(TypeError, match="All items in data must be dictionaries"): + dict_list_to_csv(["not", "dicts"]) + + +class TestDetectCsvDelimiter: + """Test detect_csv_delimiter function.""" + + def test_detect_comma_delimiter(self, tmp_path): + """Test detecting comma delimiter.""" + csv_content = "name,age\nAlice,25\nBob,30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = detect_csv_delimiter(csv_file) + assert result == "," + + def test_detect_semicolon_delimiter(self, tmp_path): + """Test detecting semicolon delimiter.""" + csv_content = "name;age\nAlice;25\nBob;30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = detect_csv_delimiter(csv_file) + assert result == ";" + + def test_detect_tab_delimiter(self, tmp_path): + """Test detecting tab delimiter.""" + csv_content = "name\tage\nAlice\t25\nBob\t30" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = detect_csv_delimiter(csv_file) + assert result == "\t" + + def test_detect_custom_sample_size(self, tmp_path): + """Test detection with custom sample size.""" + csv_content = "name,age\n" + "Alice,25\n" * 1000 + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = detect_csv_delimiter(csv_file, sample_size=100) + assert result == "," + + def test_detect_empty_file(self, tmp_path): + """Test detecting delimiter in empty file.""" + csv_file = tmp_path / "empty.csv" + csv_file.write_text("") + + with pytest.raises(DataError, match="File is empty, cannot detect delimiter"): + detect_csv_delimiter(csv_file) + + def 
test_detect_nonexistent_file(self, tmp_path): + """Test detecting delimiter in non-existent file.""" + nonexistent = tmp_path / "nonexistent.csv" + with pytest.raises(DataError, match="CSV file not found"): + detect_csv_delimiter(nonexistent) + + def test_detect_invalid_types(self): + """Test with invalid argument types.""" + with pytest.raises(TypeError, match="file_path must be a string or Path"): + detect_csv_delimiter(123) + + with pytest.raises(TypeError, match="sample_size must be a positive integer"): + detect_csv_delimiter("test.csv", sample_size=0) + + +class TestValidateCsvStructure: + """Test validate_csv_structure function.""" + + def test_validate_valid_structure(self, tmp_path): + """Test validating valid CSV structure.""" + csv_content = "name,age,email\nAlice,25,alice@example.com" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = validate_csv_structure(csv_file, ["name", "age"]) + assert result is True + + def test_validate_missing_columns(self, tmp_path): + """Test validating CSV with missing expected columns.""" + csv_content = "name,age\nAlice,25" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + with pytest.raises(DataError, match="Missing expected columns"): + validate_csv_structure(csv_file, ["name", "age", "email"]) + + def test_validate_no_expected_columns(self, tmp_path): + """Test validating without expected columns.""" + csv_content = "name,age\nAlice,25" + csv_file = tmp_path / "test.csv" + csv_file.write_text(csv_content) + + result = validate_csv_structure(csv_file) + assert result is True + + def test_validate_empty_file(self, tmp_path): + """Test validating empty CSV file.""" + csv_file = tmp_path / "empty.csv" + csv_file.write_text("") + + result = validate_csv_structure(csv_file) + assert result is True + + def test_validate_invalid_types(self): + """Test with invalid argument types.""" + with pytest.raises(TypeError, match="file_path must be a string or Path"): + validate_csv_structure(123) + + with pytest.raises(TypeError, match="expected_columns must be a list or None"): + validate_csv_structure("test.csv", "not a list") + + +class TestCleanCsvData: + """Test clean_csv_data function.""" + + def test_clean_default_rules(self): + """Test cleaning with default rules.""" + data = [ + {"name": " Alice ", "age": "25", "score": ""}, + {"name": "Bob", "age": "N/A", "score": "95"}, + ] + + result = clean_csv_data(data) + expected = [ + {"name": "Alice", "age": "25", "score": ""}, + {"name": "Bob", "age": None, "score": "95"}, + ] + assert result == expected + + def test_clean_custom_rules(self): + """Test cleaning with custom rules.""" + data = [ + {"name": " Alice ", "age": "", "score": "N/A"}, + {"name": "Bob", "age": "30", "score": "95"}, + ] + + rules = { + "strip_whitespace": True, + "remove_empty": True, + "na_values": ["N/A", "", "null"], + } + + result = clean_csv_data(data, rules) + expected = [ + {"name": "Alice"}, # Empty values removed when remove_empty=True + {"name": "Bob", "age": "30", "score": "95"}, + ] + assert result == expected + + def test_clean_no_strip_whitespace(self): + """Test cleaning without stripping whitespace.""" + data = [{"name": " Alice ", "age": "25"}] + rules = {"strip_whitespace": False} + + result = clean_csv_data(data, rules) + assert result[0]["name"] == " Alice " + + def test_clean_empty_data(self): + """Test cleaning empty data.""" + result = clean_csv_data([]) + assert result == [] + + def test_clean_invalid_types(self): + """Test with invalid argument types.""" + 
with pytest.raises(TypeError, match="data must be a list"): + clean_csv_data("not a list") + + with pytest.raises(TypeError, match="rules must be a dictionary or None"): + clean_csv_data([], "not a dict") + + def test_clean_skip_non_dict_items(self): + """Test cleaning skips non-dictionary items.""" + data = [ + {"name": "Alice", "age": "25"}, + "not a dict", + {"name": "Bob", "age": "30"}, + ] + + result = clean_csv_data(data) + assert len(result) == 2 + assert result[0]["name"] == "Alice" + assert result[1]["name"] == "Bob" + + +class TestRoundTripCsvOperations: + """Test round-trip CSV operations.""" + + def test_write_read_roundtrip(self, tmp_path): + """Test that write -> read returns original data.""" + original_data = [ + {"name": "Alice", "age": "25", "city": "NYC"}, + {"name": "Bob", "age": "30", "city": "LA"}, + {"name": "Charlie", "age": "35", "city": "Chicago"}, + ] + + csv_file = tmp_path / "roundtrip.csv" + write_csv_file(original_data, csv_file) + read_data = read_csv_file(csv_file) + + # Convert age back to string for comparison (CSV always returns strings) + expected = [] + for item in original_data: + expected.append({k: str(v) for k, v in item.items()}) + + assert read_data == expected + + def test_dict_to_csv_to_dict_roundtrip(self): + """Test that dict_list -> CSV string -> dict_list returns original.""" + original_data = [{"name": "Alice", "age": "25"}, {"name": "Bob", "age": "30"}] + + csv_string = dict_list_to_csv(original_data) + converted_back = csv_to_dict_list(csv_string) + + assert converted_back == original_data diff --git a/tests/test_data_json_tools.py b/tests/test_data_json_tools.py new file mode 100644 index 0000000..14948f9 --- /dev/null +++ b/tests/test_data_json_tools.py @@ -0,0 +1,245 @@ +"""Tests for JSON processing tools.""" + +import pytest + +from basic_open_agent_tools.data.json_tools import ( + compress_json_data, + decompress_json_data, + safe_json_deserialize, + safe_json_serialize, + validate_json_string, +) +from basic_open_agent_tools.exceptions import SerializationError + + +class TestSafeJsonSerialize: + """Test safe_json_serialize function.""" + + def test_serialize_dict(self): + """Test serializing a dictionary.""" + data = {"name": "test", "value": 42} + result = safe_json_serialize(data) + assert result == '{"name": "test", "value": 42}' + + def test_serialize_list(self): + """Test serializing a list.""" + data = [1, 2, 3] + result = safe_json_serialize(data) + assert result == "[1, 2, 3]" + + def test_serialize_with_indent(self): + """Test serializing with indentation.""" + data = {"a": 1, "b": 2} + result = safe_json_serialize(data, indent=2) + expected = '{\n "a": 1,\n "b": 2\n}' + assert result == expected + + def test_serialize_unicode(self): + """Test serializing Unicode characters.""" + data = {"message": "Hello δΈ–η•Œ"} + result = safe_json_serialize(data) + assert "δΈ–η•Œ" in result + + def test_serialize_none(self): + """Test serializing None.""" + result = safe_json_serialize(None) + assert result == "null" + + def test_serialize_invalid_indent_type(self): + """Test with invalid indent type.""" + with pytest.raises(TypeError, match="indent must be an integer or None"): + safe_json_serialize({"test": "data"}, indent="invalid") + + def test_serialize_non_serializable_object(self): + """Test serializing non-serializable object.""" + + class CustomClass: + pass + + with pytest.raises( + SerializationError, match="Failed to serialize data to JSON" + ): + safe_json_serialize({"obj": CustomClass()}) + + +class 
TestSafeJsonDeserialize: + """Test safe_json_deserialize function.""" + + def test_deserialize_dict(self): + """Test deserializing a dictionary.""" + json_str = '{"name": "test", "value": 42}' + result = safe_json_deserialize(json_str) + assert result == {"name": "test", "value": 42} + + def test_deserialize_list(self): + """Test deserializing a list.""" + json_str = "[1, 2, 3]" + result = safe_json_deserialize(json_str) + assert result == [1, 2, 3] + + def test_deserialize_unicode(self): + """Test deserializing Unicode characters.""" + json_str = '{"message": "Hello δΈ–η•Œ"}' + result = safe_json_deserialize(json_str) + assert result == {"message": "Hello δΈ–η•Œ"} + + def test_deserialize_null(self): + """Test deserializing null.""" + result = safe_json_deserialize("null") + assert result is None + + def test_deserialize_invalid_type(self): + """Test with invalid input type.""" + with pytest.raises(TypeError, match="Input must be a string"): + safe_json_deserialize({"invalid": "input"}) + + def test_deserialize_invalid_json(self): + """Test deserializing invalid JSON.""" + with pytest.raises( + SerializationError, match="Failed to deserialize JSON string" + ): + safe_json_deserialize('{"invalid": }') + + def test_deserialize_empty_string(self): + """Test deserializing empty string.""" + with pytest.raises( + SerializationError, match="Failed to deserialize JSON string" + ): + safe_json_deserialize("") + + +class TestValidateJsonString: + """Test validate_json_string function.""" + + def test_validate_valid_json(self): + """Test validating valid JSON.""" + assert validate_json_string('{"valid": true}') is True + assert validate_json_string("[1, 2, 3]") is True + assert validate_json_string('"string"') is True + assert validate_json_string("null") is True + + def test_validate_invalid_json(self): + """Test validating invalid JSON.""" + assert validate_json_string('{"invalid": }') is False + assert validate_json_string("[1, 2,]") is False + assert validate_json_string("undefined") is False + assert validate_json_string("") is False + + def test_validate_non_string(self): + """Test validating non-string input.""" + assert validate_json_string(None) is False + assert validate_json_string(123) is False + assert validate_json_string({"dict": "input"}) is False + assert validate_json_string([1, 2, 3]) is False + + +class TestCompressJsonData: + """Test compress_json_data function.""" + + def test_compress_simple_data(self): + """Test compressing simple data.""" + data = {"test": "data"} + compressed = compress_json_data(data) + assert isinstance(compressed, bytes) + assert len(compressed) > 0 + + def test_compress_large_data(self): + """Test compressing larger data for better compression.""" + data = {"repeated": "data" * 100, "numbers": list(range(100))} + compressed = compress_json_data(data) + original_json = safe_json_serialize(data) + + # Compressed should be smaller than original for repetitive data + assert len(compressed) < len(original_json.encode("utf-8")) + + def test_compress_unicode_data(self): + """Test compressing Unicode data.""" + data = {"unicode": "Hello δΈ–η•Œ", "emoji": "πŸš€"} + compressed = compress_json_data(data) + assert isinstance(compressed, bytes) + + def test_compress_non_serializable(self): + """Test compressing non-serializable data.""" + + class CustomClass: + pass + + with pytest.raises(SerializationError, match="Failed to compress JSON data"): + compress_json_data({"obj": CustomClass()}) + + +class TestDecompressJsonData: + """Test decompress_json_data 
function.""" + + def test_decompress_simple_data(self): + """Test decompressing simple data.""" + original = {"test": "data"} + compressed = compress_json_data(original) + decompressed = decompress_json_data(compressed) + assert decompressed == original + + def test_decompress_complex_data(self): + """Test decompressing complex data.""" + original = { + "string": "Hello δΈ–η•Œ", + "number": 42, + "list": [1, 2, 3], + "nested": {"key": "value"}, + "null": None, + "boolean": True, + } + compressed = compress_json_data(original) + decompressed = decompress_json_data(compressed) + assert decompressed == original + + def test_decompress_invalid_type(self): + """Test decompressing invalid input type.""" + with pytest.raises(TypeError, match="Input must be bytes"): + decompress_json_data("not bytes") + + def test_decompress_invalid_data(self): + """Test decompressing invalid compressed data.""" + with pytest.raises(SerializationError, match="Failed to decompress JSON data"): + decompress_json_data(b"invalid compressed data") + + def test_decompress_empty_bytes(self): + """Test decompressing empty bytes.""" + with pytest.raises(SerializationError, match="Failed to decompress JSON data"): + decompress_json_data(b"") + + +class TestRoundTripSerialization: + """Test round-trip serialization scenarios.""" + + def test_serialize_deserialize_roundtrip(self): + """Test that serialize -> deserialize returns original data.""" + test_cases = [ + {"simple": "dict"}, + [1, 2, 3, "mixed", {"nested": "list"}], + None, + True, + False, + 42, + 3.14, + "string with unicode δΈ–η•Œ", + {"complex": {"nested": {"deeply": [1, 2, {"more": "nesting"}]}}}, + ] + + for original in test_cases: + serialized = safe_json_serialize(original) + deserialized = safe_json_deserialize(serialized) + assert deserialized == original + + def test_compress_decompress_roundtrip(self): + """Test that compress -> decompress returns original data.""" + test_cases = [ + {"simple": "dict"}, + [1, 2, 3, "mixed", {"nested": "list"}], + {"large": "data" * 1000}, # Test compression benefits + {"unicode": "δΈ–η•Œ 🌍 πŸš€"}, + ] + + for original in test_cases: + compressed = compress_json_data(original) + decompressed = decompress_json_data(compressed) + assert decompressed == original diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 91ffe12..2004e34 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -143,18 +143,20 @@ def test_merge_tool_lists_deduplication(self): # Load the same tools multiple times fs_tools_1 = load_all_filesystem_tools() fs_tools_2 = load_all_filesystem_tools() - + # Merge with duplicates merged = merge_tool_lists(fs_tools_1, fs_tools_2) - + # Should have same length as single load (duplicates removed) assert len(merged) == len(fs_tools_1) - + # Check that no function name appears twice function_names = [tool.__name__ for tool in merged] unique_names = set(function_names) - assert len(function_names) == len(unique_names), "Found duplicate function names" - + assert len(function_names) == len(unique_names), ( + "Found duplicate function names" + ) + # Should still contain all expected functions expected_names = [tool.__name__ for tool in fs_tools_1] for name in expected_names: @@ -162,20 +164,21 @@ def test_merge_tool_lists_deduplication(self): def test_merge_tool_lists_different_modules_same_name(self): """Test handling of functions with same name from different modules.""" + # Create two functions with the same name but different modules def test_function(): return "first" - + def 
another_test_function(): return "second" - + # Manually set different module names to simulate different sources test_function.__module__ = "module1" another_test_function.__module__ = "module2" another_test_function.__name__ = "test_function" # Same name as first - + merged = merge_tool_lists([test_function], [another_test_function]) - + # Should keep both since they're from different modules assert len(merged) == 2 assert test_function in merged
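Reviewer note (not part of the patch): the JSON helpers compose into simple
round trips; a minimal sketch using only the APIs added above:

    from basic_open_agent_tools.data.json_tools import (
        compress_json_data,
        decompress_json_data,
        safe_json_deserialize,
        safe_json_serialize,
        validate_json_string,
    )

    payload = {"name": "test", "values": [1, 2, 3], "note": "Hello δΈ–η•Œ"}

    text = safe_json_serialize(payload, indent=2)  # Unicode preserved (ensure_ascii=False)
    assert validate_json_string(text)
    assert safe_json_deserialize(text) == payload

    blob = compress_json_data(payload)  # gzip-compressed UTF-8 JSON bytes
    assert decompress_json_data(blob) == payload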