From f3a9c5a70ab92b40c265ccf358c412d31360fd90 Mon Sep 17 00:00:00 2001 From: nightcityblade Date: Sat, 16 May 2026 23:06:29 +0800 Subject: [PATCH] docs: add utility docstrings --- utils/paths.py | 37 +++++++++++++++++++++++++++++++++++++ utils/text.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/utils/paths.py b/utils/paths.py index 5c9728b..3f5e8be 100644 --- a/utils/paths.py +++ b/utils/paths.py @@ -2,6 +2,16 @@ def resolve_path(base: str | Path, path: str | Path): + """Resolve a path relative to a base directory. + + Args: + base: Base directory used when ``path`` is relative. + path: Absolute or relative path to resolve. + + Returns: + The resolved absolute path when ``path`` is absolute, otherwise the + resolved base directory joined with ``path``. + """ path = Path(path) if path.is_absolute(): return path.resolve() @@ -10,6 +20,16 @@ def resolve_path(base: str | Path, path: str | Path): def display_path_rel_to_cwd(path: str, cwd: Path | None) -> str: + """Return a path display string relative to the current working directory. + + Args: + path: Path to display. + cwd: Current working directory to make ``path`` relative to, if possible. + + Returns: + ``path`` relative to ``cwd`` when possible; otherwise, the original path + string or normalized path string. + """ try: p = Path(path) except Exception: @@ -25,6 +45,14 @@ def display_path_rel_to_cwd(path: str, cwd: Path | None) -> str: def ensure_parent_directory(path: str | Path) -> Path: + """Ensure the parent directory for a path exists. + + Args: + path: File path whose parent directory should be created. + + Returns: + The input path converted to a ``Path`` instance. + """ path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) @@ -32,6 +60,15 @@ def ensure_parent_directory(path: str | Path) -> Path: def is_binary_file(path: str | Path) -> bool: + """Check whether a file appears to contain binary data. + + Args: + path: File path to inspect. + + Returns: + True if the first bytes of the file contain a null byte; otherwise False. + Returns False if the file cannot be read. + """ try: with open(path, "rb") as f: chunk = f.read(8192) diff --git a/utils/text.py b/utils/text.py index 2675d42..123a1ea 100644 --- a/utils/text.py +++ b/utils/text.py @@ -2,6 +2,15 @@ def get_tokenizer(model: str): + """Get a tokenization function for a model. + + Args: + model: Model name used to select a tiktoken encoding. + + Returns: + A callable that encodes text into token IDs. Falls back to the + ``cl100k_base`` encoding when the model-specific encoding is unavailable. + """ try: encoding = tiktoken.encoding_for_model(model) return encoding.encode @@ -11,6 +20,15 @@ def get_tokenizer(model: str): def count_tokens(text: str, model: str = "gemini-2.0-flash-exp") -> int: + """Count the number of tokens in text for a model. + + Args: + text: Text to count tokens for. + model: Model name used to select a tokenizer. + + Returns: + Number of tokens in ``text``. + """ tokenizer = get_tokenizer(model) if tokenizer: @@ -20,6 +38,14 @@ def count_tokens(text: str, model: str = "gemini-2.0-flash-exp") -> int: def estimate_tokens(text: str) -> int: + """Estimate the number of tokens in text using character count. + + Args: + text: Text to estimate token count for. + + Returns: + Estimated token count, with a minimum of 1. + """ return max(1, len(text) // 4) @@ -30,6 +56,19 @@ def truncate_text( suffix: str = "\n... [truncated]", preserve_lines: bool = True, ): + """Truncate text to fit within a maximum token count. + + Args: + text: Text to truncate. + model: Model name used to count tokens. + max_tokens: Maximum number of tokens allowed in the returned text. + suffix: Text appended when truncation occurs. + preserve_lines: Whether to truncate only at line boundaries when possible. + + Returns: + The original text when it fits within ``max_tokens``; otherwise, a + truncated version with ``suffix`` appended. + """ current_tokens = count_tokens(text, model) if current_tokens <= max_tokens: return text