This repository was archived by the owner on Feb 7, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
281 lines (249 loc) · 10.3 KB
/
utils.py
File metadata and controls
281 lines (249 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
"""
Utility functions for the Language Translator script.
This module contains various utility functions used throughout the project.
Features:
* Dependency checking for required Python modules
* Graceful shutdown setup for handling interruptions
* JSON string cleaning and validation
* Retry mechanism with exponential backoff for API calls
Functions:
truncate_text: Shortens text to a maximum length and appends an ellipsis when it is shortened
check_dependencies: Checks if all required modules are installed
setup_graceful_shutdown: Sets up signal handlers for graceful shutdown
check_exit_flag: Reports whether a graceful shutdown has been requested
clean_json_string: Cleans and formats JSON strings
is_json_complete: Validates if a JSON string is complete and valid
retry_with_exponential_backoff: Decorator for retrying functions with exponential backoff
Notes:
* The check_dependencies function should be called at the start of the main script
* The setup_graceful_shutdown function should be used to ensure proper cleanup on script termination
* JSON-related functions are crucial for handling API responses
* The retry decorator is particularly useful for API calls that may fail intermittently
Lessons Learned:
* Problem: Script crashes due to missing dependencies
Solution:
- Implemented a comprehensive dependency check:
def check_dependencies(logger: Logger):
required_modules = [
'anthropic', 'openai', 'rich', 'pandas', 'tiktoken'
]
missing_modules = []
for module in required_modules:
try:
__import__(module)
except ImportError:
missing_modules.append(module)
if missing_modules:
logger.error("The following required modules are missing:")
for module in missing_modules:
logger.error(f"- {module}")
logger.error("Please install these modules and try again.")
sys.exit(1)
else:
logger.info("All required modules are installed.")
- This ensures all necessary modules are available before the script runs
* Problem: Unclean script termination leading to potential data loss
Solution:
- Implemented a graceful shutdown mechanism:
def setup_graceful_shutdown(cleanup_functions: List[Callable], logger):
def graceful_shutdown(signum, frame):
logger.info("Received interrupt signal. Initiating graceful shutdown...")
threading.Event().set()
for cleanup_func in cleanup_functions:
cleanup_func()
logger.info("Graceful shutdown complete. Exiting...")
exit(0)
signal.signal(signal.SIGINT, graceful_shutdown)
signal.signal(signal.SIGTERM, graceful_shutdown)
- This allows for proper cleanup of resources and saving of data on script interruption
* Problem: Inconsistent handling of JSON strings from API responses
Solution:
- Created utility functions for JSON string cleaning and validation:
def is_json_complete(json_string: str) -> bool:
cleaned_string = clean_json_string(json_string)
try:
json.loads(cleaned_string)
return True
except json.JSONDecodeError:
return False
- These functions ensure consistent handling of JSON strings across the project
* Problem: API calls failing due to temporary issues
Solution:
- Implemented a retry decorator with exponential backoff:
def retry_with_exponential_backoff(
func: Callable,
max_retries: int = 5,
initial_wait: float = 1,
exponential_base: float = 2,
logger: Logger = None
) -> Callable:
@wraps(func)
def wrapper(*args, **kwargs):
wait_time = initial_wait
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except Exception as e:
if attempt == max_retries - 1:
if logger:
logger.error(f"Function {func.__name__} failed after {max_retries} attempts. Error: {str(e)}")
raise
if logger:
logger.warning(f"Attempt {attempt + 1} failed. Retrying in {wait_time:.2f} seconds. Error: {str(e)}")
time.sleep(wait_time)
wait_time *= exponential_base
return wrapper
- This decorator allows for automatic retrying of failed API calls with increasing wait times
* Problem: Difficulty in tracking function versions across the project
Solution:
- Implemented a versioning decorator:
def versioned(version: str):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
return func(*args, **kwargs)
wrapper.__version__ = version
return wrapper
return decorator
- This allows for easy tracking of function versions throughout the project
* Problem: Inconsistent handling of file paths across different operating systems
Solution:
- Utilized pathlib for cross-platform path handling:
from pathlib import Path
cache_file = Path.home() / ".cache" / "language_translator" / "translation_cache.pkl"
- This ensures consistent file path handling across different operating systems
* Problem: Difficulty in managing and updating required dependencies
Solution:
- Centralized the list of required modules in the check_dependencies function
- Regularly review and update this list as the project evolves
- Consider using a requirements.txt file for more complex dependency management
* Problem: Lack of type checking leading to runtime errors
Solution:
- Implemented type hints throughout the utility functions:
def clean_json_string(json_string: str) -> str:
def is_json_complete(json_string: str) -> bool:
- This improves code readability and allows for static type checking
* Problem: Difficulty in debugging complex utility functions
Solution:
- Implemented comprehensive logging in utility functions:
logger.debug(f"Cleaning JSON string: {json_string[:50]}...")
logger.debug(f"JSON complete check result: {result}")
- This provides valuable insights into the behavior of utility functions during runtime
"""
# Standard library imports
from functools import wraps
import signal
import sys
import os
import json
import re
import threading
import time
import asyncio
from typing import Any, Callable, List, Union, Dict
# Local application imports
from config import versioned
from debug_logging import LTLogger
# Global flag to indicate if the script should exit
should_exit = threading.Event()
# Shortens the text to a max length and adds ellipsis if it's too long
def truncate_text(text, max_length=100):
    """
    Shorten text to at most max_length characters for display.

    Text that already fits is returned unchanged. Longer text is cut at
    max_length and, if the kept part contains a line feed, cut back further
    to the last line feed so the result does not end in the middle of a line.
    An ellipsis ('...') is appended whenever characters were dropped.

    Args:
        text (str): The text to shorten.
        max_length (int): Maximum number of characters kept before the ellipsis.
    Returns:
        str: The original text, or its shortened form followed by '...'.
    """
    # Text that fits needs no shortening. Returning early keeps the final
    # line of short multi-line text, which would otherwise be dropped by the
    # line-feed cut below.
    if len(text) <= max_length:
        return text

    # First, limit to max_length
    truncated = text[:max_length]

    # Then back up to the last line feed inside the kept part, if any
    last_lf = truncated.rfind('\n')
    if last_lf != -1:
        truncated = truncated[:last_lf]

    # Add ellipsis only if characters were actually dropped
    if len(truncated) < len(text):
        truncated += '...'
    return truncated
@versioned("2.3.0")
def setup_graceful_shutdown(cleanup_functions: List[Callable], logger: LTLogger):
    """
    Install SIGINT and SIGTERM handlers that run cleanup callbacks and exit.

    When either signal arrives, the handler sets the module-level should_exit
    event, calls every cleanup function in order (logging, but not
    propagating, any exception one of them raises) and then terminates the
    process with exit status 0.

    Args:
        cleanup_functions (List[Callable]): Zero-argument callables to run on shutdown.
        logger (LTLogger): Logger used to report shutdown progress and cleanup errors.
    """
    def run_cleanup(cleanup_func):
        # A failing cleanup must not stop the remaining ones from running.
        try:
            cleanup_func()
        except Exception as e:
            logger.error(f"Error during cleanup: {str(e)}")

    def graceful_shutdown(signum, frame):
        logger.info("Received interrupt signal. Initiating graceful shutdown...")
        # Set the exit flag so code polling it can stop
        should_exit.set()
        for cleanup_func in cleanup_functions:
            run_cleanup(cleanup_func)
        logger.info("Graceful shutdown complete. Exiting...")
        # Force exit
        os._exit(0)

    # Register the same handler for both signals, SIGINT first.
    for handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(handled_signal, graceful_shutdown)
@versioned("1.5.0")
def check_exit_flag():
    """
    Report whether the module-level exit flag has been set.

    Returns:
        bool: True if the should_exit event is set, False otherwise.
    """
    exit_requested = should_exit.is_set()
    return exit_requested
@versioned("1.4.0")
def check_dependencies(logger: LTLogger):
    """
    Verify that every required module can be imported.

    Each required module is imported by name. If any import fails, the
    missing module names are logged as errors and the process exits with
    status 1; otherwise a confirmation is logged.

    Args:
        logger (LTLogger): Logger used to report the outcome.
    """
    def is_importable(module_name):
        # Importing is the check itself; only ImportError counts as "missing".
        try:
            __import__(module_name)
        except ImportError:
            return False
        return True

    required_modules = ('anthropic', 'openai', 'rich', 'pandas', 'tiktoken')
    missing_modules = [
        candidate for candidate in required_modules if not is_importable(candidate)
    ]

    # Happy path first: nothing missing, nothing more to do.
    if not missing_modules:
        logger.info("All required modules are installed.")
        return

    logger.error("The following required modules are missing:")
    for module in missing_modules:
        logger.error(f"- {module}")
    logger.error("Please install these modules and try again.")
    sys.exit(1)
@versioned("1.4.1")
def clean_json_string(json_string: str) -> str:
    """
    Strip Markdown code-fence wrapping and surrounding whitespace from a JSON string.

    Handles an opening fence with or without a 'json' language tag (in any
    letter case), a closing fence, and stray trailing backticks. The content
    between the fences is not validated or otherwise modified.

    Args:
        json_string (str): Raw text, possibly wrapped in ``` fences.
    Returns:
        str: The text with fences and leading/trailing whitespace removed.
    """
    # Strip whitespace first so the anchored patterns below still match when
    # the fence is preceded or followed by blank lines or spaces.
    cleaned = json_string.strip()
    # Remove the opening fence: ``` optionally followed by "json"
    cleaned = re.sub(r'^```(?:json)?\s*', '', cleaned, flags=re.IGNORECASE)
    # Remove the closing fence, or any trailing backticks that might be left
    cleaned = re.sub(r'\s*`+$', '', cleaned)
    return cleaned.strip()
@versioned("1.4.0")
def is_json_complete(json_string: str) -> bool:
    """
    Check whether a string parses as JSON after cleaning.

    The input is first passed through clean_json_string, then parsed with
    json.loads.

    Args:
        json_string (str): The text to check.
    Returns:
        bool: True if the cleaned text parses, False if json.JSONDecodeError is raised.
    """
    candidate = clean_json_string(json_string)
    try:
        json.loads(candidate)
    except json.JSONDecodeError:
        return False
    else:
        return True
@versioned("1.4.1")
def retry_with_exponential_backoff(
    func: Callable,
    max_retries: int = 5,
    initial_wait: float = 1,
    exponential_base: float = 2,
    logger: Any = None
) -> Callable:
    """
    Retry a function with exponential backoff.

    The wrapped function is called up to max_retries times. After each
    failed attempt except the last, the wrapper sleeps for the current wait
    time and then multiplies it by exponential_base. The exception from the
    final attempt is re-raised unchanged. If max_retries is less than 1 the
    function is never called and the wrapper returns None.

    Args:
        func (Callable): The function to retry.
        max_retries (int): Maximum number of attempts.
        initial_wait (float): Initial wait time in seconds.
        exponential_base (float): Base for exponential backoff.
        logger: Optional logger exposing warning() and error(). When omitted,
            retries happen silently.
    Returns:
        Callable: Decorated function with retry logic.
    Raises:
        Exception: Whatever func raised on the final attempt.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        wait_time = initial_wait
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                if attempt == max_retries - 1:
                    # Out of attempts: report if possible, then surface the
                    # original exception to the caller.
                    if logger is not None:
                        logger.error(f"Function {func.__name__} failed after {max_retries} attempts. Error: {str(e)}")
                    raise
                # Logging is optional; a missing logger must never mask the
                # real error or prevent the retry.
                if logger is not None:
                    logger.warning(f"Attempt {attempt + 1} failed. Retrying in {wait_time:.2f} seconds. Error: {str(e)}")
                time.sleep(wait_time)
                wait_time *= exponential_base
    return wrapper