-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimizer.py
More file actions
446 lines (383 loc) · 15 KB
/
optimizer.py
File metadata and controls
446 lines (383 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
"""Prompt optimization utilities for reducing token usage."""
import re
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum
from .utils import count_tokens
class OptimizationType(Enum):
    """Types of optimizations."""

    WHITESPACE = "whitespace"    # collapse repeated spaces / excess blank lines
    REDUNDANCY = "redundancy"    # drop redundant instructional phrases
    VERBOSITY = "verbosity"      # replace verbose phrases with concise ones
    FORMATTING = "formatting"    # formatting-related changes
    COMPRESSION = "compression"  # sentence-level compression toward a budget
@dataclass
class OptimizationResult:
    """Result of prompt optimization.

    Captures the before/after text, token counts, and the names of the
    optimization passes that actually shortened the text.
    """

    original_text: str       # prompt exactly as supplied by the caller
    optimized_text: str      # prompt after all optimization passes
    original_tokens: int     # token count of original_text
    optimized_tokens: int    # token count of optimized_text
    tokens_saved: int        # original_tokens - optimized_tokens
    savings_percent: float   # tokens_saved as a percentage of original_tokens
    # Names of the passes that changed the text (e.g. "whitespace_normalization").
    optimizations_applied: List[str] = field(default_factory=list)
    # Human-readable follow-up hints that were not applied automatically.
    suggestions: List[str] = field(default_factory=list)
@dataclass
class TokenEstimate:
    """Token count estimate."""

    text: str                     # preview of the input (truncated to ~100 chars by estimate_tokens)
    token_count: int              # tokens in the full input text
    model: str                    # model name used for counting and pricing lookup
    char_count: int               # character count of the full input text
    word_count: int               # whitespace-delimited word count of the full input
    estimated_cost_input: float   # estimated input-side cost (per-1M-token pricing table)
    estimated_cost_output: float  # projected output-side cost (per-1M-token pricing table)
class PromptOptimizer:
    """Optimizes prompts to reduce token usage.

    Applies a pipeline of text transformations in order: whitespace
    normalization, removal of redundant instructional phrases, replacement
    of verbose phrases with concise equivalents, optional aggressive
    article/filler stripping, and optional sentence-level compression
    toward a target token budget.  Fenced and inline code spans are
    shielded from all transforms when ``preserve_formatting`` is True.
    """

    # Common verbose phrases and their concise alternatives.  Stored as
    # plain phrases; word boundaries are added when compiled in __init__
    # so a phrase never matches inside a larger word.
    VERBOSE_PATTERNS = [
        (r"in order to", "to"),
        (r"due to the fact that", "because"),
        (r"at this point in time", "now"),
        (r"in the event that", "if"),
        (r"for the purpose of", "to"),
        (r"with regard to", "about"),
        (r"in spite of the fact that", "although"),
        (r"has the ability to", "can"),
        (r"is able to", "can"),
        (r"make a decision", "decide"),
        (r"take into consideration", "consider"),
        (r"give consideration to", "consider"),
        (r"prior to", "before"),
        (r"subsequent to", "after"),
        (r"in close proximity to", "near"),
        (r"a large number of", "many"),
        (r"a small number of", "few"),
        (r"the majority of", "most"),
        (r"at the present time", "now"),
        (r"in today's day and age", "today"),
        (r"each and every", "each"),
        (r"first and foremost", "first"),
        (r"various different", "various"),
        (r"completely eliminate", "eliminate"),
        (r"absolutely essential", "essential"),
        (r"basic fundamentals", "fundamentals"),
        (r"past history", "history"),
        (r"future plans", "plans"),
        (r"end result", "result"),
        (r"final outcome", "outcome"),
    ]

    # Redundant instruction prefixes that cost tokens without adding
    # information for the model.  A leading word boundary is added when
    # compiled in __init__.
    REDUNDANT_PATTERNS = [
        r"please\s+",
        r"kindly\s+",
        r"I would like you to\s+",
        r"Could you please\s+",
        r"Would you mind\s+",
        r"I want you to\s+",
        r"I need you to\s+",
        r"Your task is to\s+",
        r"You should\s+",
        r"You must\s+",
        r"Make sure to\s+",
        r"Be sure to\s+",
        r"Remember to\s+",
        r"Don't forget to\s+",
    ]

    def __init__(
        self,
        aggressive: bool = False,
        preserve_formatting: bool = True,
        min_savings_threshold: float = 0.05
    ):
        """
        Initialize the optimizer.

        Args:
            aggressive: Apply more aggressive optimizations (article and
                filler-word removal).
            preserve_formatting: Keep markdown/code formatting intact.
            min_savings_threshold: Minimum savings to apply optimization.
        """
        self.aggressive = aggressive
        self.preserve_formatting = preserve_formatting
        self.min_savings_threshold = min_savings_threshold
        # Compile patterns once.  The \b anchors fix a bug where phrases
        # matched inside larger words (e.g. "win order to" -> "wto",
        # "displease you" -> "disyou").
        self.verbose_compiled = [
            (re.compile(rf"\b{pattern}\b", re.IGNORECASE), replacement)
            for pattern, replacement in self.VERBOSE_PATTERNS
        ]
        # Redundant patterns end in \s+, so only a leading boundary is added.
        self.redundant_compiled = [
            re.compile(rf"\b{pattern}", re.IGNORECASE)
            for pattern in self.REDUNDANT_PATTERNS
        ]

    def optimize(
        self,
        text: str,
        model: str = "gpt-4",
        target_tokens: Optional[int] = None
    ) -> "OptimizationResult":
        """
        Optimize a prompt to reduce token usage.

        Args:
            text: The prompt text to optimize
            model: Model for token counting
            target_tokens: Optional target token count

        Returns:
            OptimizationResult with optimized text and stats
        """
        original_tokens = count_tokens(text, model)
        optimized = text
        applied = []

        # Shelve code spans behind placeholders so the text transforms
        # below cannot mangle fenced blocks or inline code.
        code_blocks = []
        if self.preserve_formatting:
            code_pattern = re.compile(r"```[\s\S]*?```|`[^`]+`")
            code_blocks = code_pattern.findall(optimized)
            for i, block in enumerate(code_blocks):
                # Replace one occurrence per placeholder so duplicate
                # spans each keep a distinct index.
                optimized = optimized.replace(block, f"__CODE_BLOCK_{i}__", 1)

        # 1. Normalize whitespace.
        before_len = len(optimized)
        optimized = self._normalize_whitespace(optimized)
        if len(optimized) < before_len:
            applied.append("whitespace_normalization")

        # 2. Remove redundant instructional phrases.
        before_len = len(optimized)
        optimized = self._remove_redundant_phrases(optimized)
        if len(optimized) < before_len:
            applied.append("redundancy_removal")

        # 3. Replace verbose phrases with concise alternatives.
        before_len = len(optimized)
        optimized = self._replace_verbose_phrases(optimized)
        if len(optimized) < before_len:
            applied.append("verbosity_reduction")

        # 4. Aggressive optimizations, if enabled.
        if self.aggressive:
            before_len = len(optimized)
            optimized = self._aggressive_optimize(optimized)
            if len(optimized) < before_len:
                applied.append("aggressive_compression")

        # 5. If a target was given and still not met, drop low-value
        #    sentences until the budget fits.
        if target_tokens:
            current_tokens = count_tokens(optimized, model)
            if current_tokens > target_tokens:
                optimized = self._compress_to_target(optimized, target_tokens, model)
                applied.append("target_compression")

        # Restore the shelved code spans.
        if self.preserve_formatting:
            for i, block in enumerate(code_blocks):
                optimized = optimized.replace(f"__CODE_BLOCK_{i}__", block)

        # Calculate results.
        optimized_tokens = count_tokens(optimized, model)
        tokens_saved = original_tokens - optimized_tokens
        savings_percent = (tokens_saved / original_tokens * 100) if original_tokens > 0 else 0

        return OptimizationResult(
            original_text=text,
            optimized_text=optimized,
            original_tokens=original_tokens,
            optimized_tokens=optimized_tokens,
            tokens_saved=tokens_saved,
            savings_percent=savings_percent,
            optimizations_applied=applied,
            suggestions=self._generate_suggestions(text, optimized)
        )

    def _normalize_whitespace(self, text: str) -> str:
        """Collapse redundant whitespace while keeping paragraph breaks."""
        text = re.sub(r" +", " ", text)              # runs of spaces -> one space
        text = re.sub(r"\n{3,}", "\n\n", text)       # 3+ newlines -> one blank line
        text = re.sub(r" +\n", "\n", text)           # trailing spaces on lines
        text = re.sub(r"\n +(?=[^\s])", "\n", text)  # leading spaces on lines
        return text.strip()

    def _remove_redundant_phrases(self, text: str) -> str:
        """Strip redundant instructional phrases (e.g. "please ", "kindly ")."""
        for pattern in self.redundant_compiled:
            text = pattern.sub("", text)
        return text

    def _replace_verbose_phrases(self, text: str) -> str:
        """Replace verbose phrases with concise alternatives."""
        for pattern, replacement in self.verbose_compiled:
            text = pattern.sub(replacement, text)
        return text

    def _aggressive_optimize(self, text: str) -> str:
        r"""Apply aggressive optimizations: drop articles and filler words.

        Word boundaries (\b) guard both substitutions; without them the
        removal corrupted words that merely contain an article or filler
        as a substring (e.g. "data now" -> "datnow", "adjust " -> "ad").
        """
        # Remove standalone articles followed by a word.
        text = re.sub(r"\b(the|a|an)\b\s+(?=\w)", "", text, flags=re.IGNORECASE)
        # Remove standalone filler words.
        fillers = ["just", "really", "very", "quite", "rather", "somewhat", "actually", "basically"]
        for filler in fillers:
            text = re.sub(rf"\b{filler}\b\s+", "", text, flags=re.IGNORECASE)
        return text

    def _compress_to_target(
        self,
        text: str,
        target_tokens: int,
        model: str
    ) -> str:
        """Compress text to meet a target token count.

        Sentences are scored with a simple importance heuristic and
        greedily kept (highest score first) while the budget allows;
        surviving sentences are re-emitted in their original order.
        """
        current_tokens = count_tokens(text, model)
        if current_tokens <= target_tokens:
            return text

        # Split into sentences on terminal punctuation.
        sentences = re.split(r"(?<=[.!?])\s+", text)
        if len(sentences) <= 1:
            # Cannot split further; fall back to proportional truncation.
            ratio = target_tokens / current_tokens
            return text[:int(len(text) * ratio)]

        # Score each sentence (higher = more important), remembering its
        # original index so order can be restored exactly.
        scored = []
        for i, sent in enumerate(sentences):
            score = 0.0
            # First and last sentences usually carry the framing.
            if i == 0 or i == len(sentences) - 1:
                score += 2
            # Sentences containing directive/key terms.
            if re.search(r"(must|should|important|key|main|critical)", sent, re.IGNORECASE):
                score += 1
            # Mild preference for shorter sentences when compressing.
            score -= len(sent) / 1000
            scored.append((score, i, sent))

        # Greedily keep the highest-scoring sentences that fit the budget.
        # Sorting on the score alone keeps ties in original order instead
        # of tie-breaking on reversed string comparison.
        scored.sort(key=lambda item: item[0], reverse=True)
        kept_indices = set()
        used_tokens = 0
        for score, i, sent in scored:
            sent_tokens = count_tokens(sent, model)
            if used_tokens + sent_tokens <= target_tokens:
                kept_indices.add(i)
                used_tokens += sent_tokens

        # Re-emit kept sentences in original order.  Tracking indices
        # (not text membership) avoids resurrecting dropped duplicates.
        return " ".join(sentences[i] for i in sorted(kept_indices))

    def _generate_suggestions(self, original: str, optimized: str) -> List[str]:
        """Generate human-readable optimization suggestions for *original*."""
        suggestions = []

        # Repetition: words longer than 4 chars used more than 3 times.
        words = original.lower().split()
        word_counts = {}
        for word in words:
            if len(word) > 4:
                word_counts[word] = word_counts.get(word, 0) + 1
        repeated = [w for w, c in word_counts.items() if c > 3]
        if repeated:
            suggestions.append(
                f"Consider reducing repetition of: {', '.join(repeated[:5])}"
            )

        # Long sentences (> 40 words).
        sentences = re.split(r"(?<=[.!?])\s+", original)
        long_sentences = [s for s in sentences if len(s.split()) > 40]
        if long_sentences:
            suggestions.append(
                f"Consider breaking up {len(long_sentences)} long sentence(s) for clarity"
            )

        # Many examples that could be condensed to one.
        if original.count("example") > 2 or original.count("e.g.") > 2:
            suggestions.append(
                "Multiple examples detected - consider using a single representative example"
            )

        # Many bullet points that could be grouped.
        bullet_count = len(re.findall(r"^\s*[-*•]\s", original, re.MULTILINE))
        if bullet_count > 10:
            suggestions.append(
                f"{bullet_count} bullet points detected - consider grouping or summarizing"
            )

        return suggestions

    def estimate_tokens(
        self,
        text: str,
        model: str = "gpt-4",
        expected_output_ratio: float = 1.0
    ) -> "TokenEstimate":
        """
        Estimate token count and costs.

        Args:
            text: Text to estimate
            model: Model for estimation
            expected_output_ratio: Expected output/input ratio

        Returns:
            TokenEstimate with counts and costs
        """
        token_count = count_tokens(text, model)
        expected_output = int(token_count * expected_output_ratio)

        # Pricing per 1M tokens: (input, output).  Unknown models fall
        # back to a generic (1.00, 2.00).
        pricing = {
            "gpt-4o": (2.50, 10.00),
            "gpt-4o-mini": (0.15, 0.60),
            "gpt-4-turbo": (10.00, 30.00),
            "gpt-4": (30.00, 60.00),
            "gpt-3.5-turbo": (0.50, 1.50),
            "claude-3-5-sonnet": (3.00, 15.00),
            "claude-3-5-haiku": (0.80, 4.00),
            "claude-3-opus": (15.00, 75.00),
        }
        input_price, output_price = pricing.get(model, (1.00, 2.00))

        return TokenEstimate(
            text=text[:100] + "..." if len(text) > 100 else text,
            token_count=token_count,
            model=model,
            char_count=len(text),
            word_count=len(text.split()),
            estimated_cost_input=(token_count / 1_000_000) * input_price,
            estimated_cost_output=(expected_output / 1_000_000) * output_price
        )

    def suggest_optimizations(self, text: str) -> List[Dict[str, Any]]:
        """
        Analyze text and suggest specific optimizations.

        Args:
            text: Text to analyze

        Returns:
            List of optimization suggestions with details
        """
        suggestions = []

        # Whitespace: count runs of TWO or more spaces.  The previous
        # pattern (" +") counted every single space, so the reported
        # number of "multiple spaces" was wildly inflated.
        whitespace_matches = len(re.findall(r" {2,}", text))
        if whitespace_matches > 0:
            suggestions.append({
                "type": OptimizationType.WHITESPACE.value,
                "description": f"Found {whitespace_matches} instances of multiple spaces",
                "potential_savings": "low",
                "auto_fixable": True
            })

        # Verbose phrases.
        verbose_count = 0
        for pattern, _ in self.verbose_compiled:
            verbose_count += len(pattern.findall(text))
        if verbose_count > 0:
            suggestions.append({
                "type": OptimizationType.VERBOSITY.value,
                "description": f"Found {verbose_count} verbose phrases that can be simplified",
                "potential_savings": "medium",
                "auto_fixable": True
            })

        # Redundant instructional phrases.
        redundant_count = 0
        for pattern in self.redundant_compiled:
            redundant_count += len(pattern.findall(text))
        if redundant_count > 0:
            suggestions.append({
                "type": OptimizationType.REDUNDANCY.value,
                "description": f"Found {redundant_count} redundant instructional phrases",
                "potential_savings": "medium",
                "auto_fixable": True
            })

        # Very long prompts may warrant manual summarization.
        token_count = count_tokens(text)
        if token_count > 1000:
            suggestions.append({
                "type": OptimizationType.COMPRESSION.value,
                "description": f"Long prompt ({token_count} tokens) - consider summarization",
                "potential_savings": "high",
                "auto_fixable": False
            })

        return suggestions

    def batch_optimize(
        self,
        texts: List[str],
        model: str = "gpt-4"
    ) -> List["OptimizationResult"]:
        """Optimize multiple texts with the same model."""
        return [self.optimize(text, model) for text in texts]