-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimizer.py
More file actions
446 lines (383 loc) · 15 KB
/
optimizer.py
File metadata and controls
446 lines (383 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
"""Prompt optimization utilities for reducing token usage."""
import re
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum
from .utils import count_tokens
class OptimizationType(Enum):
    """Types of optimizations."""

    WHITESPACE = "whitespace"    # collapse repeated spaces / excess blank lines
    REDUNDANCY = "redundancy"    # drop redundant instructional phrases
    VERBOSITY = "verbosity"      # replace verbose phrases with concise ones
    FORMATTING = "formatting"    # formatting-related changes
    COMPRESSION = "compression"  # sentence-level compression toward a budget
@dataclass
class OptimizationResult:
    """Result of prompt optimization.

    Captures the before/after text, token counts, and the names of the
    optimization passes that actually shortened the text.
    """

    original_text: str       # prompt exactly as supplied by the caller
    optimized_text: str      # prompt after all optimization passes
    original_tokens: int     # token count of original_text
    optimized_tokens: int    # token count of optimized_text
    tokens_saved: int        # original_tokens - optimized_tokens
    savings_percent: float   # tokens_saved as a percentage of original_tokens
    # Names of the passes that changed the text (e.g. "whitespace_normalization").
    optimizations_applied: List[str] = field(default_factory=list)
    # Human-readable follow-up hints that were not applied automatically.
    suggestions: List[str] = field(default_factory=list)
@dataclass
class TokenEstimate:
    """Token count estimate."""

    text: str                     # preview of the input (truncated to ~100 chars by estimate_tokens)
    token_count: int              # tokens in the full input text
    model: str                    # model name used for counting and pricing lookup
    char_count: int               # character count of the full input text
    word_count: int               # whitespace-delimited word count of the full input
    estimated_cost_input: float   # estimated input-side cost (per-1M-token pricing table)
    estimated_cost_output: float  # projected output-side cost (per-1M-token pricing table)
class PromptOptimizer:
    """Optimizes prompts to reduce token usage.

    Applies a pipeline of text transformations in order: whitespace
    normalization, removal of redundant instructional phrases, replacement
    of verbose phrases with concise equivalents, optional aggressive
    article/filler stripping, and optional sentence-level compression
    toward a target token budget.  Fenced and inline code spans are
    shielded from all transforms when ``preserve_formatting`` is True.
    """

    # Common verbose phrases and their concise alternatives.  Stored as
    # plain phrases; word boundaries are added when compiled in __init__
    # so a phrase never matches inside a larger word.
    VERBOSE_PATTERNS = [
        (r"in order to", "to"),
        (r"due to the fact that", "because"),
        (r"at this point in time", "now"),
        (r"in the event that", "if"),
        (r"for the purpose of", "to"),
        (r"with regard to", "about"),
        (r"in spite of the fact that", "although"),
        (r"has the ability to", "can"),
        (r"is able to", "can"),
        (r"make a decision", "decide"),
        (r"take into consideration", "consider"),
        (r"give consideration to", "consider"),
        (r"prior to", "before"),
        (r"subsequent to", "after"),
        (r"in close proximity to", "near"),
        (r"a large number of", "many"),
        (r"a small number of", "few"),
        (r"the majority of", "most"),
        (r"at the present time", "now"),
        (r"in today's day and age", "today"),
        (r"each and every", "each"),
        (r"first and foremost", "first"),
        (r"various different", "various"),
        (r"completely eliminate", "eliminate"),
        (r"absolutely essential", "essential"),
        (r"basic fundamentals", "fundamentals"),
        (r"past history", "history"),
        (r"future plans", "plans"),
        (r"end result", "result"),
        (r"final outcome", "outcome"),
    ]

    # Redundant instruction prefixes that cost tokens without adding
    # information for the model.  A leading word boundary is added when
    # compiled in __init__.
    REDUNDANT_PATTERNS = [
        r"please\s+",
        r"kindly\s+",
        r"I would like you to\s+",
        r"Could you please\s+",
        r"Would you mind\s+",
        r"I want you to\s+",
        r"I need you to\s+",
        r"Your task is to\s+",
        r"You should\s+",
        r"You must\s+",
        r"Make sure to\s+",
        r"Be sure to\s+",
        r"Remember to\s+",
        r"Don't forget to\s+",
    ]

    def __init__(
        self,
        aggressive: bool = False,
        preserve_formatting: bool = True,
        min_savings_threshold: float = 0.05
    ):
        """
        Initialize the optimizer.

        Args:
            aggressive: Apply more aggressive optimizations (article and
                filler-word removal).
            preserve_formatting: Keep markdown/code formatting intact.
            min_savings_threshold: Minimum savings to apply optimization.
        """
        self.aggressive = aggressive
        self.preserve_formatting = preserve_formatting
        self.min_savings_threshold = min_savings_threshold
        # Compile patterns once.  The \b anchors fix a bug where phrases
        # matched inside larger words (e.g. "win order to" -> "wto",
        # "displease you" -> "disyou").
        self.verbose_compiled = [
            (re.compile(rf"\b{pattern}\b", re.IGNORECASE), replacement)
            for pattern, replacement in self.VERBOSE_PATTERNS
        ]
        # Redundant patterns end in \s+, so only a leading boundary is added.
        self.redundant_compiled = [
            re.compile(rf"\b{pattern}", re.IGNORECASE)
            for pattern in self.REDUNDANT_PATTERNS
        ]

    def optimize(
        self,
        text: str,
        model: str = "gpt-4",
        target_tokens: Optional[int] = None
    ) -> "OptimizationResult":
        """
        Optimize a prompt to reduce token usage.

        Args:
            text: The prompt text to optimize
            model: Model for token counting
            target_tokens: Optional target token count

        Returns:
            OptimizationResult with optimized text and stats
        """
        original_tokens = count_tokens(text, model)
        optimized = text
        applied = []

        # Shelve code spans behind placeholders so the text transforms
        # below cannot mangle fenced blocks or inline code.
        code_blocks = []
        if self.preserve_formatting:
            code_pattern = re.compile(r"```[\s\S]*?```|`[^`]+`")
            code_blocks = code_pattern.findall(optimized)
            for i, block in enumerate(code_blocks):
                # Replace one occurrence per placeholder so duplicate
                # spans each keep a distinct index.
                optimized = optimized.replace(block, f"__CODE_BLOCK_{i}__", 1)

        # 1. Normalize whitespace.
        before_len = len(optimized)
        optimized = self._normalize_whitespace(optimized)
        if len(optimized) < before_len:
            applied.append("whitespace_normalization")

        # 2. Remove redundant instructional phrases.
        before_len = len(optimized)
        optimized = self._remove_redundant_phrases(optimized)
        if len(optimized) < before_len:
            applied.append("redundancy_removal")

        # 3. Replace verbose phrases with concise alternatives.
        before_len = len(optimized)
        optimized = self._replace_verbose_phrases(optimized)
        if len(optimized) < before_len:
            applied.append("verbosity_reduction")

        # 4. Aggressive optimizations, if enabled.
        if self.aggressive:
            before_len = len(optimized)
            optimized = self._aggressive_optimize(optimized)
            if len(optimized) < before_len:
                applied.append("aggressive_compression")

        # 5. If a target was given and still not met, drop low-value
        #    sentences until the budget fits.
        if target_tokens:
            current_tokens = count_tokens(optimized, model)
            if current_tokens > target_tokens:
                optimized = self._compress_to_target(optimized, target_tokens, model)
                applied.append("target_compression")

        # Restore the shelved code spans.
        if self.preserve_formatting:
            for i, block in enumerate(code_blocks):
                optimized = optimized.replace(f"__CODE_BLOCK_{i}__", block)

        # Calculate results.
        optimized_tokens = count_tokens(optimized, model)
        tokens_saved = original_tokens - optimized_tokens
        savings_percent = (tokens_saved / original_tokens * 100) if original_tokens > 0 else 0

        return OptimizationResult(
            original_text=text,
            optimized_text=optimized,
            original_tokens=original_tokens,
            optimized_tokens=optimized_tokens,
            tokens_saved=tokens_saved,
            savings_percent=savings_percent,
            optimizations_applied=applied,
            suggestions=self._generate_suggestions(text, optimized)
        )

    def _normalize_whitespace(self, text: str) -> str:
        """Collapse redundant whitespace while keeping paragraph breaks."""
        text = re.sub(r" +", " ", text)              # runs of spaces -> one space
        text = re.sub(r"\n{3,}", "\n\n", text)       # 3+ newlines -> one blank line
        text = re.sub(r" +\n", "\n", text)           # trailing spaces on lines
        text = re.sub(r"\n +(?=[^\s])", "\n", text)  # leading spaces on lines
        return text.strip()

    def _remove_redundant_phrases(self, text: str) -> str:
        """Strip redundant instructional phrases (e.g. "please ", "kindly ")."""
        for pattern in self.redundant_compiled:
            text = pattern.sub("", text)
        return text

    def _replace_verbose_phrases(self, text: str) -> str:
        """Replace verbose phrases with concise alternatives."""
        for pattern, replacement in self.verbose_compiled:
            text = pattern.sub(replacement, text)
        return text

    def _aggressive_optimize(self, text: str) -> str:
        r"""Apply aggressive optimizations: drop articles and filler words.

        Word boundaries (\b) guard both substitutions; without them the
        removal corrupted words that merely contain an article or filler
        as a substring (e.g. "data now" -> "datnow", "adjust " -> "ad").
        """
        # Remove standalone articles followed by a word.
        text = re.sub(r"\b(the|a|an)\b\s+(?=\w)", "", text, flags=re.IGNORECASE)
        # Remove standalone filler words.
        fillers = ["just", "really", "very", "quite", "rather", "somewhat", "actually", "basically"]
        for filler in fillers:
            text = re.sub(rf"\b{filler}\b\s+", "", text, flags=re.IGNORECASE)
        return text

    def _compress_to_target(
        self,
        text: str,
        target_tokens: int,
        model: str
    ) -> str:
        """Compress text to meet a target token count.

        Sentences are scored with a simple importance heuristic and
        greedily kept (highest score first) while the budget allows;
        surviving sentences are re-emitted in their original order.
        """
        current_tokens = count_tokens(text, model)
        if current_tokens <= target_tokens:
            return text

        # Split into sentences on terminal punctuation.
        sentences = re.split(r"(?<=[.!?])\s+", text)
        if len(sentences) <= 1:
            # Cannot split further; fall back to proportional truncation.
            ratio = target_tokens / current_tokens
            return text[:int(len(text) * ratio)]

        # Score each sentence (higher = more important), remembering its
        # original index so order can be restored exactly.
        scored = []
        for i, sent in enumerate(sentences):
            score = 0.0
            # First and last sentences usually carry the framing.
            if i == 0 or i == len(sentences) - 1:
                score += 2
            # Sentences containing directive/key terms.
            if re.search(r"(must|should|important|key|main|critical)", sent, re.IGNORECASE):
                score += 1
            # Mild preference for shorter sentences when compressing.
            score -= len(sent) / 1000
            scored.append((score, i, sent))

        # Greedily keep the highest-scoring sentences that fit the budget.
        # Sorting on the score alone keeps ties in original order instead
        # of tie-breaking on reversed string comparison.
        scored.sort(key=lambda item: item[0], reverse=True)
        kept_indices = set()
        used_tokens = 0
        for score, i, sent in scored:
            sent_tokens = count_tokens(sent, model)
            if used_tokens + sent_tokens <= target_tokens:
                kept_indices.add(i)
                used_tokens += sent_tokens

        # Re-emit kept sentences in original order.  Tracking indices
        # (not text membership) avoids resurrecting dropped duplicates.
        return " ".join(sentences[i] for i in sorted(kept_indices))

    def _generate_suggestions(self, original: str, optimized: str) -> List[str]:
        """Generate human-readable optimization suggestions for *original*."""
        suggestions = []

        # Repetition: words longer than 4 chars used more than 3 times.
        words = original.lower().split()
        word_counts = {}
        for word in words:
            if len(word) > 4:
                word_counts[word] = word_counts.get(word, 0) + 1
        repeated = [w for w, c in word_counts.items() if c > 3]
        if repeated:
            suggestions.append(
                f"Consider reducing repetition of: {', '.join(repeated[:5])}"
            )

        # Long sentences (> 40 words).
        sentences = re.split(r"(?<=[.!?])\s+", original)
        long_sentences = [s for s in sentences if len(s.split()) > 40]
        if long_sentences:
            suggestions.append(
                f"Consider breaking up {len(long_sentences)} long sentence(s) for clarity"
            )

        # Many examples that could be condensed to one.
        if original.count("example") > 2 or original.count("e.g.") > 2:
            suggestions.append(
                "Multiple examples detected - consider using a single representative example"
            )

        # Many bullet points that could be grouped.
        bullet_count = len(re.findall(r"^\s*[-*•]\s", original, re.MULTILINE))
        if bullet_count > 10:
            suggestions.append(
                f"{bullet_count} bullet points detected - consider grouping or summarizing"
            )

        return suggestions

    def estimate_tokens(
        self,
        text: str,
        model: str = "gpt-4",
        expected_output_ratio: float = 1.0
    ) -> "TokenEstimate":
        """
        Estimate token count and costs.

        Args:
            text: Text to estimate
            model: Model for estimation
            expected_output_ratio: Expected output/input ratio

        Returns:
            TokenEstimate with counts and costs
        """
        token_count = count_tokens(text, model)
        expected_output = int(token_count * expected_output_ratio)

        # Pricing per 1M tokens: (input, output).  Unknown models fall
        # back to a generic (1.00, 2.00).
        pricing = {
            "gpt-4o": (2.50, 10.00),
            "gpt-4o-mini": (0.15, 0.60),
            "gpt-4-turbo": (10.00, 30.00),
            "gpt-4": (30.00, 60.00),
            "gpt-3.5-turbo": (0.50, 1.50),
            "claude-3-5-sonnet": (3.00, 15.00),
            "claude-3-5-haiku": (0.80, 4.00),
            "claude-3-opus": (15.00, 75.00),
        }
        input_price, output_price = pricing.get(model, (1.00, 2.00))

        return TokenEstimate(
            text=text[:100] + "..." if len(text) > 100 else text,
            token_count=token_count,
            model=model,
            char_count=len(text),
            word_count=len(text.split()),
            estimated_cost_input=(token_count / 1_000_000) * input_price,
            estimated_cost_output=(expected_output / 1_000_000) * output_price
        )

    def suggest_optimizations(self, text: str) -> List[Dict[str, Any]]:
        """
        Analyze text and suggest specific optimizations.

        Args:
            text: Text to analyze

        Returns:
            List of optimization suggestions with details
        """
        suggestions = []

        # Whitespace: count runs of TWO or more spaces.  The previous
        # pattern (" +") counted every single space, so the reported
        # number of "multiple spaces" was wildly inflated.
        whitespace_matches = len(re.findall(r" {2,}", text))
        if whitespace_matches > 0:
            suggestions.append({
                "type": OptimizationType.WHITESPACE.value,
                "description": f"Found {whitespace_matches} instances of multiple spaces",
                "potential_savings": "low",
                "auto_fixable": True
            })

        # Verbose phrases.
        verbose_count = 0
        for pattern, _ in self.verbose_compiled:
            verbose_count += len(pattern.findall(text))
        if verbose_count > 0:
            suggestions.append({
                "type": OptimizationType.VERBOSITY.value,
                "description": f"Found {verbose_count} verbose phrases that can be simplified",
                "potential_savings": "medium",
                "auto_fixable": True
            })

        # Redundant instructional phrases.
        redundant_count = 0
        for pattern in self.redundant_compiled:
            redundant_count += len(pattern.findall(text))
        if redundant_count > 0:
            suggestions.append({
                "type": OptimizationType.REDUNDANCY.value,
                "description": f"Found {redundant_count} redundant instructional phrases",
                "potential_savings": "medium",
                "auto_fixable": True
            })

        # Very long prompts may warrant manual summarization.
        token_count = count_tokens(text)
        if token_count > 1000:
            suggestions.append({
                "type": OptimizationType.COMPRESSION.value,
                "description": f"Long prompt ({token_count} tokens) - consider summarization",
                "potential_savings": "high",
                "auto_fixable": False
            })

        return suggestions

    def batch_optimize(
        self,
        texts: List[str],
        model: str = "gpt-4"
    ) -> List["OptimizationResult"]:
        """Optimize multiple texts with the same model."""
        return [self.optimize(text, model) for text in texts]