# Source: cascadeflow/examples/rate_limiting_usage.py (lemony-ai/cascadeflow on GitHub, main branch)
"""
Example: Rate Limiting with cascadeflow

This example demonstrates per-user and per-tier rate limiting
with the sliding window algorithm.
"""

import asyncio

from cascadeflow import (
    CascadeAgent,
    RateLimiter,
    TierLevel,
    UserProfile,
)


# Width of the "=" / "-" separator rules printed between sections.
_RULE_WIDTH = 60

# Upper bound on requests issued by the budget demo, so it always terminates
# even if the limiter never blocks.
_MAX_BUDGET_DEMO_REQUESTS = 20


def _print_section(title) -> None:
    """Print a blank line, the section title, and a dashed rule."""
    print(f"\n{title}")
    print("-" * _RULE_WIDTH)


def _print_tier_limits(profile) -> None:
    """Print the tier name and the hourly/daily/budget limits of *profile*."""
    print(f"Tier: {profile.tier.name}")
    print(f"Hourly limit: {profile.get_requests_per_hour()}")
    print(f"Daily limit: {profile.get_requests_per_day()}")
    print(f"Daily budget: ${profile.get_daily_budget()}")


async def _demo_basic_requests(limiter, agent, profile) -> None:
    """Example 1 body: issue 5 limiter-gated requests, then print usage stats.

    Args:
        limiter: Rate limiter consulted before, and updated after, each request.
        agent: Agent whose ``run`` coroutine performs the request.
        profile: User profile the requests are checked and recorded against.
    """
    print("\nMaking 5 requests...")
    for number in range(1, 6):
        # Check the rate limit before spending anything on the request.
        allowed, reason = await limiter.check_rate_limit(profile)
        if not allowed:
            print(f"  Request {number}: BLOCKED - {reason}")
            continue

        result = await agent.run(f"What is {number} + {number}?")

        # Record the request together with the cost it actually incurred.
        await limiter.record_request(profile, cost=result.total_cost)
        print(f"  Request {number}: OK - Cost: ${result.total_cost:.6f}")

    stats = await limiter.get_usage_stats(profile)
    print("\nUsage stats:")
    print(f"  Hourly: {stats['hourly_requests']}/{stats['hourly_limit']}")
    print(f"  Daily: {stats['daily_requests']}/{stats['daily_limit']}")
    print(f"  Cost: ${stats['daily_cost']:.6f}/${stats['daily_budget']}")


async def _demo_enforcement(limiter, agent) -> None:
    """Example 2: attempt 5 requests against a profile capped at 3 per hour.

    Counts how many requests were processed and how many the limiter blocked,
    and prints both totals at the end.
    """
    _print_section("2. Rate limit enforcement demo")

    # Profile with deliberately low custom limits.
    test_profile = UserProfile.from_tier(
        TierLevel.FREE,
        user_id="test_user",
        custom_requests_per_hour=3,  # Only 3 requests per hour
        custom_daily_budget=0.01,  # Very low budget
    )
    test_profile.preferred_models = ["gpt-4o-mini"]

    print(
        f"Custom limits: {test_profile.get_requests_per_hour()} req/hour, ${test_profile.get_daily_budget()} budget"
    )

    print("\nAttempting 5 requests (limit is 3)...")
    processed = 0
    blocked = 0

    for number in range(1, 6):
        allowed, reason = await limiter.check_rate_limit(test_profile)
        if not allowed:
            print(f"  Request {number}: BLOCKED - {reason}")
            blocked += 1
            continue

        result = await agent.run(f"Simple test {number}")
        await limiter.record_request(test_profile, cost=result.total_cost)
        processed += 1
        print(f"  Request {number}: OK")

    print(f"\n✓ Processed: {processed}, Blocked: {blocked}")


async def _demo_pro_tier(limiter, agent) -> None:
    """Example 3: issue 10 requests for a PRO-tier profile and print its usage."""
    _print_section("3. PRO tier with higher limits")

    pro_profile = UserProfile.from_tier(TierLevel.PRO, user_id="pro_user")
    pro_profile.preferred_models = ["gpt-4o-mini"]
    _print_tier_limits(pro_profile)

    print("\nMaking 10 rapid requests...")
    for number in range(1, 11):
        # The block reason is not shown in this example, so it is discarded.
        allowed, _ = await limiter.check_rate_limit(pro_profile)
        if allowed:
            result = await agent.run(f"Quick query {number}")
            await limiter.record_request(pro_profile, cost=result.total_cost)
            print(f"  Request {number}: OK")
        else:
            print(f"  Request {number}: BLOCKED")

    stats = await limiter.get_usage_stats(pro_profile)
    print("\nPRO user usage:")
    print(
        f"  Hourly: {stats['hourly_requests']}/{stats['hourly_limit']} ({stats['hourly_remaining']} remaining)"
    )
    print(
        f"  Daily: {stats['daily_requests']}/{stats['daily_limit']} ({stats['daily_remaining']} remaining)"
    )
    print(
        f"  Budget: ${stats['daily_cost']:.6f}/${stats['daily_budget']} (${stats['budget_remaining']:.4f} remaining)"
    )


async def _demo_budget(limiter, agent) -> None:
    """Example 4: issue requests until the limiter blocks or the safety cap hits.

    Each check passes an estimated cost of 0.01; the cost recorded afterwards
    is the one reported by the agent's result. At most
    ``_MAX_BUDGET_DEMO_REQUESTS`` requests are made.
    """
    _print_section("4. Budget-based rate limiting")

    budget_profile = UserProfile.from_tier(
        TierLevel.FREE,
        user_id="budget_user",
        custom_daily_budget=0.05,  # $0.05 daily budget
    )
    budget_profile.preferred_models = ["gpt-4o-mini"]

    print(f"Daily budget: ${budget_profile.get_daily_budget()}")

    print("\nMaking requests until budget exceeded...")
    total_cost = 0.0
    completed = 0
    was_blocked = False

    for attempt in range(1, _MAX_BUDGET_DEMO_REQUESTS + 1):
        # Check with estimated cost
        allowed, reason = await limiter.check_rate_limit(budget_profile, cost=0.01)
        if not allowed:
            print(f"\n{reason}")
            was_blocked = True
            break

        result = await agent.run(f"Budget test {attempt}")
        await limiter.record_request(budget_profile, cost=result.total_cost)

        completed = attempt
        total_cost += result.total_cost
        print(f"  Request {completed}: ${result.total_cost:.6f} (total: ${total_cost:.6f})")

    if was_blocked:
        print(f"\n✓ Completed {completed} requests before hitting budget limit")
    else:
        # The loop ran out of attempts without the limiter ever blocking, so
        # claiming a budget limit was hit would be misleading.
        print(
            f"\n✓ Completed {completed} requests "
            f"(stopped at the {_MAX_BUDGET_DEMO_REQUESTS}-request safety limit)"
        )


def _print_tier_comparison() -> None:
    """Example 5: print a table of per-tier request limits and daily budgets.

    A falsy limit or budget is rendered as "Unlimited".
    """
    _print_section("5. Comparing tier limits")

    print(f"{'Tier':<12} {'Req/Hour':<12} {'Req/Day':<12} {'Daily Budget':<15}")
    print("-" * _RULE_WIDTH)

    for tier in (TierLevel.FREE, TierLevel.STARTER, TierLevel.PRO, TierLevel.BUSINESS):
        profile = UserProfile.from_tier(tier, user_id=f"{tier.value}_user")
        req_hour = profile.get_requests_per_hour() or "Unlimited"
        req_day = profile.get_requests_per_day() or "Unlimited"
        daily_budget = profile.get_daily_budget()
        budget = f"${daily_budget}" if daily_budget else "Unlimited"

        print(f"{tier.value:<12} {str(req_hour):<12} {str(req_day):<12} {budget:<15}")


async def main():
    """Run the five rate-limiting examples in order, printing results to stdout.

    A single ``RateLimiter`` and a single ``CascadeAgent`` are created in
    Example 1 and shared by every later example.
    """
    print("=" * _RULE_WIDTH)
    print("cascadeflow - Rate Limiting")
    print("=" * _RULE_WIDTH)

    # Example 1: Basic rate limiting with FREE tier
    _print_section("1. FREE tier rate limiting (10 req/hour, 100 req/day)")

    free_profile = UserProfile.from_tier(TierLevel.FREE, user_id="free_user")
    free_profile.preferred_models = ["gpt-4o-mini"]
    _print_tier_limits(free_profile)

    limiter = RateLimiter()

    # NOTE(review): the agent is built from the FREE profile and reused for all
    # later examples; only the profile handed to the limiter changes. Confirm
    # this is intended rather than creating one agent per profile.
    agent = CascadeAgent.from_profile(free_profile)

    await _demo_basic_requests(limiter, agent, free_profile)

    # Examples 2-4 each build their own profile but share limiter and agent.
    await _demo_enforcement(limiter, agent)
    await _demo_pro_tier(limiter, agent)
    await _demo_budget(limiter, agent)

    # Example 5 only inspects tier defaults; it makes no requests.
    _print_tier_comparison()

    print("\n" + "=" * _RULE_WIDTH)
    print("Rate limiting examples completed!")
    print("=" * _RULE_WIDTH)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())