Observer/conversation_test.py at main · SyncForMe/Observer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/usr/bin/env python3
import requests
import json
import time
import os
import sys
from dotenv import load_dotenv

# Load environment variables from frontend/.env
load_dotenv('/app/frontend/.env')

# Get the backend URL from environment variables; without it no request can
# be built, so the script stops immediately with a non-zero exit status.
BACKEND_URL = os.environ.get('REACT_APP_BACKEND_URL')
if not BACKEND_URL:
    print("Error: REACT_APP_BACKEND_URL not found in environment variables")
    sys.exit(1)

# Build the API base URL. Trailing slashes on the configured backend URL are
# dropped first so the result is ".../api" rather than "...//api".
API_URL = f"{BACKEND_URL.rstrip('/')}/api"
print(f"Using API URL: {API_URL}")

def run_test(test_name, endpoint, method="GET", data=None, timeout=120):
    """Call one API endpoint, print the outcome, and return the parsed JSON body.

    The HTTP status code is printed but not checked, so the JSON body of an
    error response is returned just like the body of a successful one.

    Args:
        test_name: Human-readable label printed in the banner line.
        endpoint: Path appended to ``API_URL`` (for example ``"/conversations"``).
        method: HTTP verb; ``"GET"``, ``"POST"`` and ``"DELETE"`` are supported.
        data: JSON-serializable payload; it is sent only with ``POST`` requests.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout an unresponsive backend would block
            the script indefinitely.

    Returns:
        The decoded JSON response, or ``None`` when the method is unsupported,
        the request fails, or the response body is not valid JSON.
    """
    url = f"{API_URL}{endpoint}"
    print(f"\n{'='*80}\nTesting: {test_name} ({method} {url})")

    # Only the network call lives inside the try block so that unrelated
    # programming errors are not silently reported as request failures.
    try:
        if method == "GET":
            response = requests.get(url, timeout=timeout)
        elif method == "POST":
            response = requests.post(url, json=data, timeout=timeout)
        elif method == "DELETE":
            response = requests.delete(url, timeout=timeout)
        else:
            print(f"Unsupported method: {method}")
            return None
    except requests.RequestException as e:
        print(f"Error during test: {e}")
        return None

    # Print response details
    print(f"Status Code: {response.status_code}")

    # Decode errors raised by response.json() are ValueError subclasses
    # whichever JSON backend requests uses, so ValueError covers them all.
    try:
        response_data = response.json()
    except ValueError:
        print(f"Response is not JSON: {response.text}")
        return None

    print(f"Response: {json.dumps(response_data, indent=2)}")
    return response_data

# Substrings that mark a message as a canned fallback rather than real dialogue.
_GENERIC_PATTERNS = (
    "is analyzing",
    "is questioning",
    "is taking a moment",
    "is carefully considering",
    "nods thoughtfully",
    "API limit reached",
)

# Messages shorter than this many characters are treated as fallbacks as well.
_MIN_MESSAGE_LENGTH = 10

# Number of most recent conversations inspected in the history analysis.
_RECENT_CONVERSATION_COUNT = 5


def _message_text(msg):
    """Return the message body of *msg*, treating a missing or null value as empty."""
    return msg.get("message") or ""


def _is_fallback_message(message_text):
    """Return True when the text is too short or contains a generic fallback pattern."""
    if len(message_text) < _MIN_MESSAGE_LENGTH:
        return True
    return any(pattern in message_text for pattern in _GENERIC_PATTERNS)


def _verify_messages(messages):
    """Print one pass/fail line per message and return True when none is a fallback."""
    all_valid = True
    for msg in messages:
        agent_name = msg.get("agent_name", "Unknown")
        message_text = _message_text(msg)

        if _is_fallback_message(message_text):
            print(f"  ❌ {agent_name}: '{message_text}' (Generic fallback or too short)")
            all_valid = False
        else:
            print(f"  ✅ {agent_name}: '{message_text}'")
    return all_valid


def main():
    """Investigate conversation generation and auto-mode issues.

    Runs a fixed sequence of API calls through ``run_test`` (simulation state,
    API usage, manual conversation generation, observer message, auto-mode
    on, conversation history, auto-mode off) and prints a diagnosis based on
    the responses. Every step is skipped gracefully when its request returned
    nothing. The function returns ``None``; all results are printed.
    """
    print("Starting investigation of conversation generation and auto-mode issues...")

    # 1. Check current simulation state
    print("\nChecking current simulation state...")
    state_data = run_test("Get Simulation State", "/simulation/state")

    if state_data:
        print("\nCurrent simulation state:")
        print(f"  - Day: {state_data.get('current_day')}")
        print(f"  - Time Period: {state_data.get('current_time_period')}")
        print(f"  - Active: {state_data.get('is_active')}")
        print(f"  - Auto Conversations: {state_data.get('auto_conversations', False)}")
        print(f"  - Auto Time: {state_data.get('auto_time', False)}")
        print(f"  - Conversation Interval: {state_data.get('conversation_interval')}")
        print(f"  - Time Interval: {state_data.get('time_interval')}")

    # 2. Check API usage
    print("\nChecking API usage...")
    api_usage_data = run_test("Get API Usage", "/api-usage")

    if api_usage_data:
        print("\nAPI Usage Status:")
        print(f"  - Requests Used: {api_usage_data.get('requests_used')}/{api_usage_data.get('max_requests')}")
        print(f"  - Remaining: {api_usage_data.get('remaining')}")
        print(f"  - API Available: {api_usage_data.get('api_available')}")
        if api_usage_data.get('note'):
            print(f"  - Note: {api_usage_data.get('note')}")

    # 3. Test manual conversation generation
    print("\nTesting manual conversation generation...")
    conv_data = run_test("Generate Conversation", "/conversation/generate", method="POST")

    if conv_data:
        # "or []" also covers a JSON null, which .get()'s default would not.
        messages = conv_data.get("messages") or []
        if not messages:
            print("No messages generated in the conversation")
        else:
            print("\nVerifying conversation quality:")
            if _verify_messages(messages):
                print("\nSuccess: All agent responses are actual dialogue, not generic fallbacks")
            else:
                print("\nWarning: Some agent responses are generic fallbacks, which may indicate API quota issues")

    # 4. Test observer input
    print("\nTesting observer input functionality...")
    observer_data = {
        "observer_message": "What progress have you made on the project so far?"
    }
    observer_result = run_test("Send Observer Message", "/observer/send-message", method="POST", data=observer_data)

    if observer_result:
        agent_responses = (observer_result.get("agent_responses") or {}).get("messages") or []
        if not agent_responses:
            print("No agent responses to observer message")
        else:
            print("\nVerifying agent responses to observer:")
            if _verify_messages(agent_responses):
                print("\nSuccess: All agent responses to observer are actual dialogue, not generic fallbacks")
            else:
                print("\nWarning: Some agent responses to observer are generic fallbacks, which may indicate API quota issues")

    # 5. Toggle auto-mode on
    print("\nTesting auto-mode toggle functionality...")
    auto_mode_data = {
        "auto_conversations": True,
        "auto_time": True,
        "conversation_interval": 15,
        "time_interval": 45
    }
    auto_result = run_test("Toggle Auto Mode On", "/simulation/toggle-auto-mode", method="POST", data=auto_mode_data)

    if auto_result:
        print("\nAuto mode settings:")
        print(f"  - Auto Conversations: {auto_result.get('auto_conversations')}")
        print(f"  - Auto Time: {auto_result.get('auto_time')}")
        print(f"  - Conversation Interval: {auto_result.get('conversation_interval')} seconds")
        print(f"  - Time Interval: {auto_result.get('time_interval')} seconds")

        # Verify settings were saved in simulation state
        state_after_data = run_test("Verify Auto Mode Settings", "/simulation/state")

        if state_after_data:
            print("\nAuto mode settings in simulation state:")
            print(f"  - Auto Conversations: {state_after_data.get('auto_conversations')}")
            print(f"  - Auto Time: {state_after_data.get('auto_time')}")
            print(f"  - Conversation Interval: {state_after_data.get('conversation_interval')}")
            print(f"  - Time Interval: {state_after_data.get('time_interval')}")

            # Every value we sent must be reported back unchanged.
            settings_match = all(
                state_after_data.get(key) == value
                for key, value in auto_mode_data.items()
            )

            if settings_match:
                print("✅ Auto mode settings correctly saved in simulation state")
            else:
                print("❌ Auto mode settings in simulation state don't match requested values")

    # 6. Get recent conversations to check history
    print("\nChecking recent conversation history...")
    conversations_data = run_test("Get Conversations", "/conversations")

    if conversations_data and isinstance(conversations_data, list):
        total_convs = len(conversations_data)
        print(f"\nFound {total_convs} conversations in history")

        # A negative slice already copes with a history shorter than the window.
        recent_convs = conversations_data[-_RECENT_CONVERSATION_COUNT:]
        first_index = total_convs - len(recent_convs) + 1

        print("\nAnalyzing recent conversations for patterns:")
        for index, conv in enumerate(recent_convs, start=first_index):
            day_period = conv.get("time_period", "Unknown")
            messages = conv.get("messages") or []

            print(f"\nConversation {index}/{total_convs} - {day_period}:")

            generic_count = 0
            for msg in messages:
                agent_name = msg.get("agent_name", "Unknown")
                message_text = _message_text(msg)

                if _is_fallback_message(message_text):
                    generic_count += 1
                    print(f"  ❌ {agent_name}: '{message_text}' (Generic)")
                else:
                    print(f"  ✅ {agent_name}: '{message_text[:50]}...' (Authentic)")

            generic_percentage = (generic_count / len(messages)) * 100 if messages else 0
            print(f"  Generic responses: {generic_count}/{len(messages)} ({generic_percentage:.1f}%)")

    # 7. Toggle auto-mode off
    # NOTE: this always leaves auto-mode disabled with these intervals,
    # whatever state was observed in step 1.
    print("\nTurning auto-mode off...")
    auto_mode_off_data = {
        "auto_conversations": False,
        "auto_time": False,
        "conversation_interval": 10,
        "time_interval": 30
    }
    run_test("Toggle Auto Mode Off", "/simulation/toggle-auto-mode", method="POST", data=auto_mode_off_data)

    # Provide analysis of the issue
    print("\n" + "="*80)
    print("ISSUE ANALYSIS")
    print("="*80)

    if api_usage_data:
        if api_usage_data.get('api_available') == "quota_exceeded":
            print("CRITICAL ISSUE: API quota has been exceeded. This is preventing the generation of authentic conversations.")
            print("The system is falling back to generic responses when API limits are reached.")
            print("Recommendation: Wait for the API quota to reset (typically daily) or upgrade the API plan.")
        # "or 0" keeps the comparison valid when the field is missing or null.
        elif (api_usage_data.get('requests_used') or 0) > 900:
            print("WARNING: API usage is approaching the limit (>900 requests). This may cause intermittent failures.")
            print("The system may start falling back to generic responses as the limit is approached.")
            print("Recommendation: Monitor API usage and consider reducing usage or upgrading the API plan.")

    # This uses the state captured in step 1, i.e. before the toggles above.
    if state_data and not state_data.get('auto_conversations', False):
        print("ISSUE: Auto-conversations mode is currently disabled. This explains why conversations aren't generating automatically.")
        print("Recommendation: Enable auto-conversations mode using the toggle-auto-mode endpoint.")

    if conv_data and any("API limit reached" in _message_text(msg) for msg in (conv_data.get("messages") or [])):
        print("ISSUE: Some conversation messages indicate API limits have been reached.")
        print("This is causing the system to use fallback responses instead of generating authentic dialogue.")
        print("Recommendation: Wait for API quota to reset or upgrade the API plan.")

    print("\nSUMMARY OF FINDINGS:")
    if api_usage_data:
        print(f"- API Usage: {api_usage_data.get('requests_used')}/{api_usage_data.get('max_requests')} requests used")
        print(f"- API Status: {'Available' if api_usage_data.get('api_available') is True else 'Unavailable or Limited'}")

    if state_data:
        print(f"- Auto Conversations: {'Enabled' if state_data.get('auto_conversations', False) else 'Disabled'}")
        print(f"- Auto Time Progression: {'Enabled' if state_data.get('auto_time', False) else 'Disabled'}")

    if conversations_data and isinstance(conversations_data, list):
        recent_convs = conversations_data[-_RECENT_CONVERSATION_COUNT:]
        recent_msgs = [msg for conv in recent_convs for msg in (conv.get("messages") or [])]
        # Uses the same fallback test as the per-conversation report above so
        # both figures agree (too-short messages count as generic here too).
        generic_msgs = sum(1 for msg in recent_msgs if _is_fallback_message(_message_text(msg)))
        generic_percentage = (generic_msgs / len(recent_msgs)) * 100 if recent_msgs else 0

        print(f"- Recent Conversations Quality: {generic_percentage:.1f}% generic responses")

# Run the investigation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()