-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
161 lines (130 loc) · 5.06 KB
/
server.py
File metadata and controls
161 lines (130 loc) · 5.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from flask import Flask, jsonify, request, send_from_directory
import os
import sys
import datetime
import traceback
import requests
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from benchmark_system.runner import (
generate_clock,
evaluate_clock,
calculate_score,
call_openrouter,
JUDGE_PROMPT_TEMPLATE,
JUDGE_SPEC,
PROMPT
)
# Flask application object. static_folder=None is passed so no static folder
# is configured on the app itself; files are served by the explicit routes
# defined further down in this file.
app = Flask(__name__, static_folder=None)
# Path of the text file that log() appends timestamped entries to.
LOG_FILE = "log.txt"
# OpenRouter API key taken from the environment; None when the variable is unset.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
def fetch_openrouter_models(timeout=10):
    """Return the sorted list of model ids available on OpenRouter.

    Args:
        timeout: Seconds to wait for the OpenRouter API. ``requests`` applies
            no timeout unless one is given, so without it a stalled API would
            block the calling request handler indefinitely.

    Returns:
        An empty list when ``OPENROUTER_API_KEY`` is not set; the sorted
        ``id`` values from the ``/api/v1/models`` response on success; or a
        short hard-coded fallback list when the request or the parsing of
        its response fails for any reason.
    """
    if not OPENROUTER_API_KEY:
        return []

    try:
        response = requests.get(
            "https://openrouter.ai/api/v1/models",
            headers={"Authorization": f"Bearer {OPENROUTER_API_KEY}"},
            timeout=timeout,
        )
        response.raise_for_status()
        models = response.json()["data"]
        return sorted(m["id"] for m in models)
    except Exception as e:
        # Deliberately best-effort: network errors, timeouts, HTTP error
        # statuses and unexpected payloads all fall back to a static list.
        print(f"Warning: Could not fetch dynamic model list: {e}")
        return [
            "anthropic/claude-3.5-sonnet",
            "google/gemini-pro-1.5",
            "openai/gpt-4o",
            "meta-llama/llama-3.1-405b-instruct",
            "deepseek/deepseek-chat",
        ]
def log(message):
    """Print a timestamped log entry and append it to ``LOG_FILE``.

    Args:
        message: Text to record. It is prefixed with the current local time
            formatted as ``[YYYY-MM-DD HH:MM:SS]``.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{timestamp}] {message}"
    print(entry)
    # Explicit UTF-8: messages can carry arbitrary text (model names, error
    # strings), and the platform-default encoding may be unable to encode it.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(entry + "\n")
@app.route("/api/models", methods=["GET"])
def get_models():
    """Respond with the available model ids as ``{"models": [...]}``.

    Any exception raised while building the response is reported as a
    ``{"error": "<message>"}`` JSON body with HTTP status 500.
    """
    try:
        available = fetch_openrouter_models()
        payload = {"models": available}
        return jsonify(payload)
    except Exception as exc:
        failure = {"error": str(exc)}
        return jsonify(failure), 500
def _save_judge_response(model, judge_model, html_content):
    """Re-query the judge and dump its raw reply to judge_response.txt for debugging."""
    judge_prompt = f"""You are a deterministic code auditor. Your task is to audit the provided HTML/JS code for an analog clock based on the following specification:
{JUDGE_SPEC}
Analyze the code and return ONLY a valid JSON object following the "Audit JSON" schema defined in the specification.
Do not include any markdown formatting, preamble, or explanation. Just the raw JSON.
CODE TO AUDIT:
{html_content}
"""
    messages = [{"role": "user", "content": judge_prompt}]
    response = call_openrouter(judge_model, messages)
    content = response['choices'][0]['message']['content'].strip()
    with open("judge_response.txt", "w", encoding="utf-8") as f:
        f.write(f"MODEL: {model}\n")
        f.write(f"JUDGE: {judge_model}\n\n")
        f.write("RESPONSE:\n")
        f.write(content)


@app.route("/api/benchmark", methods=["POST"])
def run_single_benchmark():
    """Generate a clock with one model, have a judge audit it, and score it.

    Expects a JSON object body with string fields ``model`` and
    ``judge_model``.

    Returns:
        On success, a JSON object with the keys ``model``, ``judge_model``,
        ``timestamp``, ``file``, ``score``, ``breakdown`` and ``audit``; the
        same object is also written to ``summary.json`` in the run directory.
        A ``{"error": ...}`` body with status 400 is returned when the request
        body or its fields are missing or malformed, and with status 500 when
        generation, evaluation or any other step fails.
    """
    import json
    import re

    # silent=True yields None for a missing or unparsable JSON body, so the
    # failure can be reported as a JSON 400 instead of an AttributeError below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    model = data.get("model")
    judge_model = data.get("judge_model")
    if not model:
        return jsonify({"error": "model is required"}), 400
    if not judge_model:
        return jsonify({"error": "judge_model is required"}), 400
    if not isinstance(model, str) or not isinstance(judge_model, str):
        return jsonify({"error": "model and judge_model must be strings"}), 400

    log(f"Adding model: {model}")
    try:
        log("Generating clock...")
        html_content = generate_clock(model)
        if not html_content:
            log("ERROR: Clock generation failed")
            return jsonify({"error": "Clock generation failed"}), 500

        # The model name comes from the client and becomes part of a file
        # name, so keep only characters that are safe in a path component.
        # "/" and ":" still map to "_" exactly as before.
        safe_model_name = re.sub(r"[^A-Za-z0-9._-]", "_", model)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        run_dir = os.path.join("runs", timestamp)
        os.makedirs(run_dir, exist_ok=True)
        file_path = os.path.join(run_dir, f"{safe_model_name}.html")
        # Generated HTML may contain non-ASCII characters; write it as UTF-8
        # rather than relying on the platform-default encoding.
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(html_content)
        log(f"Clock saved to {file_path}")

        log(f"Evaluating with judge: {judge_model}")
        audit_data = evaluate_clock(judge_model, html_content)
        if not audit_data:
            log("ERROR: Evaluation failed - see judge_response.txt for details")
            # Ask the judge once more and keep its raw reply on disk so the
            # failed evaluation can be inspected afterwards.
            _save_judge_response(model, judge_model, html_content)
            return jsonify({"error": "Evaluation failed"}), 500

        final_score, breakdown = calculate_score(audit_data)
        log(f"Score: {final_score} | Time:{breakdown['time']} Visual:{breakdown['visual']} Dial:{breakdown['dial']} Code:{breakdown['code']} Motion:{breakdown['motion']}")
        result = {
            "model": model,
            "judge_model": judge_model,
            "timestamp": timestamp,
            "file": file_path,
            "score": final_score,
            "breakdown": breakdown,
            "audit": audit_data,
        }
        summary_path = os.path.join(run_dir, "summary.json")
        with open(summary_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
        return jsonify(result)
    except Exception as e:
        # Top-level boundary of the handler: record the failure and report it
        # to the client instead of letting the exception escape.
        log(f"ERROR: {str(e)}")
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
@app.route("/")
def index():
    """Serve ``index.html`` from the "." directory as the site's root page."""
    page_name = "index.html"
    return send_from_directory(".", page_name)
@app.route("/cloud/<path:filename>")
def cloud_static(filename):
    """Serve the requested file from the ``cloud`` directory.

    Args:
        filename: Path captured from the URL after ``/cloud/``.
    """
    directory = "cloud"
    return send_from_directory(directory, filename)
# NOTE(review): URL rules appear to be matched against the percent-decoded
# request path, so a request for "/local%20/x" would reach the router as
# "/local /x" and not match the literal "%20" rule. The decoded-space rule is
# added for that case; the original rule is kept so nothing that relied on it
# changes. Confirm against the Werkzeug version in use.
@app.route("/local%20/<path:filename>")
@app.route("/local /<path:filename>")
def local_static(filename):
    """Serve the requested file from the ``"local "`` directory.

    The directory name ends with a space, exactly as in the original code.

    Args:
        filename: Path captured from the URL after the directory prefix.
    """
    return send_from_directory("local ", filename)
@app.route("/runs/<path:filename>")
def runs_static(filename):
    """Serve the requested file from the ``runs`` directory.

    Args:
        filename: Path captured from the URL after ``/runs/``.
    """
    directory = "runs"
    return send_from_directory(directory, filename)
if __name__ == "__main__":
    # Script entry point: record the start-up in the log, then run the Flask
    # server on all interfaces, port 5000, with debug mode off.
    log("Server started")
    app.run(
        debug=False,
        port=5000,
        host="0.0.0.0",
    )