AI-JS-Test/geminiClient.js at main · RiggedToEncodeINFO3604Project/AI-JS-Test · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
/**
 * src/geminiClient.js   —  RENDER-OPTIMISED (stateless + rate-limited)
 * ─────────────────────────────────────────────────────────────────────
 * Two critical changes from the original version:
 *
 *   1. STATELESS — no in-memory session Map.
 *      Render's free tier spins the instance down after 15 min idle;
 *      when it wakes again every variable in RAM is gone.
 *      Fix: the frontend owns conversation history (an array of
 *      {role, text} objects) and sends it with every request.  The server
 *      rebuilds the full prompt each time.  Cost impact is negligible —
 *      a 4-turn history is still only ~1 300 input tokens.
 *
 *   2. REQUEST QUEUE with exponential back-off.
 *      Gemini 1.5 Flash free tier allows 15 requests / minute.
 *      Under a burst the API returns HTTP 429.  The queue serialises
 *      outgoing calls and retries each 429 with back-off, so a request
 *      fails only after its retries (MAX_RETRIES) are exhausted.
 *
 * GITHUB PATH  →  src/geminiClient.js
 */

"use strict";

const { GoogleGenerativeAI }          = require("@google/generative-ai");
const { getFullKnowledgeBase, getRelevantContext } = require("./knowledgeBase");

// ── SDK init ─────────────────────────────────────────────────────────────────
// The client is constructed once, at module load, from the GEMINI_API_KEY
// environment variable.
// NOTE(review): nothing here checks that the variable is set — presumably a
// missing key only surfaces when the first API call is made; confirm.
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
// Model id handed to getGenerativeModel() for every chat request.
const MODEL = "gemini-1.5-flash";

// ── System prompt (rebuilt on every request — stateless) ─────────────────────
/**
 * Assembles the system prompt sent as the first turn of every conversation:
 * the assistant's role and rules, followed by the full knowledge base framed
 * between banner lines.
 *
 * @returns {string} The complete prompt text, lines separated by "\n".
 */
function buildSystemPrompt() {
  const knowledgeBase = getFullKnowledgeBase();

  // One entry per output line; blank entries are the empty separator lines.
  const promptLines = [
    "You are the official Skedulelt Support Assistant.",
    "Skedulelt is a mobile scheduling & payment app for service providers",
    "and customers in Trinidad & Tobago.",
    "",
    "Rules:",
    "  • Answer ONLY based on the knowledge base below.",
    "  • If the question falls outside the knowledge base, say:",
    `    "I'm sorry, I don't have information on that. Please contact`,
    `     our support team for further assistance."`,
    "  • Be friendly, concise, and helpful.",
    "  • Do NOT hallucinate features, policies, or prices.",
    "  • Respond in English.",
    "",
    "════════════════════════════════════════════════",
    " SKEDULELT KNOWLEDGE BASE",
    "════════════════════════════════════════════════",
    // Interpolated so the value is stringified exactly as a template would.
    `${knowledgeBase}`,
    "════════════════════════════════════════════════",
  ];

  return promptLines.join("\n");
}

// ── Rate-limit queue ─────────────────────────────────────────────────────────
// Retry policy for HTTP 429 responses: the delay doubles on each retry,
// starting from BASE_DELAY_MS (2 s, then 4, 8, 16 s with MAX_RETRIES = 4).
const BASE_DELAY_MS = 2000;
const MAX_RETRIES = 4;

// Pending jobs in FIFO order.  Each entry is { resolve, reject, fn }.
const queue = [];

// True while the runner loop is draining `queue`; the runner handles one job
// at a time, including that job's retries, before moving to the next.
let running = false;

/**
 * Adds a job to the FIFO queue and starts the runner if it is idle.
 *
 * @param {Function} fn  Work to execute once the job reaches the queue head.
 * @returns {Promise<*>} Settles when the runner calls the job's resolve/reject.
 */
async function enqueue(fn) {
  let settle;
  const outcome = new Promise((resolve, reject) => {
    settle = { resolve, reject };
  });

  queue.push({ resolve: settle.resolve, reject: settle.reject, fn });

  // Only one runner loop may be active; a running loop will pick this job up.
  if (!running) {
    runQueue();
  }

  return outcome;
}

/**
 * Drains the queue one job at a time, in FIFO order.  Each job is run through
 * executeWithRetry(); its outcome is forwarded to the job's own resolve or
 * reject, so one failing job never stops the loop.
 *
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function runQueue() {
  running = true;

  while (queue.length !== 0) {
    const job = queue.shift();
    try {
      const result = await executeWithRetry(job.fn);
      job.resolve(result);
    } catch (err) {
      job.reject(err);
    }
  }

  running = false;
}

/**
 * Runs `fn`, retrying with exponential back-off while it fails with HTTP 429.
 *
 * The status is read from `err.status`, falling back to `err.response.status`.
 * Any other failure — or a 429 once the retries are used up — is rethrown
 * unchanged, so callers always see the original error.
 *
 * @param {Function} fn  Operation to attempt; may return a value or a promise.
 * @param {Object} [options]
 * @param {number} [options.maxRetries=MAX_RETRIES]
 *        Retries allowed after the first attempt.
 * @param {number} [options.baseDelayMs=BASE_DELAY_MS]
 *        Delay before the first retry; doubled for each subsequent retry.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The last error thrown by `fn`.
 */
async function executeWithRetry(
  fn,
  { maxRetries = MAX_RETRIES, baseDelayMs = BASE_DELAY_MS } = {}
) {
  // The loop exits only via `return` (success) or `throw` (give up).
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      // Guard against non-object throws (e.g. `throw null`) so the original
      // failure is rethrown instead of a TypeError from the property access.
      const status = err && (err.status || (err.response && err.response.status));
      if (status !== 429 || attempt >= maxRetries) {
        throw err;
      }

      const delay = baseDelayMs * 2 ** attempt;
      console.warn(`[Queue] 429 — retry ${attempt + 1}/${maxRetries} in ${delay} ms`);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}

// ── Public API ───────────────────────────────────────────────────────────────

/**
 * Stateless chat: rebuilds the whole Gemini conversation on every call.
 *
 * @param {Array<{role:"user"|"assistant", text:string}>} history
 *        Full conversation so far — owned & sent by the frontend.
 *        A missing or non-array value is treated as an empty history.
 * @param {string} currentMessage   The user's latest message.
 * @returns {Promise<{ answer:string, matchedSections:string[] }>}
 *          `answer` is the model's reply text; `matchedSections` is the
 *          `matched` value reported by getRelevantContext(currentMessage).
 */
async function chat(history, currentMessage) {
  const { matched } = getRelevantContext(currentMessage);

  // First message of a fresh conversation may arrive without any history.
  const priorTurns = Array.isArray(history) ? history : [];

  // Gemini history layout:
  //   [0] user  → system prompt + KB
  //   [1] model → short grounding ack
  //   [2..n]    → prior conversation from the frontend ("assistant" → "model")
  const geminiHistory = [
    { role: "user",  parts: [{ text: buildSystemPrompt() }] },
    { role: "model", parts: [{ text: "Got it. I'm the Skedulelt Support Assistant. I'll answer only based on the knowledge base provided. How can I help?" }] },
    ...priorTurns.map((turn) => ({
      role : turn.role === "assistant" ? "model" : "user",
      parts: [{ text: turn.text }]
    }))
  ];

  // The call goes through the queue, which serialises requests and retries
  // HTTP 429 responses with back-off.
  const answer = await enqueue(async () => {
    const model       = genAI.getGenerativeModel({ model: MODEL });
    const chatSession = model.startChat({ history: geminiHistory });
    // sendMessage() resolves to a result wrapper; the reply text is obtained
    // by calling text() on its `response` member.
    const result      = await chatSession.sendMessage(currentMessage);
    return result.response.text();
  });

  return { answer, matchedSections: matched };
}

// Public surface of this module: only the stateless chat() entry point.
module.exports = { chat };