Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions =0.23.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[version-2 a8dd7b3] Added zstandard
3 files changed, 12 insertions(+)
create mode 100644 "=0.23.0\n"
create mode 100644 changai/changai/setup/install.py
2 changes: 2 additions & 0 deletions =0.23.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[version-2 f5bbc36] Added zstandard
1 file changed, 1 insertion(+), 1 deletion(-)
3 changes: 3 additions & 0 deletions changai/changai/api/v2/assets/business_keywords_v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -6403,6 +6403,9 @@
"headline earnings",
"exceptional item",
"project demand",
"maintenance",
"logs",
"machines",
"project request",
"project intake",
"project scoring",
Expand Down
57 changes: 51 additions & 6 deletions changai/changai/api/v2/build_cards_faiss_index_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ def _get_fvs_paths() -> tuple:

table_path = os.path.join(app_base, "table_fvs")
schema_path = os.path.join(app_base, "schema_fvs")
schema_emb_path = os.path.join(app_base, "emb_dir")
master_path = os.path.join(private_base, "masterdata_fvs")

for p in (app_base, private_base, table_path, schema_path, master_path):
os.makedirs(p, exist_ok=True)

return app_base, private_base, table_path, schema_path, master_path
return app_base, private_base, table_path, schema_path, master_path,schema_emb_path

RAG_FOLDER = "Home/RAG Sources"
HNSW_M = 32
Expand Down Expand Up @@ -200,7 +201,10 @@ def build_schema_docs(schema: Dict[str, Any]) -> List[Document]:

if not isinstance(tables, list):
return docs

GENERIC_FIELDS = {
'creation', 'modified', 'owner', 'parenttype','old_parent',
'parentfield', 'parent', 'idx', 'name', 'docstatus'
}
for table_block in tables:
if not _is_valid_schema_table(table_block):
continue
Expand All @@ -211,8 +215,12 @@ def build_schema_docs(schema: Dict[str, Any]) -> List[Document]:

if not isinstance(fields, list):
continue

for field_row in fields:
field_name = field_row.get("name")
if field_name in GENERIC_FIELDS:
continue

doc = _build_field_document(table_name, module, field_row)
if doc:
docs.append(doc)
Expand Down Expand Up @@ -338,7 +346,7 @@ def build_all_fvs() -> Dict[str, Any]:

def build_table_fvs_job():
try:
app_base, _, table_path, _, _ = _get_fvs_paths()
app_base, _, table_path, _, _,_ = _get_fvs_paths()
tables_list = _load_json_from_file_doc("tables.json")
table_docs = build_table_docs(tables_list)
_build_and_save_faiss(table_docs, table_path, "ERPNext Table FVS", app_base)
Expand All @@ -347,13 +355,50 @@ def build_table_fvs_job():
frappe.log_error(frappe.get_traceback(), "Build Table FVS Failed")
raise

import os
import pickle
import numpy as np
def save_field_matrix(schema_docs, base_dir):
    """Embed schema field documents and persist a normalised lookup matrix.

    Three files are written into ``base_dir`` (created when missing):

    * ``field_embs.npy``   - float32 matrix, one L2-normalised row per
      document, in the same order as ``schema_docs``.
    * ``field_docs.pkl``   - the pickled ``schema_docs`` object itself.
    * ``table_to_idx.pkl`` - pickled dict mapping a table name to the list of
      row indices of the documents belonging to that table.

    Args:
        schema_docs: Sequence of documents exposing ``page_content`` and,
            optionally, a ``metadata`` mapping with ``"table"`` and
            ``"field"`` entries.
        base_dir: Target directory for the three files.

    An empty ``schema_docs`` is handled explicitly: no embedding call is made
    and an empty ``(0, 0)`` matrix is stored, instead of failing inside
    ``np.linalg.norm`` on a 1-D array.
    """
    texts = [doc.page_content for doc in schema_docs]

    if texts:
        vectors = get_embedding_engine().embed_documents(texts)
        embs = np.array(vectors, dtype="float32")
        # Clip the norms so an all-zero vector does not trigger a division
        # by zero; such rows simply stay zero.
        norms = np.linalg.norm(embs, axis=1, keepdims=True)
        embs = embs / np.clip(norms, 1e-12, None)
    else:
        # Nothing to embed: np.array([]) would be 1-D and axis=1 would raise.
        # NOTE(review): the embedding width is unknown here, hence (0, 0) -
        # confirm that readers of field_embs.npy tolerate an empty matrix.
        embs = np.zeros((0, 0), dtype="float32")

    # Only documents carrying both a table and a field name are indexed.
    table_to_idx = {}
    for row, doc in enumerate(schema_docs):
        meta = getattr(doc, "metadata", {}) or {}
        table = meta.get("table")
        if table and meta.get("field"):
            table_to_idx.setdefault(table, []).append(row)

    os.makedirs(base_dir, exist_ok=True)

    np.save(os.path.join(base_dir, "field_embs.npy"), embs)

    # NOTE(review): pickle files must only ever be loaded back from trusted
    # storage; the reader side is not visible from here.
    with open(os.path.join(base_dir, "field_docs.pkl"), "wb") as f:
        pickle.dump(schema_docs, f)

    with open(os.path.join(base_dir, "table_to_idx.pkl"), "wb") as f:
        pickle.dump(table_to_idx, f)


def build_schema_fvs_job():
try:
schema = _load_yaml_from_file_doc("schema.yaml")
schema_docs = build_schema_docs(schema)
app_base, _, _, schema_path, _ = _get_fvs_paths()
app_base, _, _, schema_path, _,schema_emb_dir = _get_fvs_paths()
_build_and_save_faiss(schema_docs, schema_path, "ERPNext Schema FVS", app_base)
save_field_matrix(schema_docs, schema_emb_dir)
frappe.logger().info(f"ERPNext Schema FVS built: {len(schema_docs)} docs")
except Exception :
frappe.log_error(frappe.get_traceback(), "Build Schema FVS Failed")
Expand All @@ -362,7 +407,7 @@ def build_schema_fvs_job():

def build_master_data_fvs_job():
try:
_, private_base, _, _, master_path = _get_fvs_paths()
_, private_base, _, _, master_path,_ = _get_fvs_paths()
master_data = _load_yaml_from_file_doc("master_data.yaml")
entity_docs = build_entity_docs(master_data)
_build_and_save_faiss(entity_docs, master_path, "ERPNext Master Data FVS", private_base)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/schema_fvs/index.pkl
Binary file not shown.
Binary file modified changai/changai/api/v2/fvs_stores/erpnext/table_fvs/index.faiss
Binary file not shown.
95 changes: 0 additions & 95 deletions changai/changai/api/v2/process_data.py

This file was deleted.

143 changes: 62 additions & 81 deletions changai/changai/api/v2/store_chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,94 +76,75 @@ def get_chat_history(session_id: str) -> list:
return []

return history[-5:]
PROMPT_FOLLOWUP = """You are ChangAI, an ERP entity-value detector + query rewriter.

Return ONLY valid JSON with EXACTLY these keys:
{{"standalone_question":"...","contains_values":true/false}}

### TASK 1 — SPELL CORRECTION:
- Fix any typos or spelling mistakes in the latest message before doing anything else
- Examples:
- "slaes order of lst mnoth" → "sales order of last month"
- "whcih custoemr has pendign" → "which customer has pending"
- "stok of chiar item" → "stock of chair item"

### TASK 2 — CONTINUITY DETECTION:
- Check if the latest message is a follow-up or refers to previous conversation
- Look at the last 3-4 human messages in chat history for context
- If it IS a follow-up, rewrite as a fully self-contained standalone question
- Always put the final rewritten (and corrected) question in "standalone_question"

Follow-up indicators:
- Pronouns with no clear referent: "it", "they", "that", "those", "him", "her", "his"
- Incomplete references: "same customer", "that item", "the one", "same period"
- Continuation words: "also", "and what about", "what else", "show more"
- Short vague messages: "and today?", "what about last month?", "how many?"

Examples:
History: "show sales of ahmed"
Latest: "what about his pending invoices"
→ standalone_question: "show pending invoices of ahmed"

History: "stock of office chair in main warehouse"
Latest: "what about side tabel?"
→ standalone_question: "stock of side table in main warehouse"

History: "top 5 customers this month"
Latest: "show lst month"
→ standalone_question: "top 5 customers last month"

History: "employees in accounts department"
Latest: "hw many are absent today?"
→ standalone_question: "how many employees in accounts department are absent today"

### TASK 3 — ENTITY DETECTION (contains_values):
Meaning of contains_values (STRICT):

TRUE = standalone_question contains ANY explicit or implied ENTITY IDENTIFIER
that should be matched to master data
(customer/supplier/item/warehouse/employee/category etc.)

Examples (TRUE):
- "invoice of ayan" (name)
- "who bought laptop last month" (product)
- "sales of pens today" (product)
- "top items in electronics category" (category)
- "stock of office chair in main warehouse" (item + warehouse)

FALSE = NO entity identifier mentioned.
Only filters, time ranges, counts, ranking words, or statuses.

Examples (FALSE):
- "show all customers"
- "unpaid suppliers list"
- "sales orders pending delivery"
- "payment received this month"
- "top vendor dues list"
- "today sales"

Rules:
- Any product/item/category reference → contains_values = true
- Only entity names/codes or product/category references → contains_values = true
- When unsure between item vs non-item → prefer contains_values = true

### OUTPUT FORMAT (STRICT — no extra keys, no markdown):
{{"standalone_question":"...","contains_values":true/false}}

Chat history (use ONLY human lines):
{rows}

Latest user message:
{qstn}
- Always output the "standalone_question" in clear English, regardless of the user's original language, preserving the exact meaning.
"""
@frappe.whitelist(allow_guest=False)
def respond_from_cache(user_question: str):
    """Look up a previously logged answer for an identical question.

    Queries the "ChangAI Logs" doctype for a row whose ``user_question``
    equals the given text and returns whatever ``frappe.db.get_value`` yields
    for the ``sql_generated`` and ``result`` fields (``as_dict=False``).
    Returns ``None`` when ``user_question`` is empty.
    """
    if not user_question:
        return None

    filters = {"user_question": user_question}
    wanted_fields = ["sql_generated", "result"]
    return frappe.db.get_value("ChangAI Logs", filters, wanted_fields, as_dict=False)


# Prompt template for the follow-up rewriter / entity detector.
# Placeholders: {rows} (chat history) and {qstn} (latest user message). The
# doubled braces around the JSON example suggest the template is filled with
# str.format - TODO confirm against the caller.
# NOTE(review): the bullet "Do NOT invent documents or use report names that"
# ends mid-sentence; the intended wording is not visible here and should be
# completed by the prompt's author.
PROMPT_FOLLOWUP = """You are ChangAI, an ERP query rewriter and entity detector.
Return ONLY valid JSON:
{{"standalone_question":"...","contains_values":true/false}}
TASK 1 — FOLLOW-UP
- If the query depends on previous messages, rewrite it as a complete standalone question.
- Otherwise keep it unchanged.
TASK 2 — ENTITY DETECTION
contains_values = TRUE if the query includes any specific entity
(customer, supplier, item, warehouse, employee, etc.), else FALSE.
When unsure → TRUE.
TASK 3 — ERP CONTEXTUAL REWRITE
1. Normalize:
- Fix typos, clear English
- Do NOT change entity values
2. Complete intent:
- Expand vague queries (total, list, top, filter)
3. ERP mapping:
- Map generic terms to standard ERPNext concepts based on intent
- Avoid vague words if clearer business terms exist
- Do NOT invent documents or use report names that
Examples:
invoice → Sales Invoice / Purchase Invoice
order → Sales Order / Purchase Order
stock → Bin / Stock Ledger Entry
production → Work Order
timesheet → Timesheet / Timesheet Detail
finance/profit → GL Entry (use credit and debit)
4. Field hints (max 1–2):
Use natural phrasing ("based on", "using")
sales → grand_total
qty → qty
stock → actual_qty
production → produced_qty
finance → debit / credit
status → status
5. Time fields:
Sales/Stock/Finance → posting_date
Work Order → actual_start_date / actual_end_date
Timesheet → start_date / end_date
Timesheet Detail → from_time / to_time
STRICT:
- NEVER use posting_date for Timesheet
- NEVER use creation unless asked
6. Relationships:
- Include linked entities if required
STYLE:
- Natural business language
- No SQL, no tab* names
EXAMPLES:
"sales invoice last month"
→ What is the total sales amount from Sales Invoices last month based on grand_total and posting_date?

"stock in warehouse a"
→ What is the stock quantity in Warehouse A based on actual_qty from Bin?

"who worked today"
→ Which employees logged time today based on Timesheet start_date or Timesheet Detail from_time?
Chat history:
{rows}
User:
{qstn}
"""
@frappe.whitelist(allow_guest=False)
def inject_prompt(user_qstn: str, session_id: str) -> str:
rows=get_chat_history(session_id)
Expand Down
Loading
Loading