From 8dacd74ae7ca5970591b4df95ddcd6531234d810 Mon Sep 17 00:00:00 2001
From: Shivesh Vishwakarma <shiveshvishwakarma71@gmail.com>
Date: Wed, 21 Jan 2026 18:37:54 +0530
Subject: [PATCH] Add project files for Research Assistant

---
 .gitignore                      |   6 ++
 requirements.txt                |  11 +++
 src/__init__.py                 |   0
 src/agents/__init__.py          |   0
 src/agents/llm_agent.py         |  16 +++++
 src/agents/planner_agent.py     |  14 ++++
 src/agents/searcher_agent.py    |  93 +++++++++++++++++++++++++
 src/agents/writer_agent.py      | 119 ++++++++++++++++++++++++++++++++
 src/api.py                      |  19 +++++
 src/config.py                   |  15 ++++
 src/core/__init__.py            |   0
 src/core/langgraph_pipeline.py  |  45 ++++++++++++
 src/core/state.py               |  20 ++++++
 src/debug_env.py                |   3 +
 src/main.py                     |  72 +++++++++++++++++++
 src/models/__init__.py          |   0
 src/models/llm.py               |  20 ++++++
 src/models/research_prompt.py   |  55 +++++++++++++++
 src/services/__init__.py        |   0
 src/services/llm.py             |  28 ++++++++
 src/services/lmstudio_client.py |  17 +++++
 src/services/local_llm.py       |  22 ++++++
 src/services/tavily_client.py   |  37 ++++++++++
 src/test_searcher.py            |   9 +++
 src/test_tavily.py              |   4 ++
 src/utils/export.py             |  45 ++++++++++++
 test_tavily_manual.py           |  16 +++++
 ui.py                           |  99 ++++++++++++++++++++++++++
 28 files changed, 785 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 requirements.txt
 create mode 100644 src/__init__.py
 create mode 100644 src/agents/__init__.py
 create mode 100644 src/agents/llm_agent.py
 create mode 100644 src/agents/planner_agent.py
 create mode 100644 src/agents/searcher_agent.py
 create mode 100644 src/agents/writer_agent.py
 create mode 100644 src/api.py
 create mode 100644 src/config.py
 create mode 100644 src/core/__init__.py
 create mode 100644 src/core/langgraph_pipeline.py
 create mode 100644 src/core/state.py
 create mode 100644 src/debug_env.py
 create mode 100644 src/main.py
 create mode 100644 src/models/__init__.py
 create mode 100644 src/models/llm.py
 create mode 100644 src/models/research_prompt.py
 create mode 100644 src/services/__init__.py
 create mode 100644 src/services/llm.py
 create mode 100644 src/services/lmstudio_client.py
 create mode 100644 src/services/local_llm.py
 create mode 100644 src/services/tavily_client.py
 create mode 100644 src/test_searcher.py
 create mode 100644 src/test_tavily.py
 create mode 100644 src/utils/export.py
 create mode 100644 test_tavily_manual.py
 create mode 100644 ui.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7239815
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.env
+__pycache__/
+*.pyc
+.vscode/
+.idea/
+.env.*
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d904aa9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+openai
+fastapi
+uvicorn
+pydantic
+pydantic-settings
+httpx
+python-dotenv
+tavily-python
+streamlit
+requests
+langgraph
+reportlab
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/agents/llm_agent.py b/src/agents/llm_agent.py
new file mode 100644
index 0000000..2d62d7b
--- /dev/null
+++ b/src/agents/llm_agent.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass
+from src.services.lmstudio_client import chat_with_lmstudio
+
+
+@dataclass
+class LLMAgent:
+    """Agent that forwards a query, optionally with context, to LM Studio."""
+    name: str = "local_llm"
+
+    def run(self, query: str, context: str | None = None) -> str:
+        """Return the reply to ``query``; falsy ``context`` sends it as-is."""
+        if not context:
+            return chat_with_lmstudio(query)
+        preamble = "Use the following context to answer the question.\n\n"
+        grounded = f"{preamble}Context:\n{context}\n\nQuestion:\n{query}"
+        return chat_with_lmstudio(grounded)
diff --git a/src/agents/planner_agent.py b/src/agents/planner_agent.py
new file mode 100644
index 0000000..7b043b3
--- /dev/null
+++ b/src/agents/planner_agent.py
@@ -0,0 +1,14 @@
+class PlannerAgent:
+    """Planner that attaches a fixed paper outline to the state."""
+
+    # Section titles, in the order they are stored under "outline".
+    _OUTLINE = (
+        "Abstract", "Introduction", "Background",
+        "Applications", "Trends", "Challenges",
+        "Future Scope", "Conclusion", "References",
+    )
+
+    def run(self, state: dict) -> dict:
+        """Store a fresh outline list in ``state`` and return ``state``."""
+        state["outline"] = list(self._OUTLINE)
+        return state
diff --git a/src/agents/searcher_agent.py b/src/agents/searcher_agent.py
new file mode 100644
index 0000000..9fef991
--- /dev/null
+++ b/src/agents/searcher_agent.py
@@ -0,0 +1,93 @@
+from src.services.tavily_client import TavilyClient
+
+
+class SearcherAgent:
+    """Runs a Tavily search per sub-question and keeps short source summaries."""
+
+    # Boilerplate words stripped from page text before summarizing.
+    _NOISE_WORDS = (
+        "subscribe", "likes", "views", "watch", "upgrade",
+        "reddit", "youtube", "emoji",
+    )
+    # Score used when a result carries no usable numeric "score".
+    _DEFAULT_CONFIDENCE = 0.85
+
+    def __init__(self):
+        self.client = TavilyClient()
+
+    def _clean(self, text: str) -> str:
+        """Drop noise words and collapse whitespace.
+
+        Only whole words are removed (case-insensitively), so "reviews" or
+        "watching" are no longer mangled into "re" / "ing".
+        """
+        if not text:
+            return ""
+
+        import re  # local import: keeps this class self-contained
+
+        noise = "|".join(re.escape(word) for word in self._NOISE_WORDS)
+        text = re.sub(rf"\b(?:{noise})\b", " ", text, flags=re.IGNORECASE)
+        # split()/join also turns newlines and repeated blanks into one space.
+        return " ".join(text.split())
+
+    def _summarize(self, content: str) -> str:
+        """Return up to three sentences longer than 40 characters, or ""."""
+        content = self._clean(content)
+        sentences = [s.strip() for s in content.split(".") if len(s.strip()) > 40]
+        return ". ".join(sentences[:3]) + "." if sentences else ""
+
+    def run(self, state: dict):
+        """Search every entry of ``state["sub_questions"]``.
+
+        Stores a list of ``{"question", "sources"}`` dicts under
+        ``state["search_results"]`` and returns ``state``. Results whose
+        summary is empty are skipped.
+        """
+        results = []
+
+        for question in state.get("sub_questions", []):
+            data = self.client.search(question)
+
+            sources = []
+            for item in data.get("results", []):
+                summary = self._summarize(item.get("content", ""))
+                if not summary:
+                    continue
+
+                score = item.get("score")
+                if not isinstance(score, (int, float)):
+                    # Missing or null score: round(None) would raise TypeError.
+                    score = self._DEFAULT_CONFIDENCE
+
+                sources.append({
+                    "summary": summary,
+                    "url": item.get("url", ""),
+                    "confidence": round(score, 2)
+                })
+
+            results.append({
+                "question": question,
+                "sources": sources
+            })
+
+        state["search_results"] = results
+        return state
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/agents/writer_agent.py b/src/agents/writer_agent.py
new file mode 100644
index 0000000..e762b4d
--- /dev/null
+++ b/src/agents/writer_agent.py
@@ -0,0 +1,119 @@
+class WriterAgent:
+    """Assembles a Markdown research paper from per-section LLM calls."""
+
+    # (heading, section title sent to the LLM, minimum word count), in order.
+    _BODY_SECTIONS = (
+        ("## 1. Introduction\n", "Introduction", 400),
+        ("## 2. Literature Review\n", "Literature Review", 500),
+        ("## 3. Methodology\n", "Methodology", 400),
+        ("## 4. Applications\n", "Applications", 500),
+        ("## 5. Challenges and Limitations\n", "Challenges and Limitations", 400),
+        ("## 6. Future Trends\n", "Future Trends", 400),
+    )
+
+    def __init__(self, llm):
+        # ``llm`` only needs an ``invoke(prompt)`` method returning text.
+        self.llm = llm
+
+    def _section(self, title: str, topic: str, min_words: int):
+        """Ask the LLM for one body section and return its reply."""
+        return self.llm.invoke(f"""
+You are writing an academic research paper.
+
+Write the section titled:
+{title}
+
+Research topic:
+{topic}
+
+Rules:
+- Formal academic writing
+- Paragraph-based (no bullets unless necessary)
+- Minimum {min_words} words
+- Do NOT write conclusion
+- Do NOT write references
+""")
+
+    def _conclusion(self, topic: str):
+        """Ask the LLM for the single closing section and return its reply."""
+        return self.llm.invoke(f"""
+Write the FINAL CONCLUSION section for an academic research paper.
+
+Topic:
+{topic}
+
+Rules:
+- Summarize key insights
+- Discuss implications
+- Mention future scope
+- 300–400 words
+""")
+
+    def run(self, state: dict):
+        """Write the paper for ``state["input"]`` into ``state["final_answer"]``."""
+        topic = state["input"]
+
+        # Title, abstract and the fixed keyword line open the paper.
+        parts = [
+            f"# {topic}\n",
+            "## Abstract\n" + self._section("Abstract", topic, 200),
+            "## Keywords\n"
+            "Artificial Intelligence, Machine Learning, Data Analytics, Automation, Ethics, Sustainability\n",
+        ]
+
+        # Numbered body sections, generated one LLM call at a time.
+        parts.extend(
+            heading + self._section(title, topic, min_words)
+            for heading, title, min_words in self._BODY_SECTIONS
+        )
+
+        # Exactly one conclusion and one (static) reference list close it.
+        parts.append("## 7. Conclusion\n" + self._conclusion(topic))
+        parts.append(
+            "## References\n"
+            "[1] https://www.ajournals.org/ijai/article/details/1006-2538/79\n"
+            "[2] https://www.sciencedirect.com/topics/artificial-intelligence\n"
+            "[3] https://ieeexplore.ieee.org/Xplore/home.jsp\n"
+        )
+
+        state["final_answer"] = "\n\n".join(parts)
+        return state
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/api.py b/src/api.py
new file mode 100644
index 0000000..4ce9843
--- /dev/null
+++ b/src/api.py
@@ -0,0 +1,19 @@
+from fastapi import FastAPI
+from pydantic import BaseModel
+from src.core.langgraph_pipeline import build_graph
+
+app = FastAPI()  # application object the /chat route below is registered on
+graph = build_graph()  # built once at import time and reused by every request
+
+
+class Prompt(BaseModel):  # request body of POST /chat
+    message: str  # text that chat() forwards to the graph
+
+
+@app.post("/chat")
+def chat(prompt: Prompt):
+    """Run the graph on the message and reply with the generated paper."""
+    # WriterAgent reads the topic from "input" and writes "final_answer".
+    result = graph.invoke({"input": prompt.message})
+    # No node of the compiled graph writes "plan", so default it to [].
+    return {"reply": result.get("final_answer", ""), "plan": result.get("plan", [])}
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..686d17c
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,15 @@
+from pydantic_settings import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Settings read from the environment and the ``.env`` file."""
+    LMSTUDIO_BASE_URL: str
+    LMSTUDIO_API_KEY: str
+    LMSTUDIO_MODEL: str
+    TAVILY_API_KEY: str = ""  # optional; src/debug_env.py reads this field
+    class Config:
+        env_file = ".env"
+        extra = "ignore"
+
+
+settings = Settings()
diff --git a/src/core/__init__.py b/src/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/core/langgraph_pipeline.py b/src/core/langgraph_pipeline.py
new file mode 100644
index 0000000..97fdc56
--- /dev/null
+++ b/src/core/langgraph_pipeline.py
@@ -0,0 +1,45 @@
+from langgraph.graph import StateGraph
+from src.agents.writer_agent import WriterAgent
+from src.services.llm import get_llm
+
+
+def build_graph():
+    """Build and compile the single-node LangGraph pipeline.
+
+    The graph holds one node, "writer" (``WriterAgent.run``), which is both
+    the entry point and the finish point.
+
+    Returns:
+        The compiled graph; callers run it with ``.invoke(state)``.
+    """
+    llm = get_llm()
+    writer = WriterAgent(llm)
+
+    graph = StateGraph(dict)
+    graph.add_node("writer", writer.run)
+    graph.set_entry_point("writer")
+    # Declare the end of the run explicitly: without an edge to END the
+    # writer node is a dead end, which graph validation may reject.
+    graph.set_finish_point("writer")
+
+    return graph.compile()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/core/state.py b/src/core/state.py
new file mode 100644
index 0000000..1d0a072
--- /dev/null
+++ b/src/core/state.py
@@ -0,0 +1,20 @@
+from typing import TypedDict, List, Dict
+
+
+class GraphState(TypedDict, total=False):
+    """Keys the agents read and write; every key is optional."""
+
+    input: str  # research topic read by WriterAgent
+
+    planner_steps: List[str]
+    outline: List[str]  # section titles written by PlannerAgent
+    sub_questions: List[str]  # queries read by SearcherAgent
+
+    # One {"question": str, "sources": [...]} dict per sub-question, so the
+    # values are not plain strings.
+    search_results: List[Dict[str, object]]
+    sources: List[Dict[str, str]]
+
+    final_answer: str  # Markdown paper written by WriterAgent
+    final_output: str
+
diff --git a/src/debug_env.py b/src/debug_env.py
new file mode 100644
index 0000000..e933538
--- /dev/null
+++ b/src/debug_env.py
@@ -0,0 +1,3 @@
+from src.config import settings
+
+print("TAVILY_API_KEY =", getattr(settings, "TAVILY_API_KEY", None))  # field may be undeclared on Settings; avoid AttributeError
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..0f60e20
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,72 @@
+from src.core.langgraph_pipeline import build_graph
+
+def main():
+    """Interactive console loop: read a topic, run the graph, print results.
+
+    Typing "exit" (any case, surrounding blanks ignored) leaves the loop.
+    """
+    app = build_graph()
+    print("LangGraph agent system ready. Type 'exit' to quit.")
+
+    while True:
+        user_input = input("You: ")
+        if user_input.strip().lower() == "exit":
+            break
+
+        state = {"input": user_input}
+        result = app.invoke(state)
+
+        # Planner/searcher keys are only present when those agents are part
+        # of the graph, so read them with .get() instead of indexing.
+        print("\n--- Planner Agent Output ---")
+        for i, q in enumerate(result.get("sub_questions", []), 1):
+            print(f"{i}. {q}")
+
+        print("\n--- Searcher Agent Output ---")
+        for item in result.get("search_results", []):
+            print(f"\nQuestion: {item['question']}")
+            for i, src in enumerate(item["sources"], 1):
+                print(f"Source {i}:")
+                print(src["summary"])
+                print(f"URL: {src['url']} (confidence: {src['confidence']})")
+
+        print("\n--- Writer Agent Output ---\n")
+        print(result.get("final_answer", ""))
+
+
+if __name__ == "__main__":
+    main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/models/llm.py b/src/models/llm.py
new file mode 100644
index 0000000..240c0ea
--- /dev/null
+++ b/src/models/llm.py
@@ -0,0 +1,20 @@
+import requests
+
+LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions"  # endpoint the payload is POSTed to
+MODEL_NAME = "qwen2.5-coder-0.5b-instruct"  # sent as the payload's "model" value
+
+def generate_research_paper(prompt: str) -> str:
+    """Send ``prompt`` to the LM Studio chat endpoint and return the reply text."""
+    payload = {
+        "model": MODEL_NAME,
+        "messages": [
+            {"role": "system", "content": "You are a professional academic researcher."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.3,
+        "max_tokens": 3000
+    }
+    # Bound the wait: requests never times out by default; (connect, read) seconds.
+    response = requests.post(LM_STUDIO_URL, json=payload, timeout=(10, 600))
+    response.raise_for_status()
+    return response.json()["choices"][0]["message"]["content"]
diff --git a/src/models/research_prompt.py b/src/models/research_prompt.py
new file mode 100644
index 0000000..986e68f
--- /dev/null
+++ b/src/models/research_prompt.py
@@ -0,0 +1,55 @@
+def build_research_prompt(topic: str) -> str:  # returns the template below with {topic} filled in
+    return f"""
+You are an academic research writer.
+
+Write a FULL LENGTH research paper on the topic:
+
+"{topic}"
+
+STRICT REQUIREMENTS:
+- Write in formal academic tone
+- Minimum 1500–2000 words
+- Follow EXACT research paper structure
+- Do NOT write questions inside sections
+- Do NOT summarize briefly
+- Expand each section with depth and examples
+
+FORMAT (MANDATORY):
+
+Title
+
+Abstract
+(150–200 words)
+
+Keywords
+(5–7 keywords)
+
+1. Introduction
+(Background, motivation, scope)
+
+2. Literature Review
+(Existing studies, trends, gaps)
+
+3. Methodology / Approach
+(Conceptual or analytical approach)
+
+4. Applications / Use Cases
+(Detailed real-world examples)
+
+5. Challenges and Limitations
+(Technical, ethical, societal)
+
+6. Future Scope
+(Emerging trends, opportunities)
+
+7. Conclusion
+(Summary of findings)
+
+References
+(Numbered list with URLs)
+
+IMPORTANT:
+- Use academic language
+- No bullet-point-only answers
+- Write like a real journal paper
+"""
diff --git a/src/services/__init__.py b/src/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/services/llm.py b/src/services/llm.py
new file mode 100644
index 0000000..6366bac
--- /dev/null
+++ b/src/services/llm.py
@@ -0,0 +1,28 @@
+import requests
+
+LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions"  # endpoint LocalLLM.invoke POSTs to
+MODEL_NAME = "qwen2.5-coder-0.5b-instruct"  # sent as the payload's "model" value
+
+
+class LocalLLM:
+    """LM Studio chat client; ``invoke(prompt)`` POSTs it and returns the reply text."""
+    def invoke(self, prompt: str) -> str:
+        payload = {
+            "model": MODEL_NAME,
+            "messages": [
+                {"role": "system", "content": "You are an academic research paper writer."},
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": 0.3,
+            "max_tokens": 3000
+        }
+        # Bound the wait: requests never times out by default; (connect, read) seconds.
+        response = requests.post(LM_STUDIO_URL, json=payload, timeout=(10, 600))
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+
+
+def get_llm():
+    """Return a new ``LocalLLM`` instance."""
+    return LocalLLM()
+
diff --git a/src/services/lmstudio_client.py b/src/services/lmstudio_client.py
new file mode 100644
index 0000000..deef71b
--- /dev/null
+++ b/src/services/lmstudio_client.py
@@ -0,0 +1,17 @@
+from openai import OpenAI
+from src.config import settings
+
+# Module-level client for LM Studio's OpenAI-compatible server, configured from settings
+client = OpenAI(
+    base_url=settings.LMSTUDIO_BASE_URL,
+    api_key=settings.LMSTUDIO_API_KEY,  # LM Studio just needs any non-empty string
+)
+
+
+def chat_with_lmstudio(message: str) -> str:
+    """Send one user message to the local LLM; return its reply ("" if it has no text)."""
+    response = client.chat.completions.create(
+        model=settings.LMSTUDIO_MODEL,
+        messages=[{"role": "user", "content": message}],
+    )
+    return response.choices[0].message.content or ""
diff --git a/src/services/local_llm.py b/src/services/local_llm.py
new file mode 100644
index 0000000..542e82c
--- /dev/null
+++ b/src/services/local_llm.py
@@ -0,0 +1,22 @@
+from openai import OpenAI
+
+class LocalLLM:
+    """Chat wrapper around the local LM Studio OpenAI-compatible server."""
+    def __init__(self):
+        self.client = OpenAI(
+            base_url="http://localhost:1234/v1",
+            api_key="lm-studio"  # dummy key, required by SDK
+        )
+        self.model = "qwen2.5-coder-0.5b-instruct"
+
+    def generate(self, prompt: str, temperature: float = 0.2) -> str:
+        """Return the stripped reply to ``prompt``; "" when the reply content is None."""
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": "You are an academic research paper writer."},
+                {"role": "user", "content": prompt}
+            ],
+            temperature=temperature
+        )
+        return (response.choices[0].message.content or "").strip()
diff --git a/src/services/tavily_client.py b/src/services/tavily_client.py
new file mode 100644
index 0000000..f5dfcfc
--- /dev/null
+++ b/src/services/tavily_client.py
@@ -0,0 +1,37 @@
+import os
+import requests
+
+
+class TavilyClient:
+    """Thin HTTP client for the Tavily search endpoint."""
+
+    def __init__(self):
+        self.api_key = os.getenv("TAVILY_API_KEY")
+        if self.api_key in (None, ""):
+            raise RuntimeError("TAVILY_API_KEY not set")
+
+        self.url = "https://api.tavily.com/search"
+
+    def search(self, query: str, max_results: int = 3, search_depth: str = "basic"):
+        """POST the query to Tavily and return the decoded JSON body.
+
+        Raises whatever ``raise_for_status`` raises on an HTTP error status.
+        """
+        body = dict(
+            api_key=self.api_key,
+            query=query,
+            max_results=max_results,
+            search_depth=search_depth,
+        )
+        reply = requests.post(self.url, json=body, timeout=30)
+        reply.raise_for_status()
+        return reply.json()
+
+
+
+
+
+
+
+
+
diff --git a/src/test_searcher.py b/src/test_searcher.py
new file mode 100644
index 0000000..f5ccdd8
--- /dev/null
+++ b/src/test_searcher.py
@@ -0,0 +1,9 @@
+from src.agents.searcher_agent import SearcherAgent
+
+searcher = SearcherAgent()
+
+query = "Explain machine learning"
+# run() takes the state dict and reads its "sub_questions" list, not a bare string.
+result = searcher.run({"sub_questions": [query]})
+print("\n--- Tavily Research Output ---\n")
+print(result)
diff --git a/src/test_tavily.py b/src/test_tavily.py
new file mode 100644
index 0000000..dd4a7b8
--- /dev/null
+++ b/src/test_tavily.py
@@ -0,0 +1,4 @@
+from src.services.tavily_client import TavilyClient
+
+t = TavilyClient()  # the service module defines TavilyClient, not TavilySearchService
+print(t.search("What is machine learning?"))
diff --git a/src/utils/export.py b/src/utils/export.py
new file mode 100644
index 0000000..78d63a2
--- /dev/null
+++ b/src/utils/export.py
@@ -0,0 +1,45 @@
+import io
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.lib.pagesizes import A4
+
+
+def export_markdown(text: str) -> str:
+    """Export research paper as Markdown (the input is handed back as-is)."""
+    # No conversion step: the caller's text is the exported document.
+    markdown = text
+    return markdown
+
+
+def export_pdf(text: str) -> bytes:
+    """Export research paper as PDF using ReportLab and return its bytes.
+
+    Each non-blank line becomes one paragraph; blank lines become spacing.
+    Lines are XML-escaped first because ``Paragraph`` parses its text as
+    markup, so a raw "<" or "&" would otherwise break or garble the build.
+    """
+    from xml.sax.saxutils import escape  # stdlib; local import
+
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(
+        buffer,
+        pagesize=A4,
+        rightMargin=40,
+        leftMargin=40,
+        topMargin=40,
+        bottomMargin=40,
+    )
+
+    styles = getSampleStyleSheet()
+    story = []
+
+    for line in text.split("\n"):
+        if line.strip() == "":
+            story.append(Spacer(1, 12))
+        else:
+            story.append(Paragraph(escape(line), styles["Normal"]))
+            story.append(Spacer(1, 8))
+
+    doc.build(story)
+    return buffer.getvalue()
+
diff --git a/test_tavily_manual.py b/test_tavily_manual.py
new file mode 100644
index 0000000..6e9c58f
--- /dev/null
+++ b/test_tavily_manual.py
@@ -0,0 +1,16 @@
+from dotenv import load_dotenv
+load_dotenv()  # must run before TavilyClient() reads TAVILY_API_KEY from the environment
+
+from src.services.tavily_client import TavilyClient
+
+t = TavilyClient()
+
+# search() returns the decoded JSON response; the hits are under "results".
+results = t.search("What is cyber security").get("results", [])
+
+print("\n=== TAVILY TEST OUTPUT ===\n")
+for r in results:
+    print("Title:", r["title"])
+    print("URL:", r["url"])
+    print("Content:", r["content"][:200])  # first 200 chars
+    print("-" * 40)
diff --git a/ui.py b/ui.py
new file mode 100644
index 0000000..c739215
--- /dev/null
+++ b/ui.py
@@ -0,0 +1,99 @@
+import streamlit as st
+from src.core.langgraph_pipeline import build_graph
+
+# --------------------------------------------------
+# Page config, title and caption
+# --------------------------------------------------
+st.set_page_config(
+    page_title="AI Research Assistant",
+    page_icon="📘",
+    layout="centered"
+)
+
+st.title("📘 AI Research Assistant")
+st.caption("Academic Research Paper Generator (Local Qwen LLM)")
+
+# --------------------------------------------------
+# Build graph once (load_graph is wrapped in st.cache_resource)
+# --------------------------------------------------
+@st.cache_resource
+def load_graph():
+    return build_graph()
+
+graph = load_graph()
+
+# --------------------------------------------------
+# Session state: both keys start out as empty strings
+# --------------------------------------------------
+for _key in ("paper", "question"):
+    if _key not in st.session_state:
+        st.session_state[_key] = ""
+
+# --------------------------------------------------
+# Input: a submitted question triggers one graph run
+# --------------------------------------------------
+question = st.chat_input("Ask a research question (academic topic)...")
+
+if question:
+    st.session_state.question = question
+
+    # The topic goes in under "input"; the paper is read back from
+    # "final_answer" ("" when that key is absent).
+    with st.spinner("Generating research paper..."):
+        payload = {"input": question}
+        result = graph.invoke(payload)
+        paper_text = result.get("final_answer", "")
+        st.session_state.paper = paper_text
+
+# --------------------------------------------------
+# Display the stored question (shown whenever one is set)
+# --------------------------------------------------
+if st.session_state.question:
+    st.markdown("### 🧠 Research Question")
+    st.markdown(f"**{st.session_state.question}**")
+    st.divider()
+
+# --------------------------------------------------
+# Display the stored paper, followed by the export button
+# --------------------------------------------------
+if st.session_state.paper:
+    st.markdown(st.session_state.paper)
+
+    st.divider()
+
+    # --------------------------------------------------
+    # Export button: offers the paper text as a .md download
+    # --------------------------------------------------
+    st.download_button(
+        "📄 Export Markdown",
+        data=st.session_state.paper,
+        file_name="research_paper.md",
+        mime="text/markdown"
+    )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+