From 8dacd74ae7ca5970591b4df95ddcd6531234d810 Mon Sep 17 00:00:00 2001 From: Shivesh Vishwakarma Date: Wed, 21 Jan 2026 18:37:54 +0530 Subject: [PATCH] Add project files for Research Assistant --- .gitignore | 6 ++ requirements.txt | 11 +++ src/__init__.py | 0 src/agents/__init__.py | 0 src/agents/llm_agent.py | 16 +++++ src/agents/planner_agent.py | 14 ++++ src/agents/searcher_agent.py | 93 +++++++++++++++++++++++++ src/agents/writer_agent.py | 119 ++++++++++++++++++++++++++++++++ src/api.py | 19 +++++ src/config.py | 15 ++++ src/core/__init__.py | 0 src/core/langgraph_pipeline.py | 45 ++++++++++++ src/core/state.py | 20 ++++++ src/debug_env.py | 3 + src/main.py | 72 +++++++++++++++++++ src/models/__init__.py | 0 src/models/llm.py | 20 ++++++ src/models/research_prompt.py | 55 +++++++++++++++ src/services/__init__.py | 0 src/services/llm.py | 28 ++++++++ src/services/lmstudio_client.py | 17 +++++ src/services/local_llm.py | 22 ++++++ src/services/tavily_client.py | 37 ++++++++++ src/test_searcher.py | 9 +++ src/test_tavily.py | 4 ++ src/utils/export.py | 45 ++++++++++++ test_tavily_manual.py | 16 +++++ ui.py | 99 ++++++++++++++++++++++++++ 28 files changed, 785 insertions(+) create mode 100644 .gitignore create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/agents/__init__.py create mode 100644 src/agents/llm_agent.py create mode 100644 src/agents/planner_agent.py create mode 100644 src/agents/searcher_agent.py create mode 100644 src/agents/writer_agent.py create mode 100644 src/api.py create mode 100644 src/config.py create mode 100644 src/core/__init__.py create mode 100644 src/core/langgraph_pipeline.py create mode 100644 src/core/state.py create mode 100644 src/debug_env.py create mode 100644 src/main.py create mode 100644 src/models/__init__.py create mode 100644 src/models/llm.py create mode 100644 src/models/research_prompt.py create mode 100644 src/services/__init__.py create mode 100644 src/services/llm.py create mode 100644 src/services/lmstudio_client.py create mode 100644 src/services/local_llm.py create mode 100644 src/services/tavily_client.py create mode 100644 src/test_searcher.py create mode 100644 src/test_tavily.py create mode 100644 src/utils/export.py create mode 100644 test_tavily_manual.py create mode 100644 ui.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7239815 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.env +__pycache__/ +*.pyc +.vscode/ +.idea/ +.env.* diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d904aa9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +openai +fastapi +uvicorn +pydantic +httpx +python-dotenv +tavily-python +streamlit +requests +langgraph +reportlab diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/agents/__init__.py b/src/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/agents/llm_agent.py b/src/agents/llm_agent.py new file mode 100644 index 0000000..2d62d7b --- /dev/null +++ b/src/agents/llm_agent.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass +from src.services.lmstudio_client import chat_with_lmstudio + + +@dataclass +class LLMAgent: + name: str = "local_llm" + + def run(self, query: str, context: str | None = None) -> str: + prompt = query + if context: + prompt = ( + "Use the following context to answer the question.\n\n" + f"Context:\n{context}\n\nQuestion:\n{query}" + ) + return chat_with_lmstudio(prompt) diff --git a/src/agents/planner_agent.py b/src/agents/planner_agent.py new file mode 100644 index 0000000..7b043b3 --- /dev/null +++ b/src/agents/planner_agent.py @@ -0,0 +1,14 @@ +class PlannerAgent: + def run(self, state: dict) -> dict: + state["outline"] = [ + "Abstract", + "Introduction", + "Background", + "Applications", + "Trends", + "Challenges", + "Future Scope", + "Conclusion", + "References" + ] + return state diff --git a/src/agents/searcher_agent.py b/src/agents/searcher_agent.py new file mode 100644 index 0000000..9fef991 --- /dev/null +++ b/src/agents/searcher_agent.py @@ -0,0 +1,93 @@ +from src.services.tavily_client import TavilyClient + + +class SearcherAgent: + def __init__(self): + self.client = TavilyClient() + + def _clean(self, text: str) -> str: + if not text: + return "" + + remove = [ + "subscribe", "likes", "views", "watch", "upgrade", + "reddit", "youtube", "emoji" + ] + + text = text.replace("\n", " ") + for r in remove: + text = text.replace(r, "") + + return text.strip() + + def _summarize(self, content: str) -> str: + content = self._clean(content) + sentences = [s.strip() for s in content.split(".") if len(s.strip()) > 40] + return ". ".join(sentences[:3]) + "." if sentences else "" + + def run(self, state: dict): + results = [] + + for question in state.get("sub_questions", []): + data = self.client.search(question) + + sources = [] + for item in data.get("results", []): + summary = self._summarize(item.get("content", "")) + if not summary: + continue + + sources.append({ + "summary": summary, + "url": item.get("url", ""), + "confidence": round(item.get("score", 0.85), 2) + }) + + results.append({ + "question": question, + "sources": sources + }) + + state["search_results"] = results + return state + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/agents/writer_agent.py b/src/agents/writer_agent.py new file mode 100644 index 0000000..e762b4d --- /dev/null +++ b/src/agents/writer_agent.py @@ -0,0 +1,119 @@ +class WriterAgent: + def __init__(self, llm): + self.llm = llm + + def _section(self, title: str, topic: str, min_words: int): + prompt = f""" +You are writing an academic research paper. + +Write the section titled: +{title} + +Research topic: +{topic} + +Rules: +- Formal academic writing +- Paragraph-based (no bullets unless necessary) +- Minimum {min_words} words +- Do NOT write conclusion +- Do NOT write references +""" + return self.llm.invoke(prompt) + + def _conclusion(self, topic: str): + prompt = f""" +Write the FINAL CONCLUSION section for an academic research paper. + +Topic: +{topic} + +Rules: +- Summarize key insights +- Discuss implications +- Mention future scope +- 300–400 words +""" + return self.llm.invoke(prompt) + + def run(self, state: dict): + topic = state["input"] + + paper = [] + + # Title + paper.append(f"# {topic}\n") + + # Abstract + paper.append("## Abstract\n" + self._section("Abstract", topic, 200)) + + # Keywords + paper.append( + "## Keywords\n" + "Artificial Intelligence, Machine Learning, Data Analytics, Automation, Ethics, Sustainability\n" + ) + + # Main body sections + paper.append("## 1. Introduction\n" + self._section("Introduction", topic, 400)) + paper.append("## 2. Literature Review\n" + self._section("Literature Review", topic, 500)) + paper.append("## 3. Methodology\n" + self._section("Methodology", topic, 400)) + paper.append("## 4. Applications\n" + self._section("Applications", topic, 500)) + paper.append("## 5. Challenges and Limitations\n" + self._section("Challenges and Limitations", topic, 400)) + paper.append("## 6. Future Trends\n" + self._section("Future Trends", topic, 400)) + + # ONE final conclusion + paper.append("## 7. Conclusion\n" + self._conclusion(topic)) + + # ONE references section + paper.append( + "## References\n" + "[1] https://www.ajournals.org/ijai/article/details/1006-2538/79\n" + "[2] https://www.sciencedirect.com/topics/artificial-intelligence\n" + "[3] https://ieeexplore.ieee.org/Xplore/home.jsp\n" + ) + + state["final_answer"] = "\n\n".join(paper) + return state + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/api.py b/src/api.py new file mode 100644 index 0000000..4ce9843 --- /dev/null +++ b/src/api.py @@ -0,0 +1,19 @@ +from fastapi import FastAPI +from pydantic import BaseModel +from src.core.langgraph_pipeline import build_graph + +app = FastAPI() +graph = build_graph() + + +class Prompt(BaseModel): + message: str + + +@app.post("/chat") +def chat(prompt: Prompt): + result = graph.invoke({"query": prompt.message}) + return { + "reply": result["answer"], + "plan": result["plan"] + } diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..686d17c --- /dev/null +++ b/src/config.py @@ -0,0 +1,15 @@ +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + LMSTUDIO_BASE_URL: str + LMSTUDIO_API_KEY: str + LMSTUDIO_MODEL: str + + class Config: + env_file = ".env" + extra = "ignore" + + +settings = Settings() + diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/core/langgraph_pipeline.py b/src/core/langgraph_pipeline.py new file mode 100644 index 0000000..97fdc56 --- /dev/null +++ b/src/core/langgraph_pipeline.py @@ -0,0 +1,45 @@ +from langgraph.graph import StateGraph +from src.agents.writer_agent import WriterAgent +from src.services.llm import get_llm + + +def build_graph(): + llm = get_llm() + + graph = StateGraph(dict) + + writer = WriterAgent(llm) + + graph.add_node("writer", writer.run) + graph.set_entry_point("writer") + + return graph.compile() + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/core/state.py b/src/core/state.py new file mode 100644 index 0000000..1d0a072 --- /dev/null +++ b/src/core/state.py @@ -0,0 +1,20 @@ +from typing import TypedDict, List, Dict + + +class GraphState(TypedDict, total=False): + input: str + + planner_steps: List[str] + sub_questions: List[str] + + search_results: List[Dict[str, str]] + sources: List[Dict[str, str]] + + final_answer: str + final_output: str + + + + + + diff --git a/src/debug_env.py b/src/debug_env.py new file mode 100644 index 0000000..e933538 --- /dev/null +++ b/src/debug_env.py @@ -0,0 +1,3 @@ +from src.config import settings + +print("TAVILY_API_KEY =", settings.TAVILY_API_KEY) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..0f60e20 --- /dev/null +++ b/src/main.py @@ -0,0 +1,72 @@ +from src.core.langgraph_pipeline import build_graph + +def main(): + app = build_graph() + print("LangGraph agent system ready. Type 'exit' to quit.") + + while True: + user_input = input("You: ") + if user_input.lower() == "exit": + break + + state = {"input": user_input} + result = app.invoke(state) + + print("\n--- Planner Agent Output ---") + for i, q in enumerate(result["sub_questions"], 1): + print(f"{i}. {q}") + + print("\n--- Searcher Agent Output ---") + for item in result["search_results"]: + print(f"\nQuestion: {item['question']}") + for i, src in enumerate(item["sources"], 1): + print(f"Source {i}:") + print(src["summary"]) + print(f"URL: {src['url']} (confidence: {src['confidence']})") + + print("\n--- Writer Agent Output ---\n") + print(result["final_answer"]) + +if __name__ == "__main__": + main() + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/llm.py b/src/models/llm.py new file mode 100644 index 0000000..240c0ea --- /dev/null +++ b/src/models/llm.py @@ -0,0 +1,20 @@ +import requests + +LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions" +MODEL_NAME = "qwen2.5-coder-0.5b-instruct" + +def generate_research_paper(prompt: str) -> str: + payload = { + "model": MODEL_NAME, + "messages": [ + {"role": "system", "content": "You are a professional academic researcher."}, + {"role": "user", "content": prompt} + ], + "temperature": 0.3, + "max_tokens": 3000 + } + + response = requests.post(LM_STUDIO_URL, json=payload) + response.raise_for_status() + + return response.json()["choices"][0]["message"]["content"] diff --git a/src/models/research_prompt.py b/src/models/research_prompt.py new file mode 100644 index 0000000..986e68f --- /dev/null +++ b/src/models/research_prompt.py @@ -0,0 +1,55 @@ +def build_research_prompt(topic: str) -> str: + return f""" +You are an academic research writer. + +Write a FULL LENGTH research paper on the topic: + +"{topic}" + +STRICT REQUIREMENTS: +- Write in formal academic tone +- Minimum 1500–2000 words +- Follow EXACT research paper structure +- Do NOT write questions inside sections +- Do NOT summarize briefly +- Expand each section with depth and examples + +FORMAT (MANDATORY): + +Title + +Abstract +(150–200 words) + +Keywords +(5–7 keywords) + +1. Introduction +(Background, motivation, scope) + +2. Literature Review +(Existing studies, trends, gaps) + +3. Methodology / Approach +(Conceptual or analytical approach) + +4. Applications / Use Cases +(Detailed real-world examples) + +5. Challenges and Limitations +(Technical, ethical, societal) + +6. Future Scope +(Emerging trends, opportunities) + +7. Conclusion +(Summary of findings) + +References +(Numbered list with URLs) + +IMPORTANT: +- Use academic language +- No bullet-point-only answers +- Write like a real journal paper +""" diff --git a/src/services/__init__.py b/src/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/services/llm.py b/src/services/llm.py new file mode 100644 index 0000000..6366bac --- /dev/null +++ b/src/services/llm.py @@ -0,0 +1,28 @@ +import requests + +LM_STUDIO_URL = "http://localhost:1234/v1/chat/completions" +MODEL_NAME = "qwen2.5-coder-0.5b-instruct" + + +class LocalLLM: + def invoke(self, prompt: str) -> str: + payload = { + "model": MODEL_NAME, + "messages": [ + {"role": "system", "content": "You are an academic research paper writer."}, + {"role": "user", "content": prompt} + ], + "temperature": 0.3, + "max_tokens": 3000 + } + + response = requests.post(LM_STUDIO_URL, json=payload) + response.raise_for_status() + + return response.json()["choices"][0]["message"]["content"] + + +def get_llm(): + return LocalLLM() + + diff --git a/src/services/lmstudio_client.py b/src/services/lmstudio_client.py new file mode 100644 index 0000000..deef71b --- /dev/null +++ b/src/services/lmstudio_client.py @@ -0,0 +1,17 @@ +from openai import OpenAI +from src.config import settings + +# Client that talks to LM Studio's OpenAI-compatible server +client = OpenAI( + base_url=settings.LMSTUDIO_BASE_URL, + api_key=settings.LMSTUDIO_API_KEY, # LM Studio just needs any non-empty string +) + + +def chat_with_lmstudio(message: str) -> str: + """Send a single message to the local LLM and return its reply.""" + response = client.chat.completions.create( + model=settings.LMSTUDIO_MODEL, + messages=[{"role": "user", "content": message}], + ) + return response.choices[0].message.content diff --git a/src/services/local_llm.py b/src/services/local_llm.py new file mode 100644 index 0000000..542e82c --- /dev/null +++ b/src/services/local_llm.py @@ -0,0 +1,22 @@ +from openai import OpenAI + +class LocalLLM: + def __init__(self): + self.client = OpenAI( + base_url="http://localhost:1234/v1", + api_key="lm-studio" # dummy key, required by SDK + ) + + self.model = "qwen2.5-coder-0.5b-instruct" + + def generate(self, prompt: str, temperature: float = 0.2) -> str: + response = self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": "You are an academic research paper writer."}, + {"role": "user", "content": prompt} + ], + temperature=temperature + ) + + return response.choices[0].message.content.strip() diff --git a/src/services/tavily_client.py b/src/services/tavily_client.py new file mode 100644 index 0000000..f5dfcfc --- /dev/null +++ b/src/services/tavily_client.py @@ -0,0 +1,37 @@ +import os +import requests + + +class TavilyClient: + def __init__(self): + self.api_key = os.getenv("TAVILY_API_KEY") + if not self.api_key: + raise RuntimeError("TAVILY_API_KEY not set") + + self.url = "https://api.tavily.com/search" + + def search(self, query: str, max_results: int = 3, search_depth: str = "basic"): + payload = { + "api_key": self.api_key, + "query": query, + "max_results": max_results, + "search_depth": search_depth, + } + + response = requests.post(self.url, json=payload, timeout=30) + response.raise_for_status() + return response.json() + + + + + + + + + + + + + + diff --git a/src/test_searcher.py b/src/test_searcher.py new file mode 100644 index 0000000..f5ccdd8 --- /dev/null +++ b/src/test_searcher.py @@ -0,0 +1,9 @@ +from src.agents.searcher_agent import SearcherAgent + +searcher = SearcherAgent() + +query = "Explain machine learning" +result = searcher.run(query) + +print("\n--- Tavily Research Output ---\n") +print(result) diff --git a/src/test_tavily.py b/src/test_tavily.py new file mode 100644 index 0000000..dd4a7b8 --- /dev/null +++ b/src/test_tavily.py @@ -0,0 +1,4 @@ +from src.services.tavily_client import TavilySearchService + +t = TavilySearchService() +print(t.search("What is machine learning?")) diff --git a/src/utils/export.py b/src/utils/export.py new file mode 100644 index 0000000..78d63a2 --- /dev/null +++ b/src/utils/export.py @@ -0,0 +1,45 @@ +import io +from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer +from reportlab.lib.styles import getSampleStyleSheet +from reportlab.lib.pagesizes import A4 + + +def export_markdown(text: str) -> str: + """ + Export research paper as Markdown + """ + return text + + +def export_pdf(text: str) -> bytes: + """ + Export research paper as PDF using ReportLab + """ + buffer = io.BytesIO() + + doc = SimpleDocTemplate( + buffer, + pagesize=A4, + rightMargin=40, + leftMargin=40, + topMargin=40, + bottomMargin=40, + ) + + styles = getSampleStyleSheet() + story = [] + + for line in text.split("\n"): + if line.strip() == "": + story.append(Spacer(1, 12)) + else: + story.append(Paragraph(line, styles["Normal"])) + story.append(Spacer(1, 8)) + + doc.build(story) + buffer.seek(0) + return buffer.read() + + + + diff --git a/test_tavily_manual.py b/test_tavily_manual.py new file mode 100644 index 0000000..6e9c58f --- /dev/null +++ b/test_tavily_manual.py @@ -0,0 +1,16 @@ +from dotenv import load_dotenv +load_dotenv() + +from src.services.tavily_client import TavilySearchService + +t = TavilySearchService() + +results = t.search("What is cyber security") + +print("\n=== TAVILY TEST OUTPUT ===\n") + +for r in results: + print("Title:", r["title"]) + print("URL:", r["url"]) + print("Content:", r["content"][:200]) # first 200 chars + print("-" * 40) diff --git a/ui.py b/ui.py new file mode 100644 index 0000000..c739215 --- /dev/null +++ b/ui.py @@ -0,0 +1,99 @@ +import streamlit as st +from src.core.langgraph_pipeline import build_graph + +# -------------------------------------------------- +# Page config +# -------------------------------------------------- +st.set_page_config( + page_title="AI Research Assistant", + page_icon="📘", + layout="centered" +) + +st.title("📘 AI Research Assistant") +st.caption("Academic Research Paper Generator (Local Qwen LLM)") + +# -------------------------------------------------- +# Build graph once +# -------------------------------------------------- +@st.cache_resource +def load_graph(): + return build_graph() + +graph = load_graph() + +# -------------------------------------------------- +# Session state +# -------------------------------------------------- +if "paper" not in st.session_state: + st.session_state.paper = "" + +if "question" not in st.session_state: + st.session_state.question = "" + +# -------------------------------------------------- +# Input +# -------------------------------------------------- +question = st.chat_input("Ask a research question (academic topic)...") + +if question: + st.session_state.question = question + + with st.spinner("Generating research paper..."): + state = {"input": question} + result = graph.invoke(state) + + st.session_state.paper = result.get("final_answer", "") + +# -------------------------------------------------- +# DISPLAY QUESTION (THIS WAS MISSING) +# -------------------------------------------------- +if st.session_state.question: + st.markdown("### 🧠 Research Question") + st.markdown(f"**{st.session_state.question}**") + st.divider() + +# -------------------------------------------------- +# DISPLAY PAPER +# -------------------------------------------------- +if st.session_state.paper: + st.markdown(st.session_state.paper) + + st.divider() + + # -------------------------------------------------- + # Export buttons + # -------------------------------------------------- + st.download_button( + "📄 Export Markdown", + data=st.session_state.paper, + file_name="research_paper.md", + mime="text/markdown" + ) + + + + + + + + + + + + + + + + + + + + + + + + + + +