-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
132 lines (110 loc) · 5.29 KB
/
Copy pathapp.py
File metadata and controls
132 lines (110 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import tempfile
import uuid

import streamlit as st
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_mistralai import ChatMistralAI
from langchain_core.prompts import ChatPromptTemplate
# Read variables from a .env file (if present) before any client is built.
load_dotenv()

# Page chrome: set_page_config is the first Streamlit call in the script,
# followed by the visible heading and the introductory text.
st.set_page_config(
    page_title="RAG Document Chatbot",
    page_icon="📄",
    layout="wide",
)
st.title("📄 RAG Document Chatbot")
st.markdown(
    "Upload a PDF document and ask questions about its content. "
    "This session uses a fresh database!"
)
# --- Session State Initialization ---
# Seed each key only when it is absent so values survive Streamlit reruns:
#   messages     -> chat history as a list of {"role", "content"} dicts
#   retriever    -> retriever over the current document, None until one is indexed
#   current_file -> name of the upload that was last processed
for state_key, initial_value in (
    ("messages", []),
    ("retriever", None),
    ("current_file", None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# --- Sidebar for File Upload ---
# Lets the user upload a PDF, indexes it into an in-memory vector store and
# stores the resulting retriever in session state for the chat section below.
with st.sidebar:
    st.header("Document Upload")
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    if st.button("Clear Session"):
        st.session_state.messages = []
        st.session_state.retriever = None
        st.session_state.current_file = None
        st.rerun()

    # Only (re)process when the upload differs from the file already indexed.
    if uploaded_file and st.session_state.current_file != uploaded_file.name:
        st.session_state.current_file = uploaded_file.name
        st.session_state.messages = []  # Clear chat history
        # Drop the previous document's retriever up front: if processing
        # fails below, questions must not be answered from the old document.
        st.session_state.retriever = None

        with st.spinner("Processing document... (This might take a moment)"):
            # PyPDFLoader reads from a path, so spill the upload to disk.
            # delete=False keeps the file after the handle is closed; it is
            # removed explicitly in the `finally` clause.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_file.write(uploaded_file.getvalue())
                tmp_file_path = tmp_file.name

            try:
                # Load and split
                docs = PyPDFLoader(tmp_file_path).load()
                splitter = RecursiveCharacterTextSplitter(
                    chunk_size=1000,
                    chunk_overlap=200,
                )
                chunks = splitter.split_documents(docs)

                if not chunks:
                    # e.g. an image-only PDF: nothing to embed, and the
                    # vector store would reject an empty batch anyway.
                    st.error(
                        "Error processing file: no extractable text was found in this PDF."
                    )
                else:
                    # Create embeddings and vectorstore (in-memory, no persist_directory).
                    # A unique collection name per upload is required: without
                    # it every upload goes into the same default collection of
                    # the shared in-memory client, so chunks from earlier
                    # documents would leak into this document's results.
                    embedding_model = HuggingFaceEmbeddings()
                    vectorstore = Chroma.from_documents(
                        documents=chunks,
                        embedding=embedding_model,
                        collection_name=f"doc-{uuid.uuid4().hex}",
                    )

                    # Create retriever (MMR: pick 3 diverse chunks out of 10 candidates)
                    st.session_state.retriever = vectorstore.as_retriever(
                        search_type="mmr",
                        search_kwargs={
                            "k": 3,
                            "fetch_k": 10,
                            "lambda_mult": 0.5,
                        },
                    )
                    st.success("Document processed successfully! You can now ask questions.")
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing the app.
                st.error(f"Error processing file: {e}")
            finally:
                # Clean up the temporary file
                os.unlink(tmp_file_path)
# --- Main Chat Interface ---
# Streamlit re-executes the script on every interaction, so the stored
# conversation is replayed from session state each time.
for past_message in st.session_state.messages:
    role, content = past_message["role"], past_message["content"]
    with st.chat_message(role):
        st.markdown(content)
# Chat Input
# Handles one question per rerun: retrieve matching chunks from the indexed
# document, ask the LLM to answer from them, and record both turns in history.
if query := st.chat_input("Ask a question about your document"):
    if st.session_state.retriever:
        # Echo the question and store it in the conversation history.
        with st.chat_message("user"):
            st.markdown(query)
        st.session_state.messages.append({"role": "user", "content": query})

        # Prompt: the system message limits answers to the retrieved context.
        system_instructions = """You are a helpful assistant that provides concise answers based on retrieved documents. If the retrieved documents do not contain enough information to answer the question, say "I could not find the information in the given document" """
        prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_instructions),
                ("human", "Context:{context}\n\nQuestion:{question}"),
            ]
        )
        llm = ChatMistralAI(model="mistral-small-latest")

        with st.chat_message("assistant"), st.spinner("Thinking..."):
            try:
                # Retrieve the relevant chunks and merge them into one context string.
                retrieved = st.session_state.retriever.invoke(query)
                context = "\n\n".join(doc.page_content for doc in retrieved)

                # Fill the prompt and generate the answer.
                answer = llm.invoke(
                    prompt.invoke({"context": context, "question": query})
                ).content

                st.markdown(answer)
                st.session_state.messages.append({"role": "assistant", "content": answer})
            except Exception as e:
                st.error(f"An error occurred during response generation: {e}")
    else:
        # No document has been indexed yet, so there is nothing to search.
        st.warning("Please upload a document first from the sidebar.")