diff --git a/app.py b/app.py new file mode 100644 index 0000000..60a3861 --- /dev/null +++ b/app.py @@ -0,0 +1,996 @@ +import io +from datetime import datetime +from typing import Any, Dict, List, Optional +import streamlit as st +from planner_agent import PlannerAgent +from searcher_agent import SearcherAgent +from writer_agent import WriterAgent +def _render_css() -> None: + st.markdown( +"""""", + unsafe_allow_html=True, + ) +def _clean_report(report: str) -> str: + """ + Clean report to remove headings and any prompt text. + Preserves paragraph structure while ensuring natural flowing narrative. + """ + if not report: + return report + + lines = report.split('\n') + cleaned_lines = [] + skip_next_line = False + current_paragraph = [] + + # Find where the actual report content ends (before research materials) + report_content = [] + in_research_materials = False + + for line in lines: + line = line.strip() + + # Stop processing when we hit research materials section + if "research question:" in line.lower() or "research materials:" in line.lower(): + break + + # Skip empty lines (but preserve as paragraph breaks) + if not line: + if current_paragraph: # End current paragraph if we have content + cleaned_lines.append(' '.join(current_paragraph)) + current_paragraph = [] + continue + + # Skip if it's a heading (contains common heading words) + heading_indicators = [ + 'introduction:', 'conclusion:', 'analysis:', 'summary:', 'findings:', 'background:', + 'methodology:', 'results:', 'discussion:', 'how do', 'what are', 'what is', 'sources:', + 'advantages of', 'comparison with', 'different types of', 'training', 'rnns have', + 'combining', 'dropout and', 'rnns have been', 'impacts', 'challenges', + 'comparisons', 'future trends', 'how rnn', 'what are the impacts', + 'what are the challenges', 'how does', 'what are the future', 'body:', 'references:', + 'references', 'bibliography', 'appendix', 'acknowledgement', 'abstract' + ] + is_heading = 
any(line.lower().startswith(indicator) for indicator in heading_indicators) + + # Skip if it's prompt text (enhanced detection) + prompt_indicators = [ + 'report:', 'write a', 'please ensure', 'do not', 'weave all', 'unified narrative', 'cohesive passage', + 'requirements:', 'write exactly', 'each paragraph must', 'use complex sentence', 'ensure smooth transitions', + 'include in-text citations', 'do not include any instructions', 'only the report itself', + 'introduction: introduce topic', 'background: explain current', 'analysis: present key findings', + 'implications: discuss practical', 'conclusion: summarize and provide' + ] + is_prompt = any(indicator in line.lower() for indicator in prompt_indicators) + + # Skip reference citations like [1], [2], etc. + if line.startswith('[') and ']' in line and len(line) < 10: + continue + + if is_heading or is_prompt: + skip_next_line = True + continue + + if skip_next_line and line and len(line) < 50: # Skip short lines after headings + skip_next_line = False + continue + + skip_next_line = False + + # Add line to current paragraph + current_paragraph.append(line) + + # Add the last paragraph if we have content + if current_paragraph: + cleaned_lines.append(' '.join(current_paragraph)) + + # Join paragraphs with proper spacing + cleaned_report = '\n\n'.join(cleaned_lines) + + # Fix spacing issues + cleaned_report = cleaned_report.replace('. ', '. ').replace(' ', ' ') + + # Remove duplicate sentences while preserving paragraph structure + paragraphs = cleaned_report.split('\n\n') + unique_paragraphs = [] + seen_sentences = set() + + for paragraph in paragraphs: + sentences = paragraph.split('. ') + unique_sentences = [] + + for sentence in sentences: + sentence = sentence.strip() + if sentence and sentence not in seen_sentences: + unique_sentences.append(sentence) + seen_sentences.add(sentence) + + if unique_sentences: + unique_paragraphs.append('. 
'.join(unique_sentences)) + + cleaned_report = '\n\n'.join(unique_paragraphs) + + # Ensure it ends with a period + if cleaned_report and not cleaned_report.endswith('.'): + cleaned_report += '.' + + return cleaned_report + +def _pdf_bytes(title: str, subquestions: List[Dict[str, Any]], report: str) -> bytes: + try: + from reportlab.lib.pagesizes import LETTER + from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle + from reportlab.lib.units import inch + from reportlab.platypus import ListFlowable, ListItem, Paragraph, SimpleDocTemplate, Spacer + except Exception as e: + raise RuntimeError( + "PDF support requires 'reportlab'. Install it with: pip install reportlab" + ) from e + buf = io.BytesIO() + doc = SimpleDocTemplate( + buf, + pagesize=LETTER, + title=title, + author="OpenDeepResearcher", + ) + styles = getSampleStyleSheet() + + # Create custom styles with larger font + title_style = ParagraphStyle( + 'CustomTitle', + parent=styles['Title'], + fontSize=18, + spaceAfter=20, + textColor='#2c3e50' + ) + + heading_style = ParagraphStyle( + 'CustomHeading', + parent=styles['Heading2'], + fontSize=14, + spaceAfter=12, + spaceBefore=20, + textColor='#34495e' + ) + + body_style = ParagraphStyle( + 'CustomBody', + parent=styles['Normal'], + fontSize=12, + spaceAfter=12, + leading=16, # Line spacing + textColor='#2c3e50' + ) + + story = [] + story.append(Paragraph(title, title_style)) + story.append(Paragraph(datetime.now().strftime("Generated on %Y-%m-%d %H:%M"), body_style)) + story.append(Spacer(1, 12)) + story.append(Paragraph("Planner Output (Subquestions)", heading_style)) + sq_items: List[ListItem] = [] + for sq in subquestions: + qid = str(sq.get("id", "")) + qtype = str(sq.get("type", "")) + priority = str(sq.get("priority", "")) + text = str(sq.get("text", "")) + line = f"{qid} (priority {priority}, {qtype}): {text}".strip() + sq_items.append(ListItem(Paragraph(line, body_style))) + story.append(ListFlowable(sq_items, bulletType="bullet")) + 
story.append(Spacer(1, 14)) + story.append(Paragraph("Synthesis Report", heading_style)) + + # Split report into paragraphs and add each as separate paragraph + paragraphs = report.split('\n\n') # Split by paragraph breaks, not sentences + for paragraph in paragraphs: + paragraph = paragraph.strip() + if paragraph: + # Add period if it doesn't end with one + if not paragraph.endswith('.'): + paragraph += '.' + story.append(Paragraph(paragraph, body_style)) + story.append(Spacer(1, 8)) + + doc.build(story) + return buf.getvalue() + +def _init_session() -> None: + if "planner_result" not in st.session_state: + st.session_state.planner_result = None + if "subquestions" not in st.session_state: + st.session_state.subquestions = None + if "report" not in st.session_state: + st.session_state.report = None + if "last_topic" not in st.session_state: + st.session_state.last_topic = "" +def _render_header() -> None: + st.markdown( + """
+
+
+
+
+

OpenDeepResearcher

+
+
+⭐ +Premium AI Research Platform +
+
+
+
+
+✨ Where Curiosity Meets Clarity +
+
+Transform your research questions into comprehensive, beautifully crafted reports with AI-powered precision and elegant presentation. +
+
+
+
šŸŽÆ
+
Precise Analysis
+
+
+
šŸ“š
+
Comprehensive Reports
+
+
+
✨
+
Beautiful Results
+
+
+
+
+
+šŸš€ +Start Your Research Journey +
+
+
+
+ """, + unsafe_allow_html=True, + ) +def _section_header(title: str, subtitle: str) -> None: + st.markdown( + f"""
+

{title}

+

{subtitle}

+
""", + unsafe_allow_html=True, + ) +def _render_subquestions(subquestions: List[Dict[str, Any]]) -> None: + st.markdown("
", unsafe_allow_html=True) + # Dramatic header with visual element + st.markdown( + f"""
+
+ 🌿 + Research Plan + {len(subquestions)} research questions crafted +
+
""", + unsafe_allow_html=True, + ) + tab_table, tab_cards = st.tabs([" Table View", "šŸŽ“ Card View"]) + with tab_table: + try: + import pandas as pd + rows = [ + { + "#": i + 1, + "ID": sq.get('id', f'q{i+1}'), + "Priority": sq.get('priority', 'N/A'), + "Type": sq.get('type', 'N/A'), + "Subquestion": sq.get('text', ''), + } + for i, sq in enumerate(subquestions) + ] + st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True) + except ImportError: + st.info("Please install pandas to view the table format.") + with tab_cards: + for i, sq in enumerate(subquestions): + # Add staggered animation delay + delay = i * 0.1 + st.markdown( + f"""
+
+
+
+ {i + 1}. {sq.get('text')} +
+
+ + šŸŽÆ + Priority: {sq.get('priority', 'N/A')} + + + šŸ“‚ + Type: {sq.get('type', 'N/A')} + + + #ļøāƒ£ + ID: {sq.get('id', 'N/A')} + +
+
+
+
""", + unsafe_allow_html=True + ) + # Add fade in animation + st.markdown("""""", + unsafe_allow_html=True, + ) +def main() -> None: + st.set_page_config( + page_title="OpenDeepResearcher", + page_icon="šŸ“š", + layout="wide", + initial_sidebar_state="expanded", + ) + _init_session() + _render_css() + _render_header() + with st.sidebar: + st.markdown( +"""
+
+ āš™ļø + Configuration +
+
""", + unsafe_allow_html=True, + ) + st.markdown( +"""
+
+ ā˜• Local Model Setup +
+
+ LM Studio must be running at http://127.0.0.1:1234 to serve the local model. +
+
""", + unsafe_allow_html=True, + ) + # --- Main Content Sections with enhanced drama --- + st.markdown("
", unsafe_allow_html=True) + # Enhanced section header with more drama + st.markdown("""
+

+ 🌿 Begin Your Research Journey +

+

+ Share your curiosity and we'll transform it into comprehensive insights with elegant precision. +

+
""", + unsafe_allow_html=True, + ) + # Dramatic floating input area with no white box + st.markdown( +"""
+
+ ā˜• + What would you like to explore today? +
+
+ Share your research topic and we'll craft a comprehensive analysis with AI-powered precision. +
+
+
+ šŸŽÆ + Precise +
+
+ šŸ“š + Comprehensive +
+
+ ✨ + Beautiful +
+
+
""", + unsafe_allow_html=True, + ) + # Enhanced text area with dramatic styling + st.markdown( +"""
+
+
+ šŸ“ + Enter Your Topic +
+
+
""", + unsafe_allow_html=True, + ) + topic = st.text_area( + "Enter your research topic", + value=st.session_state.last_topic, + placeholder="🌿 The evolution of sustainable architecture in urban environments (2010-2024)\n\n✨ Share your curiosity and let AI craft comprehensive insights...", + height=160, + label_visibility="collapsed", + ) + # Dramatic button area with enhanced visual effects + st.markdown( +"""
+
+
+
+
+
+
+
+
+
+
""", + unsafe_allow_html=True, + ) + col1, col2, col3 = st.columns([1, 1.2, 1]) + with col1: + st.markdown("
", unsafe_allow_html=True) + with col2: + plan_clicked = st.button("šŸš€ Plan Research", type="primary", use_container_width=True) + with col3: + clear_clicked = st.button("Clear All", use_container_width=True) + # Dramatic visual flow indicator + st.markdown( +"""
+
+ ā˜• + Curiosity +
+
→
+
+ 🌿 + Research +
+
→
+
+ šŸ“– + Insights +
+
""", + unsafe_allow_html=True, + ) + if clear_clicked: + st.session_state.planner_result = None + st.session_state.subquestions = None + st.session_state.report = None + st.session_state.last_topic = "" + st.rerun() + if plan_clicked: + if not topic.strip(): + st.error("Please provide a research topic to begin.") + else: + st.session_state.last_topic = topic + st.session_state.report = None + with st.spinner("Analyzing topic and creating research plan..."): + planner = PlannerAgent() + st.session_state.planner_result = planner.plan(topic.strip()) + st.session_state.subquestions = st.session_state.planner_result.get("subquestions", []) + if st.session_state.subquestions: + _render_subquestions(st.session_state.subquestions) + # --- Generate Report Section --- + if st.session_state.subquestions: + st.markdown("
", unsafe_allow_html=True) + st.markdown("
", unsafe_allow_html=True) + _section_header( + "Craft Your Comprehensive Report", + "Transform your research questions into a beautifully structured, citation-ready document with AI-powered synthesis." + ) + # Centered generate button with warm styling + st.markdown( +"""
+
""", + unsafe_allow_html=True, + ) + generate_clicked = st.button( + " Generate Full Report", + type="primary", + use_container_width=True + ) + st.markdown("""""", + unsafe_allow_html=True, + ) + if generate_clicked: + with st.spinner("🌿 Researching sources and crafting insights..."): + searcher = SearcherAgent() + search_results = searcher.search_all(st.session_state.subquestions) + writer = WriterAgent() + raw_report = writer.synthesize_report( + research_question=topic.strip(), + subquestions=st.session_state.subquestions, + search_results=search_results, + ) + # Clean the report to remove headings and prompt text + st.session_state.report = _clean_report(raw_report) + if st.session_state.report: + # Dramatic success indicator + st.markdown( +"""
+
+ šŸŽ‰ + Report Crafted Successfully! +
+
""", + unsafe_allow_html=True, + ) + # Enhanced download section only - no report display + st.markdown("
", unsafe_allow_html=True) + st.markdown( +"""
+

šŸ’¾ Download Your Report

+
""", + unsafe_allow_html=True, + ) + pdf_data = _pdf_bytes( + title=f"Research Report: {topic.strip()}", + subquestions=st.session_state.subquestions, + report=st.session_state.report + ) + st.download_button( + label="šŸ“„ Download as PDF", + data=pdf_data, + file_name=f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf", + mime="application/pdf", + use_container_width=True + ) + st.markdown("
", unsafe_allow_html=True) # Close section + st.markdown("", unsafe_allow_html=True) # Close section + st.markdown("", unsafe_allow_html=True) # Close section + st.markdown("", unsafe_allow_html=True) # Close section +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/planner_agent.py b/planner_agent.py new file mode 100644 index 0000000..6102c22 --- /dev/null +++ b/planner_agent.py @@ -0,0 +1,342 @@ +""" +planner_agent.py + +Planner Agent - Uses LM Studio to split user questions into unique subquestions. +Calls the running LM Studio server to generate intelligent subquestions for each input. +""" + +import json +import re +import uuid +import requests +from datetime import datetime +from typing import Dict, Any, List, Optional + + +class PlannerAgent: + """ + Planner Agent that uses LM Studio to generate unique subquestions + for each user query. Connects to the running LM Studio server. + """ + + def __init__(self): + self.api_url = "http://127.0.0.1:1234/v1/chat/completions" + self.model_name = "local-model" + self.question_id = str(uuid.uuid4()) + self.created_at = datetime.utcnow().isoformat() + "Z" + + def _build_system_prompt(self) -> str: + """Kept for backward compatibility but not used in agent-based mode.""" + return "Agent-based planner: no system prompt needed." + + def _call_lm_studio(self, messages: List[Dict[str, str]]) -> str: + """ + Call LM Studio server and return the response text. 
+ """ + try: + # messages: list of {role,content} + payload = { + "model": self.model_name, + "messages": messages, + # allow caller to embed temperature/max_tokens in messages if needed + "temperature": 0.3, + "max_tokens": 1024, + } + print("[planner] Sending request to LM Studio (may take a moment)...") + response = requests.post(self.api_url, json=payload, timeout=180) + + if response.status_code != 200: + print(f"[planner] LM Studio error: {response.status_code}: {response.text[:200]}") + return "" + + data = response.json() + return data["choices"][0]["message"]["content"].strip() + except requests.exceptions.Timeout: + print("[planner] Warning: LM Studio took too long to respond. The model may be slow.") + print("[planner] Try again or check if the server is overloaded.") + return "" + except requests.exceptions.ConnectionError: + print("[planner] Error: Cannot connect to LM Studio on port 1234. Make sure the server is running.") + return "" + except Exception as e: + print(f"[planner] Error calling LM Studio: {e}") + return "" + + def _generate_subquestions_lm(self, user_prompt: str) -> List[Dict[str, Any]]: + """ + Use LM Studio to intelligently break down the user's question into subquestions. + Returns all subquestions in ONE call (no retries). + """ + # Strong instruction with a concrete example to encourage correct JSON output + system_prompt = ( + "You are a research planning expert. Your single task is to break a research question into EXACTLY 6-8 " + "atomic, focused subquestions. Each subquestion must be answerable and non-overlapping. " + "Return ONLY a JSON array (no markdown, no explanation). Each item must be an object: " + "{\"id\": \"q1\", \"text\": \"...\", \"priority\": 1, \"type\": \"analysis\"} . " + "Types allowed: background, definition, analysis, methodology, causal, impact, comparative, historical. " + "Prioritize by importance (1 = highest). Keep each subquestion concise (<=140 chars). 
" + "IMPORTANT: You MUST generate exactly 6-8 subquestions, not 1-2." + ) + + # Provide a generic example without specific content + example = ( + "EXAMPLE:\nUser prompt: \"What are the main impacts of artificial intelligence?\"\n" + "Output JSON:\n" + "[\n {\"id\": \"q1\", \"text\": \"What are the main types of AI technologies?\", \"priority\": 1, \"type\": \"background\"},\n" + " {\"id\": \"q2\", \"text\": \"How do AI systems process information?\", \"priority\": 2, \"type\": \"methodology\"},\n" + " {\"id\": \"q3\", \"text\": \"What are the economic impacts of AI adoption?\", \"priority\": 3, \"type\": \"impact\"},\n" + " {\"id\": \"q4\", \"text\": \"What are the ethical challenges of AI implementation?\", \"priority\": 4, \"type\": \"analysis\"},\n" + " {\"id\": \"q5\", \"text\": \"How does AI compare to traditional computing methods?\", \"priority\": 5, \"type\": \"comparative\"},\n" + " {\"id\": \"q6\", \"text\": \"What are the future trends in AI development?\", \"priority\": 6, \"type\": \"historical\"}\n]" + ) + + user_message = ( + f"Produce EXACTLY 6-8 focused subquestions for the research question below. Output ONLY the JSON array with all subquestions.\n\n" + f"Research question:\n{user_prompt}\n\nPlease ensure items are concise, non-overlapping, and answerable. You must generate 6-8 subquestions." 
+ ) + + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": example + "\n\n" + user_message} + ] + + response_text = self._call_lm_studio(messages) + + if not response_text: + print("[planner] LM Studio returned empty response.") + # Generate multiple fallback subquestions instead of just one + return [ + {"id": "q1", "text": f"What is the background of {user_prompt}?", "priority": 1, "type": "background"}, + {"id": "q2", "text": f"How does {user_prompt} work?", "priority": 2, "type": "methodology"}, + {"id": "q3", "text": f"What are the impacts of {user_prompt}?", "priority": 3, "type": "impact"}, + {"id": "q4", "text": f"What are the challenges with {user_prompt}?", "priority": 4, "type": "analysis"}, + {"id": "q5", "text": f"How does {user_prompt} compare to alternatives?", "priority": 5, "type": "comparative"}, + {"id": "q6", "text": f"What are the future trends for {user_prompt}?", "priority": 6, "type": "historical"} + ] + + # Try to parse JSON from response + parsed = None + try: + parsed = json.loads(response_text) + except json.JSONDecodeError: + # try to extract JSON array + m = re.search(r"\[.*\]", response_text, re.DOTALL) + if m: + try: + parsed = json.loads(m.group(0)) + except json.JSONDecodeError: + parsed = None + + if isinstance(parsed, list) and len(parsed) > 0: + return parsed + + # Fallback: return multiple subquestions if parsing fails + print(f"[planner] Could not parse JSON from LM Studio. 
Raw output:\n{response_text[:500]}") + return [ + {"id": "q1", "text": f"What is the background of {user_prompt}?", "priority": 1, "type": "background"}, + {"id": "q2", "text": f"How does {user_prompt} work?", "priority": 2, "type": "methodology"}, + {"id": "q3", "text": f"What are the impacts of {user_prompt}?", "priority": 3, "type": "impact"}, + {"id": "q4", "text": f"What are the challenges with {user_prompt}?", "priority": 4, "type": "analysis"}, + {"id": "q5", "text": f"How does {user_prompt} compare to alternatives?", "priority": 5, "type": "comparative"}, + {"id": "q6", "text": f"What are the future trends for {user_prompt}?", "priority": 6, "type": "historical"} + ] + + @staticmethod + def extract_keywords(text: str) -> List[str]: + """Extract meaningful keywords from text.""" + # Remove common stop words + stop_words = { + 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', + 'of', 'with', 'by', 'from', 'is', 'are', 'was', 'were', 'be', 'been', + 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', + 'could', 'should', 'may', 'might', 'must', 'can', 'what', 'which', + 'who', 'when', 'where', 'why', 'how', 'this', 'that', 'these', 'those' + } + + # Extract words (remove punctuation) + words = re.findall(r'\b[a-z]+\b', text.lower()) + keywords = [w for w in words if w not in stop_words and len(w) > 3] + return list(set(keywords))[:10] # Return unique keywords, max 10 + + @staticmethod + def detect_question_type(text: str) -> str: + """Detect the type of research question.""" + text_lower = text.lower() + + if any(word in text_lower for word in ['how', 'method', 'process', 'way']): + return 'methodology' + elif any(word in text_lower for word in ['why', 'cause', 'reason', 'effect']): + return 'causal' + elif any(word in text_lower for word in ['what', 'definition', 'mean', 'is']): + return 'definition' + elif any(word in text_lower for word in ['compare', 'difference', 'versus', 'vs']): + return 'comparative' + elif any(word 
in text_lower for word in ['trend', 'history', 'evolution', 'over time']): + return 'historical' + elif any(word in text_lower for word in ['impact', 'effect', 'consequence', 'result']): + return 'impact' + else: + return 'analysis' + + @staticmethod + def split_into_subquestions(user_prompt: str) -> List[Dict[str, Any]]: + """ + Delegates to LM Studio for intelligent subquestion generation. + """ + agent = PlannerAgent() + return agent._generate_subquestions_lm(user_prompt) + + @staticmethod + def create_research_plan(subquestions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Create a structured research plan based on subquestions. + """ + plan = [] + step = 1 + + # Search step for each subquestion + for subq in subquestions: + search_target = ["web", "academic"] + if subq["type"] in ["historical", "trend"]: + search_target.append("archive") + + plan.append({ + "step": step, + "action": "search", + "target": search_target, + "query_template": f"{subq['text']} -wiki", + "expected_results": 5, + "depends_on": [subq["id"]] + }) + step += 1 + + # Synthesis step + subq_ids = [sq["id"] for sq in subquestions] + plan.append({ + "step": step, + "action": "synthesize", + "depends_on": subq_ids, + "method": "cross-reference analysis" + }) + step += 1 + + # Validation step + plan.append({ + "step": step, + "action": "validate", + "depends_on": ["synthesis"], + "method": "fact-check against multiple sources" + }) + + return plan + + @staticmethod + def extract_constraints(user_prompt: str) -> Dict[str, Any]: + """ + Extract constraints from the user prompt. 
+ """ + constraints = { + "time_horizon": None, + "source_types": ["peer-reviewed", "official", "news"], + "domain_focus": None + } + + # Check for date ranges + date_pattern = r'(\d{4})\s*(?:to|-|–)\s*(\d{4})' + date_match = re.search(date_pattern, user_prompt) + if date_match: + constraints["time_horizon"] = f"{date_match.group(1)}-01-01 to {date_match.group(2)}-12-31" + + # Check for specific source preferences + if any(word in user_prompt.lower() for word in ['scholarly', 'academic', 'peer-reviewed']): + constraints["source_types"] = ["peer-reviewed"] + elif any(word in user_prompt.lower() for word in ['news', 'recent']): + constraints["source_types"] = ["news", "official"] + + # Extract domain focus + keywords = PlannerAgent.extract_keywords(user_prompt) + if keywords: + constraints["domain_focus"] = keywords[0] + + return constraints + + def plan(self, user_prompt: str) -> Dict[str, Any]: + """ + Generate a planner JSON using LM Studio to generate intelligent subquestions. + + Returns the parsed planner JSON (and writes it to file). 
+ """ + print(f"\n[planner] Processing question: {user_prompt[:100]}...") + print("[planner] Calling LM Studio to generate subquestions...") + + # Generate unique subquestions using LM Studio + subquestions = self._generate_subquestions_lm(user_prompt) + print(f"\n[planner] Generated {len(subquestions)} subquestions:") + print("-" * 70) + + for sq in subquestions: + priority = sq.get("priority", "?") + q_type = sq.get("type", "?") + text = sq.get("text", "") + print(f" [{sq['id']} - Priority {priority} - {q_type}] {text}") + + print("-" * 70) + + # Create research plan + plan = self.create_research_plan(subquestions) + print(f"[planner] Created research plan with {len(plan)} steps") + + # Extract constraints + constraints = self.extract_constraints(user_prompt) + print("[planner] Extracted constraints from prompt") + + # Build the final JSON output + result = { + "question_id": self.question_id, + "original_prompt": user_prompt, + "summary": user_prompt.split('.')[0] + "." if '.' in user_prompt else user_prompt, + "subquestions": subquestions, + "plan": plan, + "constraints": constraints, + "metadata": { + "planner_version": "v2-lm-studio", + "created_at": self.created_at, + "method": "LM Studio inference" + } + } + + # Save to disk + filename = f"planner_output_{self.question_id}.json" + with open(filename, "w", encoding="utf-8") as f: + json.dump(result, f, indent=2, ensure_ascii=False) + print(f"[planner] Saved planner output to: {filename}\n") + + return result + + +if __name__ == "__main__": + agent = PlannerAgent() + + print("=" * 70) + print("Planner Agent (Using LM Studio Server)") + print("Generates intelligent subquestions using the loaded model.") + print("=" * 70) + print("Make sure LM Studio is running with port 1234 active!") + print("=" * 70) + + try: + while True: + q = input("\nEnter your research question → ").strip() + if not q: + continue + if q.lower() in ("exit", "quit"): + print("Exiting.") + break + try: + result = agent.plan(q) + except 
class SearcherAgent:
    """
    Searcher Agent: Uses Tavily API to retrieve relevant, up-to-date content for each subquestion.

    The API key is taken from the ``api_key`` argument or, when that is
    empty, from the ``TAVILY_API_KEY`` environment variable. No key is
    embedded in the source.
    """

    def __init__(self, api_key: str = ""):
        # SECURITY: a real Tavily key used to be hard-coded as the default
        # value here. It has been committed and must be treated as leaked:
        # revoke it and supply a new one via TAVILY_API_KEY.
        import os  # local import: the module does not import os at file level

        self.api_key = api_key or os.environ.get("TAVILY_API_KEY", "")
        self.base_url = "https://api.tavily.com/search"

    def search_subquestion(self, subquestion: str) -> List[Dict[str, Any]]:
        """
        Search for relevant sources using Tavily API for a single subquestion.
        Returns a list of source dictionaries with title, url, content (snippet), and score.

        Returns an empty list (after printing the reason) when no API key is
        configured or when the request fails.
        """
        if not self.api_key:
            print(
                f"Error searching for '{subquestion}': no Tavily API key configured "
                "(pass api_key or set TAVILY_API_KEY)"
            )
            return []

        payload = {
            "api_key": self.api_key,
            "query": subquestion,
            "search_depth": "advanced",
            "include_images": False,
            "include_answer": False,
            "include_raw_content": False,
            "max_results": 5,
            "include_domains": [],
            "exclude_domains": []
        }

        try:
            response = requests.post(self.base_url, json=payload, timeout=30)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on older ``requests``.
            print(f"Error searching for '{subquestion}': {e}")
            return []

        results = data.get("results", []) if isinstance(data, dict) else []
        return [
            {
                # ``or`` guards against explicit nulls in the response.
                "title": result.get("title") or "",
                "url": result.get("url") or "",
                "content": result.get("content") or "",
                "score": result.get("score") or 0.0,
            }
            for result in results
            if isinstance(result, dict)
        ]

    def search_all(self, subquestions: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
        """
        Search for sources for all subquestions.
        Handles both dict and string types in subquestions list.
        Returns a dict where keys are subquestion IDs and values are lists of sources.
        Enhanced to display detailed source information in terminal.

        A plain-string subquestion, or a dict without an ``id``, is keyed
        as ``q<position>``.
        """
        results = {}
        print("\n" + "="*80)
        print("SEARCH AGENT RESULTS - DETAILED SOURCE INFORMATION")
        print("="*80)

        for position, subq in enumerate(subquestions, start=1):
            if isinstance(subq, dict):
                qid = str(subq.get("id") or f"q{position}")
                text = str(subq.get("text") or "")
            else:
                qid = f"q{position}"
                text = str(subq)

            print(f"\n🔍 Searching for subquestion {qid}: {text}")
            sources = self.search_subquestion(text)
            results[qid] = sources

            print(f"\n📚 Found {len(sources)} sources for {qid}:")
            print("-" * 60)

            for i, source in enumerate(sources, 1):
                content = source['content']
                print(f"\n📄 Source {i}:")
                print(f" Title: {source['title']}")
                print(f" URL: {source['url']}")
                print(f" Content: {content[:300]}{'...' if len(content) > 300 else ''}")
                print(f" Score: {source['score']}")
                print(f" {'─' * 50}")

        print(f"\n✅ Search completed for {len(subquestions)} subquestions")
        print("="*80)
        return results
class WriterAgent:
    """
    Writer Agent: Synthesizes retrieved data from searcher into structured, coherent summaries using LM Studio.

    Every public and private method returns a string and never raises: failures
    are reported as human-readable messages so the caller can always display
    something.
    """

    def __init__(self):
        self.api_url = "http://127.0.0.1:1234/v1/chat/completions"
        self.model_name = "local-model"

    def _call_lm_studio(self, messages: List[Dict[str, str]]) -> str:
        """
        Call LM Studio to generate synthesis report - LM Studio output only.

        Args:
            messages: Chat messages (dicts with "role" and "content") sent as
                the "messages" field of the chat-completions payload.

        Returns:
            The generated message content on success, otherwise a message
            starting with "LM Studio " that describes the failure.
        """
        payload = {
            "model": self.model_name,
            "messages": messages,
            "temperature": 0.3,
            "max_tokens": 3072,  # Increased for full-page content
            "stream": False,
        }

        # LM Studio only - no fallbacks
        timeout = 600  # 10 minutes
        print(f"[writer] Generating report with LM Studio (timeout: {timeout}s)...")

        try:
            response = requests.post(
                self.api_url,
                json=payload,
                timeout=timeout,
                headers={'Connection': 'close'},  # Force connection close
            )
            response_text = response.text
        except requests.exceptions.ReadTimeout as e:
            # A non-streaming request that times out never yields a response
            # object, so there is no partial body to recover here. The returned
            # message is kept unchanged for callers that display or match it.
            print(f"[writer] Read timeout occurred: {e}")
            return (
                f"LM Studio timeout after {timeout}s, but content was generated. "
                "Please try again or check LM Studio logs for the complete response."
            )
        except Exception as e:
            # Boundary handler: this method must always hand back a string.
            print(f"[writer] LM Studio connection error: {e}")
            return f"LM Studio connection error: {str(e)}"

        print(f"[writer] LM Studio response received! Length: {len(response_text)} chars")
        return self._parse_completion(response_text)

    @staticmethod
    def _parse_completion(response_text: str) -> str:
        """
        Extract the assistant message from a chat-completions response body.

        Args:
            response_text: Raw response body as text.

        Returns:
            The message content, or an error message starting with
            "LM Studio " when the body is not usable.
        """
        try:
            data = json.loads(response_text)
        except json.JSONDecodeError as e:
            print(f"[writer] JSON decode error: {e}")
            print(f"[writer] Raw response: {response_text[:500]}...")
            # Try to extract content from partial JSON
            salvaged = WriterAgent._salvage_content(response_text)
            if salvaged is not None:
                print("[writer] Extracted content from partial JSON")
                return salvaged
            return f"LM Studio response error: {str(e)}"

        try:
            if "choices" in data and len(data["choices"]) > 0 and "message" in data["choices"][0]:
                content = data["choices"][0]["message"]["content"]
            else:
                print("[writer] Invalid response structure: missing 'choices' or 'message' field")
                print(f"[writer] Response data: {str(data)[:500]}...")
                return "LM Studio returned invalid response structure. Missing 'choices' or 'message' field."
        except (KeyError, IndexError, TypeError) as struct_error:
            print(f"[writer] Response structure error: {struct_error}")
            print(f"[writer] Response data: {response_text[:500]}...")
            return f"LM Studio response structure error: {str(struct_error)}"

        if not isinstance(content, str):
            # e.g. JSON null content; keep the "always a string" contract.
            print(f"[writer] Message content is not text: {type(content)}")
            return "LM Studio returned an empty message content."

        print("[writer] Report generation completed successfully!")
        return content

    @staticmethod
    def _salvage_content(response_text: str) -> "str | None":
        """
        Recover the first complete "content" string from malformed JSON.

        Returns:
            The decoded string, or None when no complete "content" value is
            present in ``response_text``.
        """
        import re

        match = re.search(r'"content":\s*"((?:[^"\\]|\\.)*)"', response_text, re.DOTALL)
        if match is None:
            return None
        raw = match.group(1)
        try:
            # Decode as a JSON string so every escape (\n, \", \\, \uXXXX, ...)
            # is handled; strict=False tolerates raw control characters.
            return json.loads(f'"{raw}"', strict=False)
        except json.JSONDecodeError:
            # Invalid escape sequence: fall back to the minimal unescaping.
            return raw.replace('\\"', '"').replace('\\n', '\n')

    def _generate_fallback_report(self, messages: List[Dict[str, str]]) -> str:
        """
        Generate a basic structured report as fallback.

        The topic is read from the first "Research Question:" line of the first
        user message; "Unknown Research Topic" is used when none is found.

        Args:
            messages: Chat messages (dicts with "role" and "content").

        Returns:
            A five-paragraph template report followed by a note, or an error
            message if the messages cannot be processed.
        """
        try:
            # Extract research question from user message
            research_question = "Unknown Research Topic"
            first_user = next((m for m in messages if m["role"] == "user"), None)
            if first_user is not None:
                for line in first_user["content"].split("\n"):
                    if "Research Question:" in line:
                        research_question = line.split("Research Question:")[1].strip()
                        break

            topic = research_question.lower()

            # Create a structured 5-paragraph fallback report
            report = f"""The field of {topic} represents a significant area of contemporary research and practice. This comprehensive analysis examines the fundamental aspects and implications of the subject matter, drawing upon available scholarly sources and empirical evidence. The importance of understanding this topic has grown substantially in recent years, reflecting its relevance to both theoretical frameworks and practical applications. Researchers and practitioners alike recognize the need for thorough investigation into the various dimensions and complexities that characterize this domain. Such inquiry provides valuable insights that can inform future developments and strategic decision-making processes across multiple contexts.

Current research indicates that {topic} has evolved through several distinct phases of development and refinement. Existing literature demonstrates a progression from early theoretical foundations to more sophisticated contemporary approaches that integrate advanced methodologies and technologies. The historical context reveals how various factors have influenced the trajectory of research in this area, including technological advancements, changing societal needs, and evolving theoretical paradigms. This evolutionary process has resulted in a rich tapestry of knowledge that continues to expand and diversify as new discoveries emerge. Understanding this developmental trajectory is essential for contextualizing current research efforts and identifying promising directions for future investigation.

The analysis of available sources reveals several key findings that highlight the multifaceted nature of {topic}. Research evidence consistently demonstrates the interconnected relationships between various components and subsystems within this domain, suggesting the need for holistic approaches to understanding and implementation. Empirical studies have identified critical success factors and potential challenges that must be addressed to achieve optimal outcomes in practice. Furthermore, the integration of theoretical frameworks with practical applications has yielded valuable insights into effective strategies and methodologies. These findings collectively contribute to a more nuanced understanding of the subject matter and provide a foundation for evidence-based decision-making.

The practical implications of {topic} extend across numerous sectors and applications, making it a critical area of focus for policymakers, practitioners, and researchers. Implementation challenges and opportunities vary significantly depending on contextual factors, including organizational structures, resource availability, and stakeholder engagement. The potential benefits of successful application include improved efficiency, enhanced effectiveness, and sustainable outcomes that align with broader strategic objectives. However, realizing these benefits requires careful consideration of various constraints and limitations that may impact implementation processes. Stakeholders must therefore adopt strategic approaches that balance innovation with practical considerations to maximize positive outcomes while minimizing potential risks and challenges.

Future research directions in {topic} should focus on addressing current knowledge gaps and exploring emerging opportunities for advancement. Interdisciplinary collaboration and methodological innovation will be essential for advancing understanding and developing more sophisticated approaches to complex challenges. The integration of emerging technologies and novel theoretical frameworks offers promising avenues for future investigation and application. Additionally, increased emphasis on longitudinal studies and cross-cultural comparisons can provide valuable insights into the generalizability and transferability of findings across different contexts. Such continued research efforts will contribute to the ongoing evolution of the field and support the development of more effective and sustainable solutions.

Note: This is a fallback report generated due to technical difficulties with the AI service. Please try again for a comprehensive analysis with current research data and citations."""
            return report
        except Exception as e:
            print(f"[writer] Fallback generation error: {e}")
            return f"Error generating report. Please try again. Technical details: {str(e)}"

    def _generate_immediate_fallback(self, research_question: str, subquestions: List[Dict[str, Any]]) -> str:
        """
        Generate an immediate fallback report when search results are empty.

        Args:
            research_question: Topic inserted (lowercased) into the template.
            subquestions: Accepted for interface compatibility; not used.

        Returns:
            A five-paragraph template report followed by a note, or an error
            message if ``research_question`` cannot be lowercased.
        """
        try:
            topic = research_question.lower()

            # Create a structured 5-paragraph immediate fallback report
            report = f"""The investigation into {topic} represents an important scholarly endeavor that seeks to advance understanding in this significant area of study. This comprehensive analysis aims to explore the fundamental dimensions and implications of the subject matter through systematic examination of available evidence and theoretical frameworks. The relevance of this research has become increasingly apparent in contemporary academic and professional contexts, reflecting growing recognition of its importance across multiple domains of practice. Scholars and practitioners continue to emphasize the critical need for thorough investigation into the various aspects and complexities that characterize this field. Such systematic inquiry provides essential insights that can inform theoretical development and practical applications in meaningful ways.

The current state of knowledge regarding {topic} reveals a complex landscape of theoretical perspectives and empirical findings that have evolved over time. Contemporary literature demonstrates significant progress in understanding the underlying mechanisms and principles that govern phenomena within this domain of study. Historical developments have laid important groundwork for current research efforts, establishing foundational concepts and methodological approaches that continue to inform scholarly investigation. The evolution of thought in this area reflects broader trends in academic research, including increased emphasis on interdisciplinary collaboration and methodological sophistication. Understanding this historical and theoretical context is crucial for situating current research within the broader scholarly conversation and identifying promising directions for future investigation.

Analysis of the subject matter reveals several critical insights that contribute to a more nuanced understanding of {topic}. Theoretical frameworks provide valuable lenses through which to examine complex phenomena and identify patterns that might otherwise remain obscured. Empirical evidence, while limited in this case, suggests important relationships between various factors and outcomes that warrant further investigation. Methodological approaches employed in studying this topic have become increasingly sophisticated, incorporating advanced analytical techniques and innovative research designs. These developments collectively enhance our capacity to generate meaningful insights and advance knowledge in ways that can inform both theory and practice across multiple contexts.

The practical implications of research in {topic} extend far beyond academic considerations, influencing policy development, professional practice, and organizational decision-making processes. Implementation of research findings requires careful attention to contextual factors that may affect the transferability and applicability of theoretical insights to real-world situations. Stakeholders across various sectors must consider the potential benefits and challenges associated with applying knowledge generated through scholarly investigation. The integration of research evidence into practice demands strategic approaches that balance theoretical rigor with practical considerations and resource constraints. Such thoughtful application of scholarly knowledge can lead to improved outcomes and more effective approaches to addressing complex challenges in diverse settings.

Future research directions in {topic} should prioritize addressing current knowledge gaps while exploring innovative approaches to longstanding questions and challenges. Interdisciplinary collaboration offers promising opportunities to generate novel insights through the integration of diverse perspectives and methodological approaches. Technological advancements and emerging analytical tools provide new capabilities for investigating complex phenomena and generating sophisticated analyses that were previously impossible. Longitudinal studies and cross-cultural investigations can provide valuable insights into the generalizability and transferability of findings across different contexts and populations. Such continued research efforts will contribute to the ongoing advancement of knowledge and support the development of more effective and sustainable solutions to complex problems.

Note: This is an immediate fallback report generated due to empty search results. Please try again with different subquestions or search parameters for a comprehensive analysis with current research data and citations."""
            return report
        except Exception as e:
            print(f"[writer] Immediate fallback generation error: {e}")
            return f"Error generating report. Please try again. Technical details: {str(e)}"

    @staticmethod
    def _has_reference_section(report: str) -> bool:
        """
        Return True when ``report`` contains a references/bibliography heading.

        A heading is a line that, ignoring case and markdown decoration
        (``#``, ``*``, ``_``), is exactly "references" or "bibliography",
        optionally followed by a colon. The bare word inside a sentence does
        not count.
        """
        for raw_line in report.splitlines():
            line = raw_line.strip().lstrip("#*_ ").casefold()
            for keyword in ("references", "bibliography"):
                if not line.startswith(keyword):
                    continue
                rest = line[len(keyword):].strip("*_ ")
                if not rest or rest.startswith(":"):
                    return True
        return False

    def synthesize_report(self, research_question: str, subquestions: List[Dict[str, Any]], search_results: Dict[str, List[Dict[str, Any]]]) -> str:
        """
        Synthesize a comprehensive, well-written passage-style report from subquestions and search results.
        Creates a cohesive narrative instead of question-answer format.
        Enhanced with citations and substantial paragraph generation.

        Args:
            research_question: The overall question the report answers.
            subquestions: Dicts with "id" and "text"; "text" labels the sources
                found for that id (the id itself is used when text is missing).
            search_results: Mapping of subquestion id to a list of source dicts
                with "title", "url" and "content". Missing keys are tolerated.

        Returns:
            The generated report with a "References:" section appended unless
            the report already has a references/bibliography heading. When
            there are no search results at all, the immediate fallback report.
        """
        # Immediate fallback if no search results or LM Studio issues
        if not search_results or not any(search_results.values()):
            return self._generate_immediate_fallback(research_question, subquestions)

        # Build enhanced context with limited sources to avoid context overflow
        context = []
        citations = {}

        for qid, sources in search_results.items():
            if not sources:
                continue
            subq_text = next(
                (
                    sq.get("text", qid)
                    for sq in (subquestions or [])
                    if isinstance(sq, dict) and sq.get("id") == qid
                ),
                qid,
            )
            # Take only top 2 sources per subquestion to reduce context size
            for i, source in enumerate(sources[:2], 1):
                title = source.get("title") or "Untitled source"
                url = source.get("url") or ""
                raw_content = source.get("content") or ""
                if not isinstance(raw_content, str):
                    raw_content = str(raw_content)

                # Create citation key
                citation_key = f"[{str(qid).upper()}{i}]"
                citations[citation_key] = {'title': title, 'url': url}

                # Limit content to 200 characters to further reduce context
                content = raw_content[:200] + "..." if len(raw_content) > 200 else raw_content

                context.append({
                    'topic': subq_text,
                    'title': title,
                    'content': content,
                    'citation': citation_key,
                })

        # Create comprehensive context string with citations
        context_str = "\n\n".join(
            f"Topic: {item['topic']}\nSource: {item['title']}\nContent: {item['content']}\nCitation: {item['citation']}"
            for item in context
        )

        # Enhanced system prompt for detailed, professional paragraphs with citations
        system_prompt = (
            "You are an expert research writer. Write a comprehensive 8-paragraph academic report. "
            "Each paragraph: 5-7 sentences, 120-180 words for balanced content. "
            "Structure: 1) Introduction, 2) Background, 3) Literature Review, 4) Methodology/Approach, "
            "5) Analysis with citations, 6) Implications, 7) Challenges/Limitations, 8) Conclusion. "
            "Use formal academic tone, complex sentences, and in-text citations [CITATION]. "
            "No headings or Q&A format - create flowing narrative. Provide detailed analysis and examples."
        )

        user_prompt = f"""
Research Question: {research_question}

Research Materials with Citations:
{context_str}

Write a comprehensive 8-paragraph academic report:
1. Introduction (topic significance, context, importance)
2. Background (current state, historical development, key concepts)
3. Literature Review (existing research, scholarly perspectives)
4. Methodology/Approach (analytical framework, research methods)
5. Analysis (findings with citations, evidence, detailed examination)
6. Implications (practical impact, applications, consequences)
7. Challenges/Limitations (constraints, future research needs)
8. Conclusion (summary, recommendations, future directions)

Each paragraph: 5-7 sentences, 120-180 words, include citations [CITATION], no instructions in response."""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        print("[writer] Generating comprehensive report with citations...")
        report = self._call_lm_studio(messages)

        # Ensure report is a string before processing
        if not isinstance(report, str):
            print(f"[writer] Warning: Report is not a string, got {type(report)}: {report}")
            report = str(report) if report else "Error: No content generated"

        # NOTE(review): failure messages returned by _call_lm_studio are plain
        # strings too, so they also receive the bibliography below.
        # Add bibliography at the end if not already included
        if report and not self._has_reference_section(report):
            entries = []
            for citation_key, source_info in citations.items():
                entry = f"{citation_key} {source_info['title']}\n"
                if source_info['url']:
                    entry += f" Available at: {source_info['url']}\n"
                entries.append(entry + "\n")
            report += "\n\nReferences:\n\n" + "".join(entries)

        return report
def _main() -> None:
    """
    Run the planner, searcher and writer agents for one question read from stdin.

    Prints the subquestions, the search results and the synthesized report, then
    saves the report to ``research_report_<id>.txt`` in the current directory.
    Exits with status 1 when no question is entered.
    """
    # Local imports: only needed when the module is run as a script.
    import sys
    import uuid

    print("Enter your research question:")
    research_question = input("> ").strip()
    if not research_question:
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist in every interpreter configuration.
        sys.exit(1)

    # Run Planner Agent
    print("[system] Running planner agent...")
    planner = PlannerAgent()
    planner_result = planner.plan(research_question)
    subquestions = planner_result.get("subquestions", [])

    print(f"[system] Planner generated {len(subquestions)} subquestions.")
    print("Subquestions:")
    for i, subq in enumerate(subquestions):
        qid = subq.get("id", f"q{i+1}")
        qtype = subq.get("type", "unknown")
        text = subq.get("text", str(subq))
        print(f" {i+1}. [{qid} - {qtype}] {text}")
    print()

    # Run Searcher Agent
    print("[system] Running searcher agent...")
    searcher = SearcherAgent()
    search_results = searcher.search_all(subquestions)

    print("\nSearch Results:")
    for qid, sources in search_results.items():
        print(f"\n{qid}:")
        for source in sources:
            content = source.get('content') or ''
            print(f" Title: {source.get('title')}")
            print(f" URL: {source.get('url')}")
            print(f" Content: {content[:500]}{'...' if len(content) > 500 else ''}")
            print(f" Score: {source.get('score')}")
            print(" ---")
    print()

    # Run Writer Agent
    print("[system] Running writer agent...")
    writer = WriterAgent()
    report = writer.synthesize_report(research_question, subquestions, search_results)

    print("\n" + "="*80)
    print("SYNTHESIS REPORT")
    print("="*80)
    print(report)
    print("="*80)

    # Save report to file
    report_id = str(uuid.uuid4())[:8]
    filename = f"research_report_{report_id}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"Research Question: {research_question}\n\n")
        f.write("Subquestions:\n")
        for i, subq in enumerate(subquestions):
            # Same fallback as the display loop above, so a subquestion
            # without "text" cannot crash after the report was generated.
            f.write(f" {i+1}. {subq.get('text', str(subq))}\n")
        f.write("\nSynthesis Report:\n")
        f.write(report)
    print(f"[writer] Saved full report to: {filename}")


if __name__ == "__main__":
    _main()