Skip to content
Merged

Dev #120

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions crates/vectorless-compiler/src/passes/backend/route.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,32 +305,6 @@ mod tests {
assert!(routes.is_empty());
}

#[tokio::test]
/// End-to-end check of `RoutePass::execute` against a context whose tree comes
/// from `build_test_tree_with_hints()`.
///
/// The test asserts that the pass returns `Ok` with `success` set, that the
/// resulting `query_routes` table reports at least one intent route and one
/// concept route, and that a non-zero `route_time_ms` metric was recorded.
async fn test_execute_end_to_end() {
let tree = build_test_tree_with_hints();

// Context built from literal content "test" and default pipeline options;
// the fixture tree is attached directly to `ctx.tree` before the pass runs.
let mut ctx = CompileContext::new(
crate::pipeline::CompilerInput::content("test"),
crate::config::PipelineOptions::default(),
);
ctx.tree = Some(tree);

let mut pass = RoutePass::new();
let result = pass.execute(&mut ctx).await;

// The pass must both return Ok and flag itself as successful.
assert!(result.is_ok());
let pass_result = result.unwrap();
assert!(pass_result.success);

// Verify the routing table was populated: `unwrap` panics if the pass left
// `query_routes` unset, and both route counts must be non-zero.
let table = ctx.query_routes.unwrap();
assert!(table.intent_route_count() > 0);
assert!(table.concept_route_count() > 0);

// Verify metrics recorded.
// NOTE(review): if `route_time_ms` is a whole-millisecond counter, a run that
// finishes in under 1 ms would record 0 and fail this assertion — confirm the
// metric's type/resolution before relying on a strict `> 0` check.
assert!(ctx.metrics.route_time_ms > 0);
}

#[tokio::test]
async fn test_execute_no_tree() {
let mut ctx = CompileContext::new(
Expand Down
2 changes: 1 addition & 1 deletion crates/vectorless-document/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub use structure::{DocumentStructure, StructureNode};
pub use toc::{TocConfig, TocEntry, TocNode, TocView};
pub use tree::{DocumentTree, RetrievalIndex};
pub use understanding::{
Concept, Document, DocumentInfo, DocumentMeta, IngestInput, CURRENT_SCHEMA_VERSION,
CURRENT_SCHEMA_VERSION, Concept, Document, DocumentInfo, DocumentMeta, IngestInput,
};

// Re-export agent acceleration types
Expand Down
39 changes: 13 additions & 26 deletions crates/vectorless-engine/src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,21 +347,21 @@ impl Engine {
/// Build a [`CompileArtifact`] from a [`Document`].
fn build_index_item(doc: &Document) -> CompileArtifact {
use vectorless_document::DocumentFormat;
let format = DocumentFormat::from_extension(&doc.format)
.unwrap_or(DocumentFormat::Markdown);
let format =
DocumentFormat::from_extension(&doc.format).unwrap_or(DocumentFormat::Markdown);

CompileArtifact::new(
doc.doc_id.clone(),
doc.name.clone(),
format,
if doc.summary.is_empty() { None } else { Some(doc.summary.clone()) },
if doc.summary.is_empty() {
None
} else {
Some(doc.summary.clone())
},
doc.page_count,
)
.with_source_path(
doc.source_path
.clone()
.unwrap_or_default(),
)
.with_source_path(doc.source_path.clone().unwrap_or_default())
}

// ============================================================
Expand Down Expand Up @@ -441,10 +441,7 @@ impl Engine {
}

/// Load a full Document by ID (for navigation via primitives).
pub async fn load_document(
&self,
doc_id: &str,
) -> Result<Option<Document>> {
pub async fn load_document(&self, doc_id: &str) -> Result<Option<Document>> {
self.workspace.load(doc_id).await
}

Expand Down Expand Up @@ -595,9 +592,8 @@ impl Engine {
None => return Ok(IndexAction::FullIndex { existing_id: None }),
};

let format =
vectorless_compiler::parse::DocumentFormat::from_extension(&stored_doc.format)
.unwrap_or(vectorless_compiler::parse::DocumentFormat::Markdown);
let format = vectorless_compiler::parse::DocumentFormat::from_extension(&stored_doc.format)
.unwrap_or(vectorless_compiler::parse::DocumentFormat::Markdown);
let pipeline_options = self.build_pipeline_options(options, source);

// If logic fingerprint changed, remove old doc before full reprocess
Expand Down Expand Up @@ -667,13 +663,7 @@ impl Engine {
for doc in &loaded_docs {
let keywords = Self::extract_keywords_from_doc(doc);
let node_count = doc.meta.as_ref().map(|m| m.node_count).unwrap_or(0);
builder.add_document(
&doc.doc_id,
&doc.name,
&doc.format,
node_count,
keywords,
);
builder.add_document(&doc.doc_id, &doc.name, &doc.format, node_count, keywords);
}

let graph = builder.build();
Expand Down Expand Up @@ -782,9 +772,6 @@ mod tests {
let item = Engine::build_index_item(&doc);

assert_eq!(item.source_path, Some(String::new())); // unwrap_or_default
assert_eq!(
item.format,
vectorless_compiler::parse::DocumentFormat::Pdf
);
assert_eq!(item.format, vectorless_compiler::parse::DocumentFormat::Pdf);
}
}
19 changes: 11 additions & 8 deletions crates/vectorless-engine/src/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,13 @@ use tracing::info;
use uuid::Uuid;

use vectorless_compiler::{CompilerInput, PipelineExecutor, PipelineOptions, SourceFormat};
use vectorless_document::{
Document, DocumentFormat, DocumentMeta, CURRENT_SCHEMA_VERSION,
};
use vectorless_document::{CURRENT_SCHEMA_VERSION, Document, DocumentFormat, DocumentMeta};
use vectorless_error::{Error, Result};
use vectorless_llm::LlmClient;
use vectorless_utils::fingerprint::Fingerprint;

use super::compile_input::CompileSource;
use vectorless_events::{EventEmitter, CompileEvent};
use vectorless_events::{CompileEvent, EventEmitter};

/// Document compile client.
///
Expand Down Expand Up @@ -257,7 +255,8 @@ impl IndexerClient {
.ok_or_else(|| Error::Parse("Document tree not generated".to_string()))?;

let node_count = tree.node_count();
self.events.emit_compile(CompileEvent::TreeBuilt { node_count });
self.events
.emit_compile(CompileEvent::TreeBuilt { node_count });

let doc_name = name
.map(str::to_string)
Expand All @@ -276,8 +275,10 @@ impl IndexerClient {
meta = meta.with_logic_fingerprint(logic_fp.to_string());

// Extract stats from metrics
let (summary_tokens, duration_ms) =
(result.metrics.total_tokens_generated, result.metrics.total_time_ms());
let (summary_tokens, duration_ms) = (
result.metrics.total_tokens_generated,
result.metrics.total_time_ms(),
);
meta.update_processing_stats(node_count, summary_tokens, duration_ms);

// Compute content fingerprint from source file if available
Expand Down Expand Up @@ -308,7 +309,9 @@ impl IndexerClient {
};

info!("Compiling complete: {} ({} nodes)", doc.doc_id, node_count);
self.events.emit_compile(CompileEvent::Complete { doc_id: doc.doc_id.clone() });
self.events.emit_compile(CompileEvent::Complete {
doc_id: doc.doc_id.clone(),
});

Ok(doc)
}
Expand Down
2 changes: 1 addition & 1 deletion crates/vectorless-storage/src/persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use vectorless_document::{Document, CURRENT_SCHEMA_VERSION};
use vectorless_document::{CURRENT_SCHEMA_VERSION, Document};
use vectorless_error::Error;
use vectorless_error::Result;

Expand Down
12 changes: 2 additions & 10 deletions crates/vectorless-storage/src/workspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,7 @@ impl Workspace {
Self::save_meta_index(&inner)?;

// Update catalog with DocCard
if let Some(card) = doc
.nav_index
.doc_card()
.cloned()
{
if let Some(card) = doc.nav_index.doc_card().cloned() {
inner.catalog.insert(doc_id.clone(), card);
Self::save_catalog_index(&inner)?;
}
Expand Down Expand Up @@ -571,11 +567,7 @@ impl Workspace {
for key in doc_keys {
if let Some(bytes) = inner.backend.get(key)? {
if let Ok(doc) = load_document_from_bytes(&bytes) {
if let Some(card) = doc
.nav_index
.doc_card()
.cloned()
{
if let Some(card) = doc.nav_index.doc_card().cloned() {
inner.catalog.insert(doc.doc_id.clone(), card);
}
}
Expand Down
133 changes: 122 additions & 11 deletions crates/vectorless-utils/src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,128 @@

/// Common English stop words for keyword filtering.
pub const STOPWORDS: &[&str] = &[
"a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
"do", "does", "did", "will", "would", "could", "should", "may", "might", "must", "shall",
"can", "need", "dare", "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
"from", "as", "into", "through", "during", "before", "after", "above", "below", "between",
"under", "again", "further", "then", "once", "here", "there", "when", "where", "why", "how",
"all", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
"own", "same", "so", "than", "too", "very", "just", "and", "but", "if", "or", "because",
"until", "while", "about", "what", "which", "who", "whom", "this", "that", "these", "those",
"i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours",
"yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself",
"it", "its", "itself", "they", "them", "their", "theirs", "themselves",
"a",
"an",
"the",
"is",
"are",
"was",
"were",
"be",
"been",
"being",
"have",
"has",
"had",
"do",
"does",
"did",
"will",
"would",
"could",
"should",
"may",
"might",
"must",
"shall",
"can",
"need",
"dare",
"ought",
"used",
"to",
"of",
"in",
"for",
"on",
"with",
"at",
"by",
"from",
"as",
"into",
"through",
"during",
"before",
"after",
"above",
"below",
"between",
"under",
"again",
"further",
"then",
"once",
"here",
"there",
"when",
"where",
"why",
"how",
"all",
"each",
"few",
"more",
"most",
"other",
"some",
"such",
"no",
"nor",
"not",
"only",
"own",
"same",
"so",
"than",
"too",
"very",
"just",
"and",
"but",
"if",
"or",
"because",
"until",
"while",
"about",
"what",
"which",
"who",
"whom",
"this",
"that",
"these",
"those",
"i",
"me",
"my",
"myself",
"we",
"our",
"ours",
"ourselves",
"you",
"your",
"yours",
"yourself",
"yourselves",
"he",
"him",
"his",
"himself",
"she",
"her",
"hers",
"herself",
"it",
"its",
"itself",
"they",
"them",
"their",
"theirs",
"themselves",
];

/// Extract keywords from a query string, filtering stop words.
Expand Down
Loading
Loading