diff --git a/crates/atomic-core/src/lib.rs b/crates/atomic-core/src/lib.rs index b9b7cb18..b4916d15 100644 --- a/crates/atomic-core/src/lib.rs +++ b/crates/atomic-core/src/lib.rs @@ -1562,13 +1562,51 @@ impl AtomicCore { let (strategy, ctx) = self.build_wiki_strategy_context(tag_id, tag_name).await?; - let draft = match wiki::strategy_propose(&strategy, &ctx, &existing) + let draft = match wiki::strategy_propose_outcome(&strategy, &ctx, &existing) .await .map_err(|e| AtomicCoreError::Wiki(e))? { - Some(d) => d, - None => { - tracing::info!(tag_id, "[wiki] No update warranted; no proposal created"); + wiki::WikiProposalOutcome::Draft(d) => d, + wiki::WikiProposalOutcome::NoChange => { + // The LLM evaluated update chunks and decided nothing needs to + // change. Advance the baseline so the same atoms are not + // re-evaluated on every subsequent "Generate Update" click. + if let Err(e) = self.storage.advance_wiki_baseline_sync(tag_id, None).await { + tracing::warn!(tag_id, error = %e, "[wiki] Failed to advance article baseline on no-change"); + } else { + tracing::info!( + tag_id, + "[wiki] No update warranted; article baseline advanced" + ); + } + return Ok(None); + } + wiki::WikiProposalOutcome::NoUpdateChunks => { + // No chunks were selected. This can mean there are truly no new + // atoms, but it can also mean older atoms were newly associated + // with this tag hierarchy. Only advance if the current tag count + // has not increased beyond the article's recorded baseline. + match self + .storage + .advance_wiki_baseline_sync(tag_id, Some(existing.article.atom_count)) + .await + { + Ok(true) => { + tracing::info!( + tag_id, + "[wiki] No update chunks selected; article baseline advanced" + ); + } + Ok(false) => { + tracing::info!( + tag_id, + "[wiki] No update chunks selected; article baseline left unchanged because atom count increased" + ); + } + Err(e) => { + tracing::warn!(tag_id, error = %e, "[wiki] Failed to advance article baseline after empty update selection"); + } + } return Ok(None); } }; @@ -1640,13 +1678,12 @@ impl AtomicCore { )); } - let now = chrono::Utc::now().to_rfc3339(); let article = WikiArticle { id: existing.article.id.clone(), tag_id: tag_id.to_string(), content: proposal.content.clone(), created_at: existing.article.created_at.clone(), - updated_at: now, + updated_at: proposal.created_at.clone(), atom_count: existing.article.atom_count + proposal.new_atom_count, }; @@ -4815,6 +4852,58 @@ mod tests { assert_eq!(remaining_fts, 0); } + #[tokio::test] + async fn test_guarded_wiki_baseline_advance_keeps_older_retagged_atoms_pending() { + let (db, _temp) = create_empty_test_db(); + let tag = db.create_tag("Retagged", None).await.unwrap(); + let article_updated_at = "2026-01-02T00:00:00+00:00"; + + { + let sqlite = db.storage.as_sqlite().unwrap(); + let conn = sqlite.db.conn.lock().unwrap(); + for atom_id in ["atom1", "atom2"] { + conn.execute( + "INSERT INTO atoms (id, content, created_at, updated_at) + VALUES (?1, ?2, ?3, ?3)", + rusqlite::params![atom_id, "older atom content", "2026-01-01T00:00:00+00:00"], + ) + .unwrap(); + conn.execute( + "INSERT INTO atom_tags (atom_id, tag_id) VALUES (?1, ?2)", + rusqlite::params![atom_id, &tag.id], + ) + .unwrap(); + } + conn.execute( + "INSERT INTO wiki_articles (id, tag_id, content, created_at, updated_at, atom_count) + VALUES (?1, ?2, ?3, ?4, ?4, 1)", + rusqlite::params![ + "wiki1", + &tag.id, + "Existing article", + article_updated_at + ], + ) + .unwrap(); + } + + let advanced = db + .storage + .advance_wiki_baseline_sync(&tag.id, Some(1)) + .await + .unwrap(); + assert!( + !advanced, + "baseline must not advance when current atom count increased" + ); + + let status = db.get_wiki_status(&tag.id).await.unwrap(); + assert_eq!(status.article_atom_count, 1); + assert_eq!(status.current_atom_count, 2); + assert_eq!(status.new_atoms_available, 1); + assert_eq!(status.updated_at.as_deref(), Some(article_updated_at)); + } + #[tokio::test] async fn test_global_search_ignores_stale_wiki_fts_rows() { let (db, _temp) = create_test_db().await; diff --git a/crates/atomic-core/src/providers/testdata/schemas.snap.json b/crates/atomic-core/src/providers/testdata/schemas.snap.json index c09587d0..6a418339 100644 --- a/crates/atomic-core/src/providers/testdata/schemas.snap.json +++ b/crates/atomic-core/src/providers/testdata/schemas.snap.json @@ -167,7 +167,7 @@ "type": "string" }, "content": { - "description": "New markdown content for the operation. For NoChange: empty string.", + "description": "New markdown content for the operation. Only NoChange may use empty content. AppendToSection, ReplaceSection, and InsertSection must provide non-empty markdown content with citations.", "type": "string" }, "heading": { diff --git a/crates/atomic-core/src/storage/mod.rs b/crates/atomic-core/src/storage/mod.rs index 13fcb2db..0ce4a92d 100644 --- a/crates/atomic-core/src/storage/mod.rs +++ b/crates/atomic-core/src/storage/mod.rs @@ -553,6 +553,8 @@ dispatch! { => sqlite: get_wiki_proposal_sync, pg_trait: WikiStore, pg_method: get_wiki_proposal; fn delete_wiki_proposal_sync(&self, tag_id: &str) -> Result<(), AtomicCoreError> => sqlite: delete_wiki_proposal_sync, pg_trait: WikiStore, pg_method: delete_wiki_proposal; + fn advance_wiki_baseline_sync(&self, tag_id: &str, max_current_count: Option) -> Result + => sqlite: advance_wiki_baseline_sync, pg_trait: WikiStore, pg_method: advance_wiki_baseline; // ---- BriefingStore ---- fn list_new_atoms_since_sync(&self, since: &str, limit: i32) -> Result, AtomicCoreError> diff --git a/crates/atomic-core/src/storage/postgres/wiki.rs b/crates/atomic-core/src/storage/postgres/wiki.rs index 8711a793..398ded22 100644 --- a/crates/atomic-core/src/storage/postgres/wiki.rs +++ b/crates/atomic-core/src/storage/postgres/wiki.rs @@ -533,11 +533,18 @@ impl WikiStore for PostgresStorage { last_update: &str, max_source_tokens: usize, ) -> StorageResult, i32)>> { - // Get atoms added after the last update + // Get atoms added after the last update, spanning the full tag hierarchy. let new_atom_ids: Vec = sqlx::query_scalar( - "SELECT DISTINCT a.id FROM atoms a + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT $1 + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + ) + SELECT DISTINCT a.id FROM atoms a INNER JOIN atom_tags at ON a.id = at.atom_id - WHERE at.tag_id = $1 AND a.created_at > $2 AND a.db_id = $3 AND at.db_id = $3", + WHERE at.tag_id IN (SELECT id FROM descendant_tags) + AND a.created_at > $2 AND a.db_id = $3 AND at.db_id = $3", ) .bind(tag_id) .bind(last_update) @@ -560,7 +567,7 @@ impl WikiStore for PostgresStorage { .await .map_err(|e| AtomicCoreError::Wiki(e.to_string()))?; - let new_chunks = if let Some(ref centroid_vec) = centroid { + let mut new_chunks = if let Some(ref centroid_vec) = centroid { let rows: Vec<(String, i32, String, f64)> = sqlx::query_as( "SELECT ac.atom_id, ac.chunk_index, ac.content, 1 - (e.embedding <=> $1::vector) as similarity @@ -622,17 +629,58 @@ impl WikiStore for PostgresStorage { chunks }; + if new_chunks.is_empty() && centroid.is_some() { + let rows: Vec<(String, i32, String)> = sqlx::query_as( + "SELECT atom_id, chunk_index, content FROM atom_chunks + WHERE atom_id = ANY($1) AND db_id = $2 ORDER BY atom_id, chunk_index", + ) + .bind(&new_atom_ids) + .bind(&self.db_id) + .fetch_all(&self.pool) + .await + .map_err(|e| AtomicCoreError::Wiki(e.to_string()))?; + + let mut chunks = Vec::new(); + let mut total_tokens = 0; + for (atom_id, chunk_index, content) in rows { + let tokens = count_tokens(&content); + if total_tokens + tokens > max_source_tokens && !chunks.is_empty() { + break; + } + total_tokens += tokens; + chunks.push(ChunkWithContext { + atom_id, + chunk_index, + content, + similarity_score: 1.0, + }); + } + new_chunks = chunks; + } + if new_chunks.is_empty() { - return Ok(None); + return Err(AtomicCoreError::Wiki( + "New atoms are not ready for wiki update yet; chunking or embedding is still pending" + .to_string(), + )); } - let atom_count: Option = - sqlx::query_scalar("SELECT COUNT(*) FROM atom_tags WHERE tag_id = $1 AND db_id = $2") - .bind(tag_id) - .bind(&self.db_id) - .fetch_one(&self.pool) - .await - .map_err(|e| AtomicCoreError::Wiki(e.to_string()))?; + // Count uses the same descendant CTE as get_article_status. + let atom_count: Option = sqlx::query_scalar( + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT $1 + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + ) + SELECT COUNT(DISTINCT atom_id) FROM atom_tags + WHERE tag_id IN (SELECT id FROM descendant_tags) AND db_id = $2", + ) + .bind(tag_id) + .bind(&self.db_id) + .fetch_one(&self.pool) + .await + .map_err(|e| AtomicCoreError::Wiki(e.to_string()))?; Ok(Some((new_chunks, atom_count.unwrap_or(0) as i32))) } @@ -802,6 +850,45 @@ impl WikiStore for PostgresStorage { .map_err(|e| AtomicCoreError::DatabaseOperation(e.to_string()))?; Ok(()) } + + async fn advance_wiki_baseline( + &self, + tag_id: &str, + max_current_count: Option, + ) -> StorageResult { + let now = chrono::Utc::now().to_rfc3339(); + let advanced = sqlx::query_scalar::<_, bool>( + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT $1::text + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + WHERE t.db_id = $2 + ), + current_total(atom_count) AS ( + SELECT COUNT(DISTINCT atom_id)::int FROM atom_tags + WHERE tag_id IN (SELECT id FROM descendant_tags) AND db_id = $2 + ), + updated AS ( + UPDATE wiki_articles + SET atom_count = current_total.atom_count, updated_at = $3 + FROM current_total + WHERE wiki_articles.tag_id = $1 + AND wiki_articles.db_id = $2 + AND ($4::int IS NULL OR current_total.atom_count <= $4) + RETURNING 1 + ) + SELECT EXISTS(SELECT 1 FROM updated)", + ) + .bind(tag_id) + .bind(&self.db_id) + .bind(&now) + .bind(max_current_count) + .fetch_one(&self.pool) + .await + .map_err(|e| AtomicCoreError::Wiki(e.to_string()))?; + Ok(advanced) + } } // Private helper methods diff --git a/crates/atomic-core/src/storage/sqlite/wiki.rs b/crates/atomic-core/src/storage/sqlite/wiki.rs index 6759c5e8..76c13924 100644 --- a/crates/atomic-core/src/storage/sqlite/wiki.rs +++ b/crates/atomic-core/src/storage/sqlite/wiki.rs @@ -233,12 +233,21 @@ impl SqliteStorage { ) -> StorageResult, i32)>> { let conn = self.db.read_conn()?; - // Get atoms added after the last update + // Get atoms added after the last update, spanning the full tag hierarchy + // (same scope as generation and get_article_status — prevents "N new atoms" + // banners for atoms in child tags that the LLM can never see as updates). let mut new_atom_stmt = conn .prepare( - "SELECT DISTINCT a.id FROM atoms a + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT ?1 + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + ) + SELECT DISTINCT a.id FROM atoms a INNER JOIN atom_tags at ON a.id = at.atom_id - WHERE at.tag_id = ?1 AND a.created_at > ?2", + WHERE at.tag_id IN (SELECT id FROM descendant_tags) + AND a.created_at > ?2", ) .map_err(|e| { AtomicCoreError::Wiki(format!("Failed to prepare new atoms query: {}", e)) @@ -265,7 +274,7 @@ impl SqliteStorage { ) .ok(); - let new_chunks = if let Some(ref centroid) = centroid_blob { + let mut new_chunks = if let Some(ref centroid) = centroid_blob { wiki::centroid::select_chunks_by_centroid( &conn, centroid, @@ -279,13 +288,38 @@ impl SqliteStorage { .map_err(|e| AtomicCoreError::Wiki(e))? }; + if new_chunks.is_empty() && centroid_blob.is_some() { + tracing::debug!( + tag_id, + "[wiki/storage] No centroid-ranked update chunks found, falling back to unranked update chunk selection" + ); + new_chunks = wiki::centroid::select_new_chunks_unranked( + &conn, + &new_atom_id_set, + max_source_tokens, + ) + .map_err(|e| AtomicCoreError::Wiki(e))?; + } + if new_chunks.is_empty() { - return Ok(None); + return Err(AtomicCoreError::Wiki( + "New atoms are not ready for wiki update yet; chunking or embedding is still pending" + .to_string(), + )); } + // Count uses the same descendant CTE as get_article_status so the + // stored atom_count stays in sync with what the banner reports. let atom_count: i32 = conn .query_row( - "SELECT COUNT(*) FROM atom_tags WHERE tag_id = ?1", + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT ?1 + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + ) + SELECT COUNT(DISTINCT atom_id) FROM atom_tags + WHERE tag_id IN (SELECT id FROM descendant_tags)", [tag_id], |row| row.get(0), ) @@ -413,6 +447,47 @@ impl SqliteStorage { .map_err(|e| AtomicCoreError::Wiki(format!("Failed to delete wiki proposal: {}", e)))?; Ok(()) } + + pub(crate) fn advance_wiki_baseline_sync( + &self, + tag_id: &str, + max_current_count: Option, + ) -> StorageResult { + let conn = self + .db + .conn + .lock() + .map_err(|e| AtomicCoreError::Lock(e.to_string()))?; + // Use the same descendant CTE as get_article_status so the counts agree. + let current_count: i32 = conn + .query_row( + "WITH RECURSIVE descendant_tags(id) AS ( + SELECT ?1 + UNION ALL + SELECT t.id FROM tags t + INNER JOIN descendant_tags dt ON t.parent_id = dt.id + ) + SELECT COUNT(DISTINCT atom_id) FROM atom_tags + WHERE tag_id IN (SELECT id FROM descendant_tags)", + [tag_id], + |row| row.get(0), + ) + .map_err(|e| { + AtomicCoreError::Wiki(format!("Failed to count atoms for baseline advance: {}", e)) + })?; + + if matches!(max_current_count, Some(max) if current_count > max) { + return Ok(false); + } + + let now = chrono::Utc::now().to_rfc3339(); + conn.execute( + "UPDATE wiki_articles SET atom_count = ?1, updated_at = ?2 WHERE tag_id = ?3", + (current_count, &now, tag_id), + ) + .map_err(|e| AtomicCoreError::Wiki(format!("Failed to advance wiki baseline: {}", e)))?; + Ok(true) + } } #[async_trait] @@ -573,4 +648,18 @@ impl WikiStore for SqliteStorage { .await .map_err(|e| AtomicCoreError::Lock(e.to_string()))? } + + async fn advance_wiki_baseline( + &self, + tag_id: &str, + max_current_count: Option, + ) -> StorageResult { + let storage = self.clone(); + let tag_id = tag_id.to_string(); + tokio::task::spawn_blocking(move || { + storage.advance_wiki_baseline_sync(&tag_id, max_current_count) + }) + .await + .map_err(|e| AtomicCoreError::Lock(e.to_string()))? + } } diff --git a/crates/atomic-core/src/storage/traits.rs b/crates/atomic-core/src/storage/traits.rs index 6356fb5a..80d02b1a 100644 --- a/crates/atomic-core/src/storage/traits.rs +++ b/crates/atomic-core/src/storage/traits.rs @@ -684,7 +684,9 @@ pub trait WikiStore: Send + Sync { /// Select chunks for wiki article update (new atoms since last update). /// /// Returns None if no new atoms have been added since `last_update`. - /// Otherwise returns (new_chunks, atom_count). + /// Otherwise returns (new_chunks, atom_count). If new atoms exist but no + /// selectable chunks are available yet, returns an error so callers do not + /// advance the article baseline before the async chunking pipeline catches up. async fn get_wiki_update_chunks( &self, tag_id: &str, @@ -700,6 +702,16 @@ pub trait WikiStore: Send + Sync { /// Delete the pending wiki proposal for a tag (idempotent). async fn delete_wiki_proposal(&self, tag_id: &str) -> StorageResult<()>; + + /// Advance the article baseline without changing content: update `atom_count` + /// to the current tag-hierarchy total and `updated_at` to now. If + /// `max_current_count` is set and the current total exceeds it, leave the + /// article unchanged and return `false`. + async fn advance_wiki_baseline( + &self, + tag_id: &str, + max_current_count: Option, + ) -> StorageResult; } // ==================== Briefing Storage ==================== diff --git a/crates/atomic-core/src/wiki/mod.rs b/crates/atomic-core/src/wiki/mod.rs index 2d58763e..20d639fc 100644 --- a/crates/atomic-core/src/wiki/mod.rs +++ b/crates/atomic-core/src/wiki/mod.rs @@ -143,6 +143,30 @@ pub struct WikiProposalDraft { pub new_atom_count: i32, } +/// Result of attempting to build a wiki update proposal. +pub enum WikiProposalOutcome { + Draft(WikiProposalDraft), + /// The selector found no chunks to send to the LLM. + NoUpdateChunks, + /// The LLM reviewed update chunks and explicitly found no useful change. + NoChange, +} + +/// Propose an update to an existing wiki article using the given strategy. +/// +/// Returns `None` if no update is warranted (no new atoms, empty ops, or the +/// LLM returns `NoChange`). +pub async fn strategy_propose( + strategy: &WikiStrategy, + ctx: &WikiStrategyContext, + existing: &WikiArticleWithCitations, +) -> Result, String> { + match strategy_propose_outcome(strategy, ctx, existing).await? { + WikiProposalOutcome::Draft(draft) => Ok(Some(draft)), + WikiProposalOutcome::NoUpdateChunks | WikiProposalOutcome::NoChange => Ok(None), + } +} + /// Propose an update to an existing wiki article using the given strategy. /// /// Composes two independent steps: @@ -155,17 +179,17 @@ pub struct WikiProposalDraft { /// applier merges them into the existing content, and citations are extracted /// from the merged output. /// -/// Returns `None` if no update is warranted (no new atoms, empty ops, or the -/// LLM returns `NoChange`). -pub async fn strategy_propose( +/// Returns a typed no-op outcome so callers can distinguish "nothing was sent +/// to the LLM" from "the LLM reviewed new chunks and returned `NoChange`". +pub async fn strategy_propose_outcome( strategy: &WikiStrategy, ctx: &WikiStrategyContext, existing: &WikiArticleWithCitations, -) -> Result, String> { +) -> Result { let Some((new_chunks, total_atom_count)) = select_update_chunks(strategy, ctx, existing).await? else { - return Ok(None); + return Ok(WikiProposalOutcome::NoUpdateChunks); }; // New-atom count is the delta against the baseline the live article was @@ -174,7 +198,10 @@ pub async fn strategy_propose( // since the last accepted version. let new_atom_count = (total_atom_count - existing.article.atom_count).max(0); - generate_section_ops_proposal(ctx, existing, &new_chunks, new_atom_count).await + match generate_section_ops_proposal(ctx, existing, &new_chunks, new_atom_count).await? { + Some(draft) => Ok(WikiProposalOutcome::Draft(draft)), + None => Ok(WikiProposalOutcome::NoChange), + } } /// Strategy-specific chunk selection for the propose path. @@ -256,7 +283,7 @@ pub(crate) fn section_ops_schema() -> serde_json::Value { }, "content": { "type": "string", - "description": "New markdown content for the operation. For NoChange: empty string." + "description": "New markdown content for the operation. Only NoChange may use empty content. AppendToSection, ReplaceSection, and InsertSection must provide non-empty markdown content with citations." } }, "required": ["op", "heading", "after_heading", "content"], @@ -340,11 +367,16 @@ async fn generate_section_ops_proposal( // Enumerate current section headings for the LLM to reference verbatim. let heading_list = extract_current_headings(&existing.article.content); let headings_block = if heading_list.is_empty() { - "(no ## headings — the article has no sections yet; use InsertSection with after_heading=\"\" to add one at the end)".to_string() + "(no section headings — the article has no sections yet; use InsertSection with after_heading=\"\" to add one at the end)".to_string() } else { heading_list .iter() - .map(|h| format!("- {}", h)) + .map(|(level, h)| { + // Indent sub-headings so the LLM can see the hierarchy. + // Level 2 = no indent; each extra level adds two spaces. + let indent = " ".repeat((*level as usize).saturating_sub(2)); + format!("{}{}", indent, h) + }) .collect::>() .join("\n") }; @@ -498,7 +530,7 @@ async fn generate_section_ops_proposal( /// stay embedded in their parent section's body. Surfacing `###` headings to /// the LLM would let it target a heading the applier can't resolve, which /// discards the entire proposal as a hallucination. -fn extract_current_headings(content: &str) -> Vec { +fn extract_current_headings(content: &str) -> Vec<(u8, String)> { let mut headings = Vec::new(); for line in content.lines() { let stripped = line.trim_start(); @@ -507,8 +539,8 @@ fn extract_current_headings(content: &str) -> Vec { while hashes < bytes.len() && bytes[hashes] == b'#' { hashes += 1; } - if hashes == 2 && hashes < bytes.len() && bytes[hashes] == b' ' { - headings.push(stripped[hashes + 1..].trim().to_string()); + if hashes >= 2 && hashes < bytes.len() && bytes[hashes] == b' ' { + headings.push((hashes as u8, stripped[hashes + 1..].trim().to_string())); } } headings @@ -561,7 +593,8 @@ Operations (value of the `op` field): - "InsertSection": add a brand-new section (use only for genuinely new topics not covered elsewhere). Set `heading` to the new section's heading. Set `after_heading` to the exact existing heading you want to insert AFTER, or leave it empty ("") to append the new section at the end of the article. Set `content` to the new section body. Rules: -- `heading` and `after_heading` values must EXACTLY match one of the headings listed under CURRENT SECTION HEADINGS when they reference existing sections. Do not paraphrase, reword, or change capitalization. Do not include the ## prefix. +- `heading` and `after_heading` values must EXACTLY match one of the headings listed under CURRENT SECTION HEADINGS when they reference existing sections. Do not paraphrase, reword, or change capitalization. Sub-headings appear indented under their parent in the list; use the exact heading text without any # prefix characters. +- Only NoChange may have empty content. AppendToSection, ReplaceSection, and InsertSection MUST have non-empty markdown content with citations. If you have no content to add for a section, do not emit an edit operation for that section. If there are no non-empty edits to make, return exactly one NoChange operation. - Prefer AppendToSection over ReplaceSection. Prefer editing an existing section over creating a new one. - Every new factual claim MUST have a [N] citation using the next-available citation numbers shown in the user message. - Keep tone consistent with the existing article. diff --git a/crates/atomic-core/src/wiki/section_ops.rs b/crates/atomic-core/src/wiki/section_ops.rs index 1c59fdfe..4610162a 100644 --- a/crates/atomic-core/src/wiki/section_ops.rs +++ b/crates/atomic-core/src/wiki/section_ops.rs @@ -131,6 +131,7 @@ struct Section { pub fn apply_section_ops(existing: &str, ops: &[WikiSectionOp]) -> Result { let (preamble, mut sections) = parse_sections(existing); + let mut errors: Vec = Vec::new(); for op in ops { match op { WikiSectionOp::NoChange => { @@ -139,57 +140,102 @@ pub fn apply_section_ops(existing: &str, ops: &[WikiSectionOp]) -> Result { - let idx = find_section_idx(§ions, heading).ok_or_else(|| { - format!( - "AppendToSection: heading '{}' not found. Existing headings: [{}]", - heading, - list_headings(§ions) - ) - })?; - append_to_body(&mut sections[idx].body, content); + match find_section_idx(§ions, heading) { + Some(idx) => { + append_to_body(&mut sections[idx].body, content); + } + None => { + let e = format!( + "AppendToSection: heading '{}' not found. Existing headings: [{}]", + heading, + list_headings(§ions) + ); + tracing::warn!(error = %e, "[wiki] Skipping op with unmatched heading"); + errors.push(e); + } + } } WikiSectionOp::ReplaceSection { heading, content } => { - let idx = find_section_idx(§ions, heading).ok_or_else(|| { - format!( - "ReplaceSection: heading '{}' not found. Existing headings: [{}]", - heading, - list_headings(§ions) - ) - })?; - sections[idx].body = ensure_trailing_blank(content); + match find_section_idx(§ions, heading) { + Some(idx) => { + sections[idx].body = ensure_trailing_blank(content); + } + None => { + let e = format!( + "ReplaceSection: heading '{}' not found. Existing headings: [{}]", + heading, + list_headings(§ions) + ); + tracing::warn!(error = %e, "[wiki] Skipping op with unmatched heading"); + errors.push(e); + } + } } WikiSectionOp::InsertSection { after_heading, heading, content, } => { - let new_section = Section { - level: 2, - heading: heading.clone(), - body: ensure_trailing_blank(content), - }; match after_heading { Some(h) => { - let idx = find_section_idx(§ions, h).ok_or_else(|| { - format!( - "InsertSection: after_heading '{}' not found. Existing headings: [{}]", - h, - list_headings(§ions) - ) - })?; - sections.insert(idx + 1, new_section); + match find_section_idx(§ions, h) { + Some(idx) => { + // Inherit the level of the anchor section so that + // inserting after an H3 produces another H3, not H2. + let level = sections[idx].level; + let insert_idx = after_subtree_idx(§ions, idx); + sections.insert( + insert_idx, + Section { + level, + heading: heading.clone(), + body: ensure_trailing_blank(content), + }, + ); + } + None => { + let e = format!( + "InsertSection: after_heading '{}' not found. Existing headings: [{}]", + h, + list_headings(§ions) + ); + tracing::warn!(error = %e, "[wiki] Skipping op with unmatched heading"); + errors.push(e); + } + } } None => { - sections.push(new_section); + sections.push(Section { + level: 2, + heading: heading.clone(), + body: ensure_trailing_blank(content), + }); } } } } } + // A partially applied proposal is not safe to save: accepting it would + // advance the wiki baseline for all selected sources even though facts + // covered by skipped ops never landed in the article. + if !errors.is_empty() { + return Err(errors.remove(0)); + } + Ok(serialize_sections(&preamble, §ions)) } +fn after_subtree_idx(sections: &[Section], anchor_idx: usize) -> usize { + let anchor_level = sections[anchor_idx].level; + sections + .iter() + .enumerate() + .skip(anchor_idx + 1) + .find_map(|(idx, section)| (section.level <= anchor_level).then_some(idx)) + .unwrap_or(sections.len()) +} + /// Parse the article into (preamble, sections). The preamble is any content /// before the first `##` heading. Only `##` (level 2) headings begin new /// sections; `###` and deeper stay embedded in their parent section's body. @@ -200,7 +246,7 @@ fn parse_sections(content: &str) -> (String, Vec
) { for line in content.split_inclusive('\n') { if let Some((level, heading)) = parse_heading(line) { - if level == 2 { + if level >= 2 { if let Some(sec) = current.take() { sections.push(sec); } @@ -442,13 +488,21 @@ Status body. } #[test] - fn subsection_does_not_split_parent() { - // Details has a ### Subsection — parsing must keep it inside Details. + fn h3_heading_becomes_its_own_section() { + // After the multi-level parse change, ### Subsection is an addressable + // section rather than being swallowed into the ## Details body. let (_, sections) = parse_sections(SAMPLE); let headings: Vec<&str> = sections.iter().map(|s| s.heading.as_str()).collect(); - assert_eq!(headings, vec!["Overview", "Details", "Status"]); + assert_eq!( + headings, + vec!["Overview", "Details", "Subsection", "Status"] + ); + let sub = sections.iter().find(|s| s.heading == "Subsection").unwrap(); + assert_eq!(sub.level, 3); + assert!(sub.body.contains("Subsection text.")); + // Details body must NOT include the H3 heading line any more. let details = sections.iter().find(|s| s.heading == "Details").unwrap(); - assert!(details.body.contains("### Subsection")); + assert!(!details.body.contains("### Subsection")); } #[test] @@ -474,13 +528,15 @@ Status body. assert!(out.contains("## Status\n\nReplaced status [5].")); assert!(!out.contains("Status body.")); - // Verify order: Overview, Details, Notes, Status + // Verify order: Overview, Details subtree, Notes, Status. let overview_pos = out.find("## Overview").unwrap(); let details_pos = out.find("## Details").unwrap(); + let subsection_pos = out.find("### Subsection").unwrap(); let notes_pos = out.find("## Notes").unwrap(); let status_pos = out.find("## Status").unwrap(); assert!(overview_pos < details_pos); - assert!(details_pos < notes_pos); + assert!(details_pos < subsection_pos); + assert!(subsection_pos < notes_pos); assert!(notes_pos < status_pos); } @@ -614,4 +670,132 @@ Status body. let roundtrip: Vec = serde_json::from_str(&json).unwrap(); assert_eq!(ops, roundtrip); } + + // ── Multi-level heading tests ──────────────────────────────────────────── + + #[test] + fn append_to_h3_section() { + let ops = vec![WikiSectionOp::AppendToSection { + heading: "Subsection".to_string(), + content: "New subsection detail [4].".to_string(), + }]; + let out = apply_section_ops(SAMPLE, &ops).unwrap(); + assert!(out.contains("### Subsection\n\nSubsection text.")); + assert!(out.contains("New subsection detail [4].")); + // Parent H2 section and sibling sections must be byte-identical. + assert!(out.contains("## Details\n\nDetails body.")); + assert!(out.contains("## Overview\n\nOverview body with [1] citation.")); + } + + #[test] + fn replace_h3_section() { + let ops = vec![WikiSectionOp::ReplaceSection { + heading: "Subsection".to_string(), + content: "Replaced subsection [4].".to_string(), + }]; + let out = apply_section_ops(SAMPLE, &ops).unwrap(); + assert!(out.contains("### Subsection\n\nReplaced subsection [4].")); + assert!(!out.contains("Subsection text.")); + } + + #[test] + fn insert_after_h3_inherits_level() { + let ops = vec![WikiSectionOp::InsertSection { + after_heading: Some("Subsection".to_string()), + heading: "Another Sub".to_string(), + content: "More sub content [4].".to_string(), + }]; + let out = apply_section_ops(SAMPLE, &ops).unwrap(); + // Inserted section inherits level 3 from the H3 anchor. + assert!(out.contains("### Another Sub\n\nMore sub content [4].")); + let sub_pos = out.find("### Subsection").unwrap(); + let another_pos = out.find("### Another Sub").unwrap(); + let status_pos = out.find("## Status").unwrap(); + assert!(sub_pos < another_pos); + assert!(another_pos < status_pos); + } + + #[test] + fn insert_after_h2_still_produces_h2() { + let ops = vec![WikiSectionOp::InsertSection { + after_heading: Some("Overview".to_string()), + heading: "Background".to_string(), + content: "Background content [4].".to_string(), + }]; + let out = apply_section_ops(SAMPLE, &ops).unwrap(); + assert!(out.contains("## Background\n\nBackground content [4].")); + } + + #[test] + fn insert_after_h2_keeps_child_sections_with_parent() { + let ops = vec![WikiSectionOp::InsertSection { + after_heading: Some("Details".to_string()), + heading: "Follow Up".to_string(), + content: "Follow-up content [4].".to_string(), + }]; + let out = apply_section_ops(SAMPLE, &ops).unwrap(); + + let details_pos = out.find("## Details").unwrap(); + let subsection_pos = out.find("### Subsection").unwrap(); + let follow_up_pos = out.find("## Follow Up").unwrap(); + let status_pos = out.find("## Status").unwrap(); + + assert!(details_pos < subsection_pos); + assert!(subsection_pos < follow_up_pos); + assert!(follow_up_pos < status_pos); + } + + // ── Unmatched heading tests ───────────────────────────────────────────── + + #[test] + fn unmatched_heading_rejects_mixed_valid_and_invalid_ops() { + // One hallucinated heading + one valid op must reject the whole + // proposal. Saving a partial merge would advance the wiki baseline for + // sources that were never incorporated. + let ops = vec![ + WikiSectionOp::AppendToSection { + heading: "Nonexistent Section".to_string(), + content: "should be dropped".to_string(), + }, + WikiSectionOp::AppendToSection { + heading: "Overview".to_string(), + content: "Valid addition [3].".to_string(), + }, + ]; + let err = apply_section_ops(SAMPLE, &ops).unwrap_err(); + assert!(err.contains("Nonexistent Section")); + } + + #[test] + fn unmatched_heading_rejects_even_when_append_to_end_insert_is_valid() { + let ops = vec![ + WikiSectionOp::InsertSection { + after_heading: None, + heading: "Appendix".to_string(), + content: "Appendix content [3].".to_string(), + }, + WikiSectionOp::AppendToSection { + heading: "Nonexistent Section".to_string(), + content: "should be dropped".to_string(), + }, + ]; + let err = apply_section_ops(SAMPLE, &ops).unwrap_err(); + assert!(err.contains("Nonexistent Section")); + } + + #[test] + fn all_bad_ops_returns_error() { + let ops = vec![ + WikiSectionOp::AppendToSection { + heading: "Ghost Section".to_string(), + content: "x".to_string(), + }, + WikiSectionOp::ReplaceSection { + heading: "Phantom".to_string(), + content: "y".to_string(), + }, + ]; + let err = apply_section_ops(SAMPLE, &ops).unwrap_err(); + assert!(err.contains("Ghost Section")); + } } diff --git a/crates/atomic-core/tests/storage_tests.rs b/crates/atomic-core/tests/storage_tests.rs index 2df2eeac..60d27ea6 100644 --- a/crates/atomic-core/tests/storage_tests.rs +++ b/crates/atomic-core/tests/storage_tests.rs @@ -490,6 +490,40 @@ async fn test_delete_wiki(tag_store: &dyn TagStore, wiki_store: &dyn WikiStore) assert!(fetched.is_none()); } +async fn test_wiki_update_chunks_pending_atom_errors( + atom_store: &dyn AtomStore, + tag_store: &dyn TagStore, + wiki_store: &dyn WikiStore, +) { + let tag = tag_store + .create_tag("Wiki Pending Tag", None) + .await + .unwrap(); + let request = CreateAtomRequest { + content: "This atom has not been chunked yet.".to_string(), + source_url: None, + published_at: None, + tag_ids: vec![tag.id.clone()], + ..Default::default() + }; + let atom_id = uuid::Uuid::new_v4().to_string(); + let now = chrono::Utc::now().to_rfc3339(); + atom_store + .insert_atom(&atom_id, &request, &now) + .await + .unwrap(); + + let result = wiki_store + .get_wiki_update_chunks(&tag.id, "1970-01-01T00:00:00Z", 1024) + .await; + + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("not ready for wiki update")); +} + // ==================== ChunkStore Tests ==================== // Embedding status tests need both AtomStore and ChunkStore together, @@ -605,6 +639,12 @@ async fn sqlite_delete_wiki() { test_delete_wiki(&s, &s).await; } +#[tokio::test] +async fn sqlite_wiki_update_chunks_pending_atom_errors() { + let (s, _dir) = sqlite_storage().await; + test_wiki_update_chunks_pending_atom_errors(&s, &s, &s).await; +} + // ==================== Postgres Test Runners ==================== #[cfg(feature = "postgres")] @@ -671,4 +711,13 @@ mod postgres_tests { }; test_delete_wiki(s, s).await; } + + #[tokio::test] + async fn pg_wiki_update_chunks_pending_atom_errors() { + let Some(ref s) = postgres_storage().await else { + eprintln!("Skipping (ATOMIC_TEST_DATABASE_URL not set)"); + return; + }; + test_wiki_update_chunks_pending_atom_errors(s, s, s).await; + } }