From d45b538b23adf8e628a2c5042905932ff74e3df6 Mon Sep 17 00:00:00 2001 From: Gale W Date: Sat, 2 May 2026 13:25:45 -0400 Subject: [PATCH 1/3] tests: add fixture corpus quality coverage --- ROADMAP.md | 1 + .../FixtureCorpusQualityTests.swift | 79 ++++++++++++++++ .../Fixtures/GutenbergMiniCorpus.swift | 94 +++++++++++++++++++ docs/maintainers/fixture-corpus.md | 49 ++++++++++ 4 files changed, 223 insertions(+) create mode 100644 Tests/FetchKitTests/FixtureCorpusQualityTests.swift create mode 100644 Tests/FetchKitTests/Fixtures/GutenbergMiniCorpus.swift create mode 100644 docs/maintainers/fixture-corpus.md diff --git a/ROADMAP.md b/ROADMAP.md index d8ed8fc..c55a8fb 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -258,6 +258,7 @@ Planned - Fixed Search Kit index ownership during teardown so the Search Kit verification lane is green again under both `swift test` and `xcodebuild test`. - Added a dedicated repo-maintenance helper for the focused Search Kit test lane and recorded persistent-surface polish plus ranking/snippet refinement as the next FetchKit work. - Tightened the persistent `FetchKitLibrary` surface around one resolved storage location, with Application Support defaults plus a direct directory override for local callers. +- Added the first checked-in fixture corpus for `FetchKit` result-quality characterization, using a tiny attributed Hugging Face Project Gutenberg sample without adding a live dataset-download dependency to CI. - Recorded that the GitHub-hosted `macos-15` Natural Language verification attempt timed out, so Apple-asset coverage stays local-only for now. - Audited the Core Data-backed `FetchKit` store after a GitHub-hosted Swift Testing crash, recorded the executor-assumption findings, moved Core Data verification onto XCTest, and switched the durable store over to a private-queue Core Data context with the framework's async `perform` path. 
- Refined conventional-search result quality with modest field-aware ranking plus query-aware multi-term snippets across the in-memory and SearchKit-backed `FetchKit` paths. diff --git a/Tests/FetchKitTests/FixtureCorpusQualityTests.swift b/Tests/FetchKitTests/FixtureCorpusQualityTests.swift new file mode 100644 index 0000000..5355bd5 --- /dev/null +++ b/Tests/FetchKitTests/FixtureCorpusQualityTests.swift @@ -0,0 +1,79 @@ +import FetchCore +import Testing +@testable import FetchKit + +@Suite("FetchKit fixture corpus quality", .serialized) +struct FixtureCorpusQualityTests { + @Test("Fixture corpus records carry source attribution") + func fixtureCorpusRecordsCarrySourceAttribution() { + #expect(GutenbergMiniCorpus.source.datasetID == "zkeown/gutenberg-corpus") + #expect(GutenbergMiniCorpus.source.config == "chapters") + #expect(GutenbergMiniCorpus.source.split == "train") + #expect(GutenbergMiniCorpus.records.allSatisfy { $0.sourceURI == GutenbergMiniCorpus.source.url }) + #expect(GutenbergMiniCorpus.records.allSatisfy { $0.metadata["fixture.dataset"] == GutenbergMiniCorpus.source.datasetID }) + } + + @Test("Fixture corpus retrieves a body-driven chapter hit") + func fixtureCorpusRetrievesBodyDrivenChapterHit() async throws { + let library = try await indexedFixtureLibrary() + + let results = try await library.search( + "storage food seeds", + kind: .allTerms, + fields: [.title, .body], + limit: 3 + ) + let firstResult = try #require(results.first) + + #expect(firstResult.document.id == "gutenberg-78430-chapter-1") + #expect(firstResult.snippet?.text.localizedCaseInsensitiveContains("storage") == true) + #expect(firstResult.snippet?.text.localizedCaseInsensitiveContains("food") == true) + #expect(firstResult.snippet?.text.localizedCaseInsensitiveContains("seeds") == true) + #expect((firstResult.snippet?.matchRanges.count ?? 
0) >= 3) + } + + @Test("Fixture corpus keeps closely related chapters separate") + func fixtureCorpusKeepsRelatedChaptersSeparate() async throws { + let library = try await indexedFixtureLibrary() + + let foodStorageResults = try await library.search( + "storage food seeds", + kind: .allTerms, + fields: [.body], + limit: 4 + ) + let germinationResults = try await library.search( + "germinating seed organic", + kind: .allTerms, + fields: [.body], + limit: 4 + ) + + #expect(foodStorageResults.map(\.document.id) == ["gutenberg-78430-chapter-1"]) + #expect(germinationResults.map(\.document.id) == ["gutenberg-78430-chapter-2"]) + } + + @Test("Fixture corpus title-only hits use the title as the current snippet source") + func fixtureCorpusTitleOnlyHitUsesTitleSnippet() async throws { + let library = try await indexedFixtureLibrary() + + let results = try await library.search( + "rocket test pilot", + kind: .allTerms, + fields: [.title, .body], + limit: 3 + ) + let firstResult = try #require(results.first) + let snippet = try #require(firstResult.snippet) + + #expect(firstResult.document.id == "gutenberg-78431-book") + #expect(snippet.text.localizedCaseInsensitiveContains("rocket test pilot")) + #expect(!snippet.text.localizedCaseInsensitiveContains("Transcriber's Note")) + } + + private func indexedFixtureLibrary() async throws -> FetchKitLibrary { + let library = FetchKitLibrary() + try await library.addDocuments(GutenbergMiniCorpus.records) + return library + } +} diff --git a/Tests/FetchKitTests/Fixtures/GutenbergMiniCorpus.swift b/Tests/FetchKitTests/Fixtures/GutenbergMiniCorpus.swift new file mode 100644 index 0000000..46c6183 --- /dev/null +++ b/Tests/FetchKitTests/Fixtures/GutenbergMiniCorpus.swift @@ -0,0 +1,94 @@ +import FetchCore + +enum GutenbergMiniCorpus { + struct Source: Hashable, Sendable { + let datasetID: String + let config: String + let split: String + let license: String + let url: String + } + + static let source = Source( + datasetID: 
"zkeown/gutenberg-corpus", + config: "chapters", + split: "train", + license: "Apache-2.0 dataset packaging; source texts marked public domain in the USA", + url: "https://huggingface.co/datasets/zkeown/gutenberg-corpus" + ) + + static let records: [FetchDocumentRecord] = [ + FetchDocumentRecord( + id: "gutenberg-78430-chapter-1", + title: "A practical course in botany: Chapter I. The Seed", + body: """ + I. The storage of food in seeds. + + Material. In addition to the four food tests described in the course, provide raw starch, grape sugar, the white of a hard-boiled egg, and a fatty substance such as lard or oil. Living material includes grains of corn and wheat, and seeds of some kind of bean. + """, + kind: .reference, + language: "en", + sourceURI: source.url, + metadata: [ + "fixture.dataset": source.datasetID, + "fixture.config": source.config, + "fixture.split": source.split, + "fixture.row": "2", + "fixture.gutenbergID": "78430", + ] + ), + FetchDocumentRecord( + id: "gutenberg-78430-chapter-2", + title: "A practical course in botany: Chapter II. Germination and Growth", + body: """ + Processes accompanying germination. + + Material includes corn, peas, beans, or any quickly germinating seed. Before taking up the study of germinating seeds, it is important to learn from what sources the organic substances used by the growing plant are derived. + """, + kind: .reference, + language: "en", + sourceURI: source.url, + metadata: [ + "fixture.dataset": source.datasetID, + "fixture.config": source.config, + "fixture.split": source.split, + "fixture.row": "3", + "fixture.gutenbergID": "78430", + ] + ), + FetchDocumentRecord( + id: "gutenberg-78431-book", + title: "Always Another Dawn: The Story of a Rocket Test Pilot", + body: """ + Transcriber's Note: Italicized text is surrounded by underscores. The opening material identifies A. Scott Crossfield with Clay Blair, Jr. and includes publisher front matter before the main narrative begins. 
+ """, + kind: .article, + language: "en", + sourceURI: source.url, + metadata: [ + "fixture.dataset": source.datasetID, + "fixture.config": "books", + "fixture.split": "train", + "fixture.row": "2", + "fixture.gutenbergID": "78431", + ] + ), + FetchDocumentRecord( + id: "gutenberg-78432-book", + title: "The young pioneers of the North-west", + body: """ + Transcriber's note: Unusual and inconsistent spelling is as printed. The frontier series opening material introduces a juvenile fiction setting around pioneer children, conduct of life, and frontier life. + """, + kind: .article, + language: "en", + sourceURI: source.url, + metadata: [ + "fixture.dataset": source.datasetID, + "fixture.config": "books", + "fixture.split": "train", + "fixture.row": "3", + "fixture.gutenbergID": "78432", + ] + ), + ] +} diff --git a/docs/maintainers/fixture-corpus.md b/docs/maintainers/fixture-corpus.md new file mode 100644 index 0000000..c943520 --- /dev/null +++ b/docs/maintainers/fixture-corpus.md @@ -0,0 +1,49 @@ +# Fixture Corpus Notes + +## Purpose + +This note records the first checked-in fixture corpus used for `FetchKit` conventional-search quality tests. + +The job of this fixture is deliberately narrow: give the default `FetchKitLibrary` tests enough title/body variety to characterize ranking and snippet behavior without making local or hosted CI download a dataset. + +## Current Fixture Source + +The first mini corpus is derived from the [`zkeown/gutenberg-corpus`](https://huggingface.co/datasets/zkeown/gutenberg-corpus) dataset on Hugging Face. 
+ +Why this source fits the first pass: + +- the source material is Project Gutenberg text marked public domain in the USA +- the dataset card reports Apache-2.0 packaging metadata +- the `books` config has title, author, language, rights, and text fields +- the `chapters` config has chapter titles and chapter text, which is a useful shape for document-search quality tests +- the corpus can be inspected through the Hugging Face Dataset Viewer APIs without adding a Swift dependency + +The fixture records live in `Tests/FetchKitTests/Fixtures/GutenbergMiniCorpus.swift`. Each record carries dataset, config, split, row, and Gutenberg ID metadata so the sample remains attributable and replaceable. + +## Hugging Face Dependency Boundary + +Do not add a Hugging Face Swift dependency for the default fixture lane yet. The current checked-in fixture keeps CI deterministic and avoids adding a network, token, cache, or package-resolution requirement to ordinary tests. + +[`swift-transformers`](https://github.com/huggingface/swift-transformers) is worth tracking for future tokenization or model-adjacent work, but it is broader than this fixture-corpus job. Its README describes tokenizers, Hub downloads, model utilities, and Core ML helpers, which would move this package closer to model tooling than the current retrieval/search fixture need. + +If future work needs live Hub dataset downloads from Swift, evaluate [`swift-huggingface`](https://github.com/huggingface/swift-huggingface) separately. Hugging Face describes it as the newer Swift Hub client for models, datasets, spaces, file downloads, cache behavior, and authentication. That would be a durable dependency decision, not a test-fixture detail. 
+ +## Dataset Viewer Commands + +The fixture was inspected with read-only Dataset Viewer calls: + +```bash +curl -s 'https://datasets-server.huggingface.co/splits?dataset=zkeown/gutenberg-corpus' +curl -s 'https://datasets-server.huggingface.co/rows?dataset=zkeown/gutenberg-corpus&config=books&split=train&offset=1&length=5' +curl -s 'https://datasets-server.huggingface.co/rows?dataset=zkeown/gutenberg-corpus&config=chapters&split=train&offset=1&length=3' +``` + +Hugging Face documents dataset parquet discovery through the Dataset Viewer service in the [`huggingface_hub` CLI guide](https://huggingface.co/docs/huggingface_hub/guides/cli) and the Dataset Viewer [Parquet conversion guide](https://huggingface.co/docs/dataset-viewer/parquet). + +## Next Use + +Use this fixture to settle the remaining Milestone 4 questions: + +- whether the current ranking and snippet heuristics are enough for ordinary app callers +- whether title-only hits should keep using title snippets, suppress snippets, or grow a different presentation policy in the public facade +- whether the first fixture corpus should also cover the macOS SearchKit-backed path directly, or whether the existing SearchKit tests plus the default-library corpus tests are enough for now From 0473b31386f237fef75f584913c73a2d6deb6620 Mon Sep 17 00:00:00 2001 From: Gale W Date: Sat, 2 May 2026 17:00:18 -0400 Subject: [PATCH 2/3] fetchkit: expose search result match fields --- ROADMAP.md | 2 ++ Sources/FetchCore/Search.swift | 8 ++++++- Sources/FetchKit/InMemoryFetchIndex.swift | 4 +++- Sources/FetchKit/SearchKitFetchIndex.swift | 20 ++++++++++++------ .../FetchCoreTests/FetchCoreModelTests.swift | 21 +++++++++++++++++++ .../FetchKitTests/FetchKitLibraryTests.swift | 8 +++++++ .../FixtureCorpusQualityTests.swift | 4 ++++ .../SearchKitFetchIndexTests.swift | 8 +++++++ 8 files changed, 67 insertions(+), 8 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index c55a8fb..87c7498 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ 
-185,6 +185,7 @@ In Progress - [ ] Audit real-corpus result quality now that field-aware ranking, phrase weighting, truncation cues, and multi-term snippets are in place. - [ ] Decide whether title-only hits should suppress body snippets or use a different presentation policy in the public facade. - [ ] Keep the persistent `FetchKitLibrary` construction and search API surface under review as real callers exercise the current design. +- [ ] Explore an opt-in extended snippet surface that can use idle time to precompute short document summaries for larger records, with Apple's [`FoundationModels`](https://developer.apple.com/documentation/foundationmodels) or another local summarization path as the first candidate instead of making foreground full-text search wait on summarization. ### Exit Criteria @@ -259,6 +260,7 @@ Planned - Added a dedicated repo-maintenance helper for the focused Search Kit test lane and recorded persistent-surface polish plus ranking/snippet refinement as the next FetchKit work. - Tightened the persistent `FetchKitLibrary` surface around one resolved storage location, with Application Support defaults plus a direct directory override for local callers. - Added the first checked-in fixture corpus for `FetchKit` result-quality characterization, using a tiny attributed Hugging Face Project Gutenberg sample without adding a live dataset-download dependency to CI. +- Kept title-only snippets as the default result explanation and added typed result metadata for matched fields plus snippet source field, so consumers can distinguish title evidence from body evidence. - Recorded that the GitHub-hosted `macos-15` Natural Language verification attempt timed out, so Apple-asset coverage stays local-only for now. 
- Audited the Core Data-backed `FetchKit` store after a GitHub-hosted Swift Testing crash, recorded the executor-assumption findings, moved Core Data verification onto XCTest, and switched the durable store over to a private-queue Core Data context with the framework's async `perform` path. - Refined conventional-search result quality with modest field-aware ranking plus query-aware multi-term snippets across the in-memory and SearchKit-backed `FetchKit` paths. diff --git a/Sources/FetchCore/Search.swift b/Sources/FetchCore/Search.swift index 7a23382..2157e9d 100644 --- a/Sources/FetchCore/Search.swift +++ b/Sources/FetchCore/Search.swift @@ -53,14 +53,20 @@ public struct FetchSearchResult: Hashable, Codable, Sendable { public let document: FetchDocument public let score: Double public let snippet: FetchSnippet? + public let matchedFields: Set<FetchSearchField> + public let snippetField: FetchSearchField? public init( document: FetchDocument, score: Double, - snippet: FetchSnippet? = nil + snippet: FetchSnippet? = nil, + matchedFields: Set<FetchSearchField> = [], + snippetField: FetchSearchField?
= nil ) { self.document = document self.score = score self.snippet = snippet + self.matchedFields = matchedFields + self.snippetField = snippetField } } diff --git a/Sources/FetchKit/InMemoryFetchIndex.swift b/Sources/FetchKit/InMemoryFetchIndex.swift index 944a2f4..92b6213 100644 --- a/Sources/FetchKit/InMemoryFetchIndex.swift +++ b/Sources/FetchKit/InMemoryFetchIndex.swift @@ -68,7 +68,9 @@ actor InMemoryFetchIndex: FetchIndex { score: score, snippet: snippetMatch.flatMap { match in FetchSearchSupport.buildSnippet(from: match.text, query: query) - } + }, + matchedFields: Set(matches.map(\.field)), + snippetField: snippetMatch?.field ) } diff --git a/Sources/FetchKit/SearchKitFetchIndex.swift b/Sources/FetchKit/SearchKitFetchIndex.swift index 8536437..f6ebdf7 100644 --- a/Sources/FetchKit/SearchKitFetchIndex.swift +++ b/Sources/FetchKit/SearchKitFetchIndex.swift @@ -317,23 +317,29 @@ public actor SearchKitFetchIndex: FetchIndex { } let score = existing.score + new.score - let snippet = preferredSnippet(existing: existing, new: new) + let snippetMatch = preferredSnippet(existing: existing, new: new) return FetchSearchResult( document: existing.document, score: score, - snippet: snippet + snippet: snippetMatch.snippet, + matchedFields: existing.matchedFields.union([new.field]), + snippetField: snippetMatch.field ) } private func preferredSnippet( existing: FetchSearchResult, new: FieldSearchMatch - ) -> FetchSnippet? { + ) -> (snippet: FetchSnippet?, field: FetchSearchField?) { if new.field == .body, new.snippet != nil { - return new.snippet + return (new.snippet, new.field) } - return existing.snippet ?? new.snippet + if let existingSnippet = existing.snippet { + return (existingSnippet, existing.snippetField) + } + + return (new.snippet, new.snippet == nil ? 
nil : new.field) } private func normalize( @@ -413,7 +419,9 @@ private struct FieldSearchMatch { FetchSearchResult( document: document, score: score, - snippet: snippet + snippet: snippet, + matchedFields: [field], + snippetField: snippet == nil ? nil : field ) } } diff --git a/Tests/FetchCoreTests/FetchCoreModelTests.swift b/Tests/FetchCoreTests/FetchCoreModelTests.swift index 4230b0b..1114ae4 100644 --- a/Tests/FetchCoreTests/FetchCoreModelTests.swift +++ b/Tests/FetchCoreTests/FetchCoreModelTests.swift @@ -59,6 +59,27 @@ struct FetchCoreSearchModelTests { #expect(result.score == 0.9) #expect(result.snippet?.text == "Apples are bright and crisp.") #expect(result.snippet?.matchRanges == [FetchMatchRange(lowerBound: 0, upperBound: 6)]) + #expect(result.matchedFields.isEmpty) + #expect(result.snippetField == nil) + } + + @Test("Fetch search results can describe matched fields and snippet source") + func fetchSearchResultsDescribeMatchedFields() { + let document = FetchDocument( + id: "doc-apple", + title: "Apple Guide", + body: "Apples are bright and crisp." 
+ ) + let result = FetchSearchResult( + document: document, + score: 0.9, + snippet: FetchSnippet(text: "Apple Guide"), + matchedFields: [.title], + snippetField: .title + ) + + #expect(result.matchedFields == [.title]) + #expect(result.snippetField == .title) } @Test("Fetch document records keep durable metadata separate from search and index views") diff --git a/Tests/FetchKitTests/FetchKitLibraryTests.swift b/Tests/FetchKitTests/FetchKitLibraryTests.swift index b5344a2..7f7caee 100644 --- a/Tests/FetchKitTests/FetchKitLibraryTests.swift +++ b/Tests/FetchKitTests/FetchKitLibraryTests.swift @@ -116,6 +116,8 @@ struct FetchKitLibraryTests { #expect(results.count == 1) #expect(results[0].document.id == "doc-apple") #expect(results[0].snippet?.text.contains("bright") == true) + #expect(results[0].matchedFields == [.body]) + #expect(results[0].snippetField == .body) } @Test("FetchKitLibrary prefers title matches over body-only matches") @@ -139,6 +141,10 @@ struct FetchKitLibraryTests { #expect(results.count == 2) #expect(results.map(\.document.id) == ["doc-title", "doc-body"]) + #expect(results[0].matchedFields == [.title]) + #expect(results[0].snippetField == .title) + #expect(results[1].matchedFields == [.body]) + #expect(results[1].snippetField == .body) } @Test("FetchKitLibrary snippets highlight multiple query terms") @@ -159,6 +165,8 @@ struct FetchKitLibraryTests { #expect(snippet.text.localizedCaseInsensitiveContains("bright")) #expect(snippet.text.localizedCaseInsensitiveContains("crisp")) #expect(snippet.matchRanges.count >= 2) + #expect(results.first?.matchedFields == [.body]) + #expect(results.first?.snippetField == .body) } @Test("FetchKitLibrary snippets show truncation markers when context is cropped") diff --git a/Tests/FetchKitTests/FixtureCorpusQualityTests.swift b/Tests/FetchKitTests/FixtureCorpusQualityTests.swift index 5355bd5..8d35a08 100644 --- a/Tests/FetchKitTests/FixtureCorpusQualityTests.swift +++ 
b/Tests/FetchKitTests/FixtureCorpusQualityTests.swift @@ -30,6 +30,8 @@ struct FixtureCorpusQualityTests { #expect(firstResult.snippet?.text.localizedCaseInsensitiveContains("food") == true) #expect(firstResult.snippet?.text.localizedCaseInsensitiveContains("seeds") == true) #expect((firstResult.snippet?.matchRanges.count ?? 0) >= 3) + #expect(firstResult.matchedFields == [.body]) + #expect(firstResult.snippetField == .body) } @Test("Fixture corpus keeps closely related chapters separate") @@ -67,6 +69,8 @@ struct FixtureCorpusQualityTests { let snippet = try #require(firstResult.snippet) #expect(firstResult.document.id == "gutenberg-78431-book") + #expect(firstResult.matchedFields == [.title]) + #expect(firstResult.snippetField == .title) #expect(snippet.text.localizedCaseInsensitiveContains("rocket test pilot")) #expect(!snippet.text.localizedCaseInsensitiveContains("Transcriber's Note")) } diff --git a/Tests/FetchKitTests/SearchKitFetchIndexTests.swift b/Tests/FetchKitTests/SearchKitFetchIndexTests.swift index 0a1ddda..af9dd7e 100644 --- a/Tests/FetchKitTests/SearchKitFetchIndexTests.swift +++ b/Tests/FetchKitTests/SearchKitFetchIndexTests.swift @@ -40,8 +40,12 @@ final class SearchKitFetchIndexTests: XCTestCase { ) XCTAssertEqual(titleResults.map(\.document.id), ["doc-apple"]) + XCTAssertEqual(titleResults.first?.matchedFields, [.title]) + XCTAssertEqual(titleResults.first?.snippetField, .title) XCTAssertEqual(bodyResults.map(\.document.id), ["doc-orange"]) XCTAssertEqual(bodyResults.first?.snippet?.text.contains("juicy"), true) + XCTAssertEqual(bodyResults.first?.matchedFields, [.body]) + XCTAssertEqual(bodyResults.first?.snippetField, .body) } func testSearchKitFetchIndexPrefersTitleMatchesOverBodyOnlyMatches() async throws { @@ -76,6 +80,8 @@ final class SearchKitFetchIndexTests: XCTestCase { ) XCTAssertEqual(results.map(\.document.id), ["doc-title", "doc-body"]) + XCTAssertEqual(results.first?.matchedFields, [.title]) + 
XCTAssertEqual(results.first?.snippetField, .title) } func testSearchKitFetchIndexHighlightsMultipleQueryTermsInSnippets() async throws { @@ -106,6 +112,8 @@ final class SearchKitFetchIndexTests: XCTestCase { XCTAssertEqual(results.first?.snippet?.text.localizedCaseInsensitiveContains("bright"), true) XCTAssertEqual(results.first?.snippet?.text.localizedCaseInsensitiveContains("crisp"), true) XCTAssertGreaterThanOrEqual(results.first?.snippet?.matchRanges.count ?? 0, 2) + XCTAssertEqual(results.first?.matchedFields, [.body]) + XCTAssertEqual(results.first?.snippetField, .body) } func testSearchKitFetchIndexShowsSnippetTruncationMarkers() async throws { From 97dc93e0c9d229b0f57213b27c58b048e6dcb62e Mon Sep 17 00:00:00 2001 From: Gale W Date: Sat, 2 May 2026 17:04:28 -0400 Subject: [PATCH 3/3] tests: cover searchkit fixture corpus parity --- .../SearchKitFetchIndexTests.swift | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Tests/FetchKitTests/SearchKitFetchIndexTests.swift b/Tests/FetchKitTests/SearchKitFetchIndexTests.swift index af9dd7e..6977ef3 100644 --- a/Tests/FetchKitTests/SearchKitFetchIndexTests.swift +++ b/Tests/FetchKitTests/SearchKitFetchIndexTests.swift @@ -202,6 +202,41 @@ final class SearchKitFetchIndexTests: XCTestCase { XCTAssertTrue(results.isEmpty) } + func testSearchKitFetchIndexMatchesFixtureCorpusBodyAndTitleEvidence() async throws { + let index = try SearchKitFetchIndex( + configuration: .init( + storage: .inMemory, + indexNamePrefix: "SearchKitFetchIndexTests-\(UUID().uuidString)" + ) + ) + + try await index.apply( + FetchIndexingChangeset( + GutenbergMiniCorpus.records.map { .upsert($0.indexDocument) } + ) + ) + + let bodyResults = try await index.search( + FetchSearchQuery("storage food seeds", kind: .allTerms, fields: [.title, .body], limit: 3) + ) + let titleResults = try await index.search( + FetchSearchQuery("rocket test pilot", kind: .allTerms, fields: [.title, .body], limit: 3) + ) + + 
XCTAssertEqual(bodyResults.first?.document.id, "gutenberg-78430-chapter-1") + XCTAssertEqual(bodyResults.first?.matchedFields, [.body]) + XCTAssertEqual(bodyResults.first?.snippetField, .body) + XCTAssertEqual(bodyResults.first?.snippet?.text.localizedCaseInsensitiveContains("storage"), true) + XCTAssertEqual(bodyResults.first?.snippet?.text.localizedCaseInsensitiveContains("food"), true) + XCTAssertEqual(bodyResults.first?.snippet?.text.localizedCaseInsensitiveContains("seeds"), true) + + XCTAssertEqual(titleResults.first?.document.id, "gutenberg-78431-book") + XCTAssertEqual(titleResults.first?.matchedFields, [.title]) + XCTAssertEqual(titleResults.first?.snippetField, .title) + XCTAssertEqual(titleResults.first?.snippet?.text.localizedCaseInsensitiveContains("rocket test pilot"), true) + XCTAssertEqual(titleResults.first?.snippet?.text.localizedCaseInsensitiveContains("Transcriber's Note"), false) + } + func testFetchKitLibraryBuildsPersistentPair() async throws { let temporaryDirectory = URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true) .appendingPathComponent(UUID().uuidString, isDirectory: true)