From c9bac83177fb9a2ca8300f56ef7b840f67c0b238 Mon Sep 17 00:00:00 2001 From: Isaac Schepp Date: Thu, 2 Apr 2026 09:28:00 -0500 Subject: [PATCH 1/3] fix: Silently skip identical vocabulary entries during merge When merging two archives that both embed standard vocabularies, identical entries (same key and value) are now silently skipped instead of producing ~230 noisy "duplicate" warnings. Only true conflicts (same key, different value) are reported. Entity duplicates are still always reported since ID collisions are significant regardless of value equality. Closes #273 --- glx/merge_runner.go | 18 ++++-- glx/merge_runner_test.go | 8 +-- go-glx/serializer.go | 2 +- go-glx/types.go | 119 +++++++++++++++++++++++-------------- go-glx/types_merge_test.go | 116 +++++++++++++++++++++++++++--------- 5 files changed, 178 insertions(+), 85 deletions(-) diff --git a/glx/merge_runner.go b/glx/merge_runner.go index b86f52e3..5b7ab033 100644 --- a/glx/merge_runner.go +++ b/glx/merge_runner.go @@ -24,7 +24,8 @@ import ( // mergeResult holds statistics from a merge operation. type mergeResult struct { - Duplicates []string + Conflicts []string + IdenticalSkipped int NewPersons int NewEvents int NewRelationships int @@ -47,13 +48,14 @@ func mergeArchivesInMemory(dest, src *glxlib.GLXFile) *mergeResult { // Snapshot counts before merge before := entityCounts(dest) - duplicates := dest.Merge(src) + conflicts, identicalSkipped := dest.Merge(src) // Compute new entities added after := entityCounts(dest) return &mergeResult{ - Duplicates: duplicates, + Conflicts: conflicts, + IdenticalSkipped: identicalSkipped, NewPersons: after.persons - before.persons, NewEvents: after.events - before.events, NewRelationships: after.relationships - before.relationships, @@ -190,13 +192,17 @@ func mergeArchives(srcPath, destPath string, dryRun bool) error { fmt.Println(" No new entities to merge.") } - if len(result.Duplicates) > 0 { - fmt.Printf("\n Duplicates (%d — skipped, destination kept):\n", len(result.Duplicates)) - for _, d := range result.Duplicates { + if len(result.Conflicts) > 0 { + fmt.Printf("\n Conflicts (%d — skipped, destination kept):\n", len(result.Conflicts)) + for _, d := range result.Conflicts { fmt.Printf(" %s\n", d) } } + if result.IdenticalSkipped > 0 { + fmt.Printf("\n %d identical vocabulary/property entries skipped\n", result.IdenticalSkipped) + } + if dryRun { fmt.Println("\n(dry run — no files written)") return nil diff --git a/glx/merge_runner_test.go b/glx/merge_runner_test.go index 92ef6f64..17aecfee 100644 --- a/glx/merge_runner_test.go +++ b/glx/merge_runner_test.go @@ -49,7 +49,7 @@ func TestMergeArchives_NewEntities(t *testing.T) { result := mergeArchivesInMemory(dest, src) - assert.Empty(t, result.Duplicates, "no duplicates expected") + assert.Empty(t, result.Conflicts, "no duplicates expected") assert.Equal(t, 1, result.NewPersons) assert.Equal(t, 1, result.NewEvents) assert.Len(t, dest.Persons, 2) @@ -73,8 +73,8 @@ func TestMergeArchives_Duplicates(t *testing.T) { result := mergeArchivesInMemory(dest, src) - require.Len(t, result.Duplicates, 1) - assert.Contains(t, result.Duplicates[0], "person-a") + require.Len(t, result.Conflicts, 1) + assert.Contains(t, result.Conflicts[0], "person-a") assert.Equal(t, 1, result.NewPersons) } @@ -89,7 +89,7 @@ func TestMergeArchives_EmptySource(t *testing.T) { result := mergeArchivesInMemory(dest, src) - assert.Empty(t, result.Duplicates) + assert.Empty(t, result.Conflicts) assert.Equal(t, 0, result.TotalNew()) assert.Len(t, dest.Persons, 1) } diff --git a/go-glx/serializer.go b/go-glx/serializer.go index f8a304aa..2510000a 100644 --- a/go-glx/serializer.go +++ b/go-glx/serializer.go @@ -301,7 +301,7 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte) if err := yaml.Unmarshal(data, &partial); err != nil { return nil, nil, fmt.Errorf("failed to unmarshal %s: %w", path, err) } - duplicates := glx.Merge(&partial) + duplicates, _ := glx.Merge(&partial) allDuplicates = append(allDuplicates, duplicates...) } diff --git a/go-glx/types.go b/go-glx/types.go index 8e9845d0..9d97956d 100644 --- a/go-glx/types.go +++ b/go-glx/types.go @@ -16,6 +16,7 @@ package glx import ( "fmt" + "reflect" ) // Metadata holds import metadata extracted from GEDCOM HEAD and SUBM records. @@ -395,56 +396,65 @@ type TemporalValue struct { Date DateString `yaml:"date,omitempty"` // FamilySearch normalized date string } -// Merge combines another GLXFile into this one, returning duplicate/conflict messages. -// Messages may report duplicate entity or vocabulary IDs ("duplicate ID: ") -// or metadata conflicts ("duplicate metadata: ..."). -func (g *GLXFile) Merge(other *GLXFile) []string { +// Merge combines another GLXFile into this one, returning conflict messages and +// a count of identical items silently skipped. Entity duplicates (same ID, +// regardless of value) are always reported as conflicts. Vocabulary/property +// duplicates are only reported when the values differ; identical vocabulary +// entries (e.g., standard vocabularies present in both archives) are silently +// skipped and counted. +func (g *GLXFile) Merge(other *GLXFile) (conflicts []string, identicalSkipped int) { g.initMaps() g.validation = nil // invalidate cached validation since maps are being mutated - duplicates := make([]string, 0, 10) + conflicts = make([]string, 0, 10) // Merge metadata (first one wins; report duplicate if both have content) if other.ImportMetadata != nil && other.ImportMetadata.hasContent() { if g.ImportMetadata != nil && g.ImportMetadata.hasContent() { - duplicates = append(duplicates, "duplicate metadata: metadata appears in multiple files") + conflicts = append(conflicts, "duplicate metadata: metadata appears in multiple files") } else { g.ImportMetadata = other.ImportMetadata } } - // Merge entities (fail on duplicates) - duplicates = append(duplicates, mergeMap("persons", g.Persons, other.Persons)...) - duplicates = append(duplicates, mergeMap("relationships", g.Relationships, other.Relationships)...) - duplicates = append(duplicates, mergeMap("events", g.Events, other.Events)...) - duplicates = append(duplicates, mergeMap("places", g.Places, other.Places)...) - duplicates = append(duplicates, mergeMap("sources", g.Sources, other.Sources)...) - duplicates = append(duplicates, mergeMap("citations", g.Citations, other.Citations)...) - duplicates = append(duplicates, mergeMap("repositories", g.Repositories, other.Repositories)...) - duplicates = append(duplicates, mergeMap("assertions", g.Assertions, other.Assertions)...) - duplicates = append(duplicates, mergeMap("media", g.Media, other.Media)...) - - // Merge vocabularies (ALSO fail on duplicates - treat same as entities) - duplicates = append(duplicates, mergeMap("event_types", g.EventTypes, other.EventTypes)...) - duplicates = append(duplicates, mergeMap("relationship_types", g.RelationshipTypes, other.RelationshipTypes)...) - duplicates = append(duplicates, mergeMap("place_types", g.PlaceTypes, other.PlaceTypes)...) - duplicates = append(duplicates, mergeMap("source_types", g.SourceTypes, other.SourceTypes)...) - duplicates = append(duplicates, mergeMap("repository_types", g.RepositoryTypes, other.RepositoryTypes)...) - duplicates = append(duplicates, mergeMap("media_types", g.MediaTypes, other.MediaTypes)...) - duplicates = append(duplicates, mergeMap("gender_types", g.GenderTypes, other.GenderTypes)...) - duplicates = append(duplicates, mergeMap("participant_roles", g.ParticipantRoles, other.ParticipantRoles)...) - duplicates = append(duplicates, mergeMap("confidence_levels", g.ConfidenceLevels, other.ConfidenceLevels)...) - - // Merge property vocabularies - duplicates = append(duplicates, mergeMap("person_properties", g.PersonProperties, other.PersonProperties)...) - duplicates = append(duplicates, mergeMap("event_properties", g.EventProperties, other.EventProperties)...) - duplicates = append(duplicates, mergeMap("relationship_properties", g.RelationshipProperties, other.RelationshipProperties)...) - duplicates = append(duplicates, mergeMap("place_properties", g.PlaceProperties, other.PlaceProperties)...) - duplicates = append(duplicates, mergeMap("media_properties", g.MediaProperties, other.MediaProperties)...) - duplicates = append(duplicates, mergeMap("repository_properties", g.RepositoryProperties, other.RepositoryProperties)...) - duplicates = append(duplicates, mergeMap("citation_properties", g.CitationProperties, other.CitationProperties)...) - duplicates = append(duplicates, mergeMap("source_properties", g.SourceProperties, other.SourceProperties)...) - - return duplicates + // Merge entities — always report duplicates (entity ID collisions are significant) + conflicts = append(conflicts, mergeMap("persons", g.Persons, other.Persons)...) + conflicts = append(conflicts, mergeMap("relationships", g.Relationships, other.Relationships)...) + conflicts = append(conflicts, mergeMap("events", g.Events, other.Events)...) + conflicts = append(conflicts, mergeMap("places", g.Places, other.Places)...) + conflicts = append(conflicts, mergeMap("sources", g.Sources, other.Sources)...) + conflicts = append(conflicts, mergeMap("citations", g.Citations, other.Citations)...) + conflicts = append(conflicts, mergeMap("repositories", g.Repositories, other.Repositories)...) + conflicts = append(conflicts, mergeMap("assertions", g.Assertions, other.Assertions)...) + conflicts = append(conflicts, mergeMap("media", g.Media, other.Media)...) + + // Helper to accumulate mergeMapDedup results + addDedup := func(c []string, s int) { + conflicts = append(conflicts, c...) + identicalSkipped += s + } + + // Merge vocabularies — silently skip identical entries, report only true conflicts + addDedup(mergeMapDedup("event_types", g.EventTypes, other.EventTypes)) + addDedup(mergeMapDedup("relationship_types", g.RelationshipTypes, other.RelationshipTypes)) + addDedup(mergeMapDedup("place_types", g.PlaceTypes, other.PlaceTypes)) + addDedup(mergeMapDedup("source_types", g.SourceTypes, other.SourceTypes)) + addDedup(mergeMapDedup("repository_types", g.RepositoryTypes, other.RepositoryTypes)) + addDedup(mergeMapDedup("media_types", g.MediaTypes, other.MediaTypes)) + addDedup(mergeMapDedup("gender_types", g.GenderTypes, other.GenderTypes)) + addDedup(mergeMapDedup("participant_roles", g.ParticipantRoles, other.ParticipantRoles)) + addDedup(mergeMapDedup("confidence_levels", g.ConfidenceLevels, other.ConfidenceLevels)) + + // Merge property vocabularies — same dedup behavior + addDedup(mergeMapDedup("person_properties", g.PersonProperties, other.PersonProperties)) + addDedup(mergeMapDedup("event_properties", g.EventProperties, other.EventProperties)) + addDedup(mergeMapDedup("relationship_properties", g.RelationshipProperties, other.RelationshipProperties)) + addDedup(mergeMapDedup("place_properties", g.PlaceProperties, other.PlaceProperties)) + addDedup(mergeMapDedup("media_properties", g.MediaProperties, other.MediaProperties)) + addDedup(mergeMapDedup("repository_properties", g.RepositoryProperties, other.RepositoryProperties)) + addDedup(mergeMapDedup("citation_properties", g.CitationProperties, other.CitationProperties)) + addDedup(mergeMapDedup("source_properties", g.SourceProperties, other.SourceProperties)) + + return conflicts, identicalSkipped } // initMaps ensures all entity and vocabulary maps are initialized (non-nil). @@ -532,13 +542,11 @@ func (g *GLXFile) initMaps() { } } -// mergeMap is used for BOTH entities and vocabularies - duplicates are always errors +// mergeMap merges src into dest, reporting all key collisions as duplicates. +// Used for entity maps where any ID collision is significant. func mergeMap[T any](mapType string, dest, src map[string]*T) []string { var duplicates []string - if dest == nil { - return duplicates - } - if src == nil { + if dest == nil || src == nil { return duplicates } for k, v := range src { @@ -548,6 +556,27 @@ func mergeMap[T any](mapType string, dest, src map[string]*T) []string { dest[k] = v } } - return duplicates } + +// mergeMapDedup merges src into dest, silently skipping entries where the key +// exists and the value is deeply equal. Only reports conflicts where the same +// key maps to a different value. Used for vocabulary/property maps where +// identical standard vocabularies in both archives should not produce noise. +func mergeMapDedup[T any](mapType string, dest, src map[string]*T) (conflicts []string, skipped int) { + if dest == nil || src == nil { + return nil, 0 + } + for k, v := range src { + if existing, exists := dest[k]; exists { + if reflect.DeepEqual(existing, v) { + skipped++ + } else { + conflicts = append(conflicts, fmt.Sprintf("conflict %s ID: %s", mapType, k)) + } + } else { + dest[k] = v + } + } + return conflicts, skipped +} diff --git a/go-glx/types_merge_test.go b/go-glx/types_merge_test.go index 9e09d55e..a7149249 100644 --- a/go-glx/types_merge_test.go +++ b/go-glx/types_merge_test.go @@ -26,7 +26,7 @@ func TestGLXFile_Merge_EmptyFiles(t *testing.T) { g1 := &GLXFile{} g2 := &GLXFile{} - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "merging empty files should have no duplicates") } @@ -45,7 +45,7 @@ func TestGLXFile_Merge_NilDestMaps(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.Len(t, g1.Persons, 1, "persons should be merged into initialized map") require.Len(t, g1.Events, 1, "events should be merged into initialized map") @@ -66,7 +66,7 @@ func TestGLXFile_Merge_Persons_NoDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.Len(t, g1.Persons, 2, "should have merged both persons") require.Contains(t, g1.Persons, "person-1") @@ -87,7 +87,7 @@ func TestGLXFile_Merge_Persons_WithDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Len(t, duplicates, 1, "should detect one duplicate") require.Contains(t, duplicates[0], "duplicate persons ID: person-1") @@ -122,7 +122,7 @@ func TestGLXFile_Merge_AllEntityTypes(t *testing.T) { Media: map[string]*Media{"media-2": {}}, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // Verify all entity types were merged @@ -157,7 +157,7 @@ func TestGLXFile_Merge_Vocabularies_NoDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.Len(t, g1.EventTypes, 2) require.Len(t, g1.ParticipantRoles, 2) @@ -178,9 +178,9 @@ func TestGLXFile_Merge_Vocabularies_WithDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one duplicate") - require.Contains(t, duplicates[0], "duplicate event_types ID: birth") + duplicates, _ := g1.Merge(g2) + require.Len(t, duplicates, 1, "should detect one conflict") + require.Contains(t, duplicates[0], "conflict event_types ID: birth") // death should still be merged require.Len(t, g1.EventTypes, 2) @@ -213,7 +213,7 @@ func TestGLXFile_Merge_AllVocabularyTypes(t *testing.T) { GenderTypes: map[string]*GenderType{"gender-2": {}}, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // Verify all vocabulary types were merged @@ -248,7 +248,7 @@ func TestGLXFile_Merge_PropertyVocabularies_NoDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.Len(t, g1.PersonProperties, 2) require.Len(t, g1.EventProperties, 2) @@ -269,9 +269,9 @@ func TestGLXFile_Merge_PropertyVocabularies_WithDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one duplicate") - require.Contains(t, duplicates[0], "duplicate person_properties ID: prop-1") + duplicates, _ := g1.Merge(g2) + require.Len(t, duplicates, 1, "should detect one conflict") + require.Contains(t, duplicates[0], "conflict person_properties ID: prop-1") // prop-2 should still be merged require.Len(t, g1.PersonProperties, 2) @@ -294,7 +294,7 @@ func TestGLXFile_Merge_AllPropertyVocabularies(t *testing.T) { PlaceProperties: map[string]*PropertyDefinition{"prop-2": {}}, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // Verify all property vocabulary types were merged @@ -333,10 +333,11 @@ func TestGLXFile_Merge_MultipleDuplicates(t *testing.T) { }, } - duplicates := g1.Merge(g2) - require.Len(t, duplicates, 3, "should detect three duplicates") + duplicates, skipped := g1.Merge(g2) + require.Len(t, duplicates, 2, "should detect two entity duplicates") + require.Equal(t, 1, skipped, "should silently skip one identical vocab entry") - // Check that all duplicates are reported + // Check that entity duplicates are reported duplicateStr := "" var duplicateStrSb317 strings.Builder for _, d := range duplicates { @@ -345,7 +346,6 @@ func TestGLXFile_Merge_MultipleDuplicates(t *testing.T) { duplicateStr += duplicateStrSb317.String() require.Contains(t, duplicateStr, "person-1") require.Contains(t, duplicateStr, "event-1") - require.Contains(t, duplicateStr, "birth") // Non-duplicates should still be merged require.Contains(t, g1.Persons, "person-2") @@ -369,7 +369,7 @@ func TestGLXFile_Merge_NilMaps(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // nil dest maps should be initialized and populated with source data @@ -395,7 +395,7 @@ func TestGLXFile_Merge_SourceNilMaps(t *testing.T) { Events: nil, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // Destination should remain unchanged @@ -437,7 +437,7 @@ func TestGLXFile_Merge_MixedEntitiesAndVocabularies(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") // Verify everything merged correctly @@ -462,7 +462,7 @@ func TestGLXFile_Merge_PreservesExistingData(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates) // Verify original data is preserved @@ -496,7 +496,7 @@ func TestGLXFile_Merge_DuplicateReporting(t *testing.T) { Media: map[string]*Media{"media-1": {}}, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Len(t, duplicates, 9, "should detect 9 duplicates") // Verify messages include the entity type and ID @@ -530,7 +530,7 @@ func TestGLXFile_Merge_Metadata_AdoptFromOther(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.NotNil(t, g1.ImportMetadata, "metadata should be adopted from other") require.Equal(t, "MyApp", g1.ImportMetadata.SourceSystem) @@ -551,7 +551,7 @@ func TestGLXFile_Merge_Metadata_DuplicateDetected(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Len(t, duplicates, 1, "should detect one metadata duplicate") require.Contains(t, duplicates[0], "duplicate metadata") @@ -568,7 +568,7 @@ func TestGLXFile_Merge_Metadata_EmptyMetadataIgnored(t *testing.T) { ImportMetadata: &Metadata{}, // all fields empty } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates") require.Nil(t, g1.ImportMetadata, "empty metadata should not be adopted") } @@ -582,7 +582,7 @@ func TestGLXFile_Merge_Metadata_NilMetadataBothSides(t *testing.T) { Persons: map[string]*Person{}, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates) require.Nil(t, g1.ImportMetadata) } @@ -600,8 +600,66 @@ func TestGLXFile_Merge_Metadata_ExistingEmptyDoesNotConflict(t *testing.T) { }, } - duplicates := g1.Merge(g2) + duplicates, _ := g1.Merge(g2) require.Empty(t, duplicates, "should have no duplicates since g1 metadata has no content") require.NotNil(t, g1.ImportMetadata) require.Equal(t, "NewApp", g1.ImportMetadata.SourceSystem) } + +func TestGLXFile_Merge_IdenticalVocabsSilentlySkipped(t *testing.T) { + g1 := &GLXFile{ + EventTypes: map[string]*EventType{ + "birth": {Label: "Birth", Description: "A birth event"}, + "death": {Label: "Death"}, + }, + PersonProperties: map[string]*PropertyDefinition{ + "name": {ValueType: "string", Label: "Name"}, + }, + } + + g2 := &GLXFile{ + EventTypes: map[string]*EventType{ + "birth": {Label: "Birth", Description: "A birth event"}, + "death": {Label: "Death"}, + "marriage": {Label: "Marriage"}, + }, + PersonProperties: map[string]*PropertyDefinition{ + "name": {ValueType: "string", Label: "Name"}, + "age": {ValueType: "integer"}, + }, + } + + conflicts, skipped := g1.Merge(g2) + require.Empty(t, conflicts, "identical entries should not produce conflicts") + require.Equal(t, 3, skipped, "should skip 3 identical entries (birth, death, name)") + require.Len(t, g1.EventTypes, 3) + require.Contains(t, g1.EventTypes, "marriage") + require.Len(t, g1.PersonProperties, 2) + require.Contains(t, g1.PersonProperties, "age") +} + +func TestGLXFile_Merge_ConflictingVocabsReported(t *testing.T) { + g1 := &GLXFile{ + EventTypes: map[string]*EventType{ + "birth": {Label: "Birth"}, + }, + PersonProperties: map[string]*PropertyDefinition{ + "name": {ValueType: "string"}, + }, + } + + g2 := &GLXFile{ + EventTypes: map[string]*EventType{ + "birth": {Label: "Different Birth"}, + }, + PersonProperties: map[string]*PropertyDefinition{ + "name": {ValueType: "integer"}, + }, + } + + conflicts, skipped := g1.Merge(g2) + require.Len(t, conflicts, 2, "should report two conflicts") + require.Equal(t, 0, skipped, "no identical entries to skip") + require.Contains(t, conflicts[0], "conflict") + require.Contains(t, conflicts[1], "conflict") +} From d80416929daa9d94556f4b49a7434c9bb6705fef Mon Sep 17 00:00:00 2001 From: Isaac Schepp Date: Thu, 2 Apr 2026 09:34:46 -0500 Subject: [PATCH 2/3] fix: Rename duplicates to conflicts for consistency with new Merge semantics --- glx/merge_runner_test.go | 2 +- go-glx/serializer.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/glx/merge_runner_test.go b/glx/merge_runner_test.go index 17aecfee..bec6fa82 100644 --- a/glx/merge_runner_test.go +++ b/glx/merge_runner_test.go @@ -49,7 +49,7 @@ func TestMergeArchives_NewEntities(t *testing.T) { result := mergeArchivesInMemory(dest, src) - assert.Empty(t, result.Conflicts, "no duplicates expected") + assert.Empty(t, result.Conflicts, "no conflicts expected") assert.Equal(t, 1, result.NewPersons) assert.Equal(t, 1, result.NewEvents) assert.Len(t, dest.Persons, 2) diff --git a/go-glx/serializer.go b/go-glx/serializer.go index 2510000a..2c6de202 100644 --- a/go-glx/serializer.go +++ b/go-glx/serializer.go @@ -287,7 +287,7 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte) SourceProperties: make(map[string]*PropertyDefinition), } - var allDuplicates []string + var allConflicts []string // Each file is a GLXFile fragment — the YAML top-level keys (persons:, // events:, event_types:, etc.) determine what entities it contains, @@ -301,8 +301,8 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte) if err := yaml.Unmarshal(data, &partial); err != nil { return nil, nil, fmt.Errorf("failed to unmarshal %s: %w", path, err) } - duplicates, _ := glx.Merge(&partial) - allDuplicates = append(allDuplicates, duplicates...) + conflicts, _ := glx.Merge(&partial) + allConflicts = append(allConflicts, conflicts...) } // Validate if requested @@ -312,7 +312,7 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte) } } - return glx, allDuplicates, nil + return glx, allConflicts, nil } // validateGLXFile validates a GLX archive using the built-in validation system. From 86a8f6bf43e9c8bb77707d16d167c32066dde8a9 Mon Sep 17 00:00:00 2001 From: Isaac Schepp Date: Thu, 2 Apr 2026 11:27:53 -0500 Subject: [PATCH 3/3] refactor: Rename duplicates to conflicts in merge test variables and function names --- glx/merge_runner_test.go | 2 +- go-glx/types_merge_test.go | 100 ++++++++++++++++++------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/glx/merge_runner_test.go b/glx/merge_runner_test.go index bec6fa82..3eec66ff 100644 --- a/glx/merge_runner_test.go +++ b/glx/merge_runner_test.go @@ -56,7 +56,7 @@ func TestMergeArchives_NewEntities(t *testing.T) { assert.Contains(t, dest.Persons, "person-b") } -func TestMergeArchives_Duplicates(t *testing.T) { +func TestMergeArchives_Conflicts(t *testing.T) { dest := &glxlib.GLXFile{ Persons: map[string]*glxlib.Person{ "person-a": {Properties: map[string]any{"name": "Person A"}}, diff --git a/go-glx/types_merge_test.go b/go-glx/types_merge_test.go index a7149249..e9588c0c 100644 --- a/go-glx/types_merge_test.go +++ b/go-glx/types_merge_test.go @@ -26,8 +26,8 @@ func TestGLXFile_Merge_EmptyFiles(t *testing.T) { g1 := &GLXFile{} g2 := &GLXFile{} - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "merging empty files should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "merging empty files should have no duplicates") } func TestGLXFile_Merge_NilDestMaps(t *testing.T) { @@ -45,8 +45,8 @@ func TestGLXFile_Merge_NilDestMaps(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.Len(t, g1.Persons, 1, "persons should be merged into initialized map") require.Len(t, g1.Events, 1, "events should be merged into initialized map") require.Len(t, g1.Places, 1, "places should be merged into initialized map") @@ -66,8 +66,8 @@ func TestGLXFile_Merge_Persons_NoDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.Len(t, g1.Persons, 2, "should have merged both persons") require.Contains(t, g1.Persons, "person-1") require.Contains(t, g1.Persons, "person-2") @@ -87,9 +87,9 @@ func TestGLXFile_Merge_Persons_WithDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one duplicate") - require.Contains(t, duplicates[0], "duplicate persons ID: person-1") + conflicts, _ := g1.Merge(g2) + require.Len(t, conflicts, 1, "should detect one duplicate") + require.Contains(t, conflicts[0], "duplicate persons ID: person-1") // person-2 should still be merged require.Len(t, g1.Persons, 2, "should merge non-duplicate persons") @@ -122,8 +122,8 @@ func TestGLXFile_Merge_AllEntityTypes(t *testing.T) { Media: map[string]*Media{"media-2": {}}, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // Verify all entity types were merged require.Len(t, g1.Persons, 2) @@ -157,8 +157,8 @@ func TestGLXFile_Merge_Vocabularies_NoDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.Len(t, g1.EventTypes, 2) require.Len(t, g1.ParticipantRoles, 2) } @@ -178,9 +178,9 @@ func TestGLXFile_Merge_Vocabularies_WithDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one conflict") - require.Contains(t, duplicates[0], "conflict event_types ID: birth") + conflicts, _ := g1.Merge(g2) + require.Len(t, conflicts, 1, "should detect one conflict") + require.Contains(t, conflicts[0], "conflict event_types ID: birth") // death should still be merged require.Len(t, g1.EventTypes, 2) @@ -213,8 +213,8 @@ func TestGLXFile_Merge_AllVocabularyTypes(t *testing.T) { GenderTypes: map[string]*GenderType{"gender-2": {}}, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // Verify all vocabulary types were merged require.Len(t, g1.EventTypes, 2) @@ -248,8 +248,8 @@ func TestGLXFile_Merge_PropertyVocabularies_NoDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.Len(t, g1.PersonProperties, 2) require.Len(t, g1.EventProperties, 2) } @@ -269,9 +269,9 @@ func TestGLXFile_Merge_PropertyVocabularies_WithDuplicates(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one conflict") - require.Contains(t, duplicates[0], "conflict person_properties ID: prop-1") + conflicts, _ := g1.Merge(g2) + require.Len(t, conflicts, 1, "should detect one conflict") + require.Contains(t, conflicts[0], "conflict person_properties ID: prop-1") // prop-2 should still be merged require.Len(t, g1.PersonProperties, 2) @@ -294,8 +294,8 @@ func TestGLXFile_Merge_AllPropertyVocabularies(t *testing.T) { PlaceProperties: map[string]*PropertyDefinition{"prop-2": {}}, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // Verify all property vocabulary types were merged require.Len(t, g1.PersonProperties, 2) @@ -333,14 +333,14 @@ func TestGLXFile_Merge_MultipleDuplicates(t *testing.T) { }, } - duplicates, skipped := g1.Merge(g2) - require.Len(t, duplicates, 2, "should detect two entity duplicates") + conflicts, skipped := g1.Merge(g2) + require.Len(t, conflicts, 2, "should detect two entity duplicates") require.Equal(t, 1, skipped, "should silently skip one identical vocab entry") // Check that entity duplicates are reported duplicateStr := "" var duplicateStrSb317 strings.Builder - for _, d := range duplicates { + for _, d := range conflicts { duplicateStrSb317.WriteString(d + " ") } duplicateStr += duplicateStrSb317.String() @@ -369,8 +369,8 @@ func TestGLXFile_Merge_NilMaps(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // nil dest maps should be initialized and populated with source data require.NotNil(t, g1.Persons) @@ -395,8 +395,8 @@ func TestGLXFile_Merge_SourceNilMaps(t *testing.T) { Events: nil, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // Destination should remain unchanged require.Len(t, g1.Persons, 1) @@ -437,8 +437,8 @@ func TestGLXFile_Merge_MixedEntitiesAndVocabularies(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") // Verify everything merged correctly require.Len(t, g1.Persons, 2) @@ -462,8 +462,8 @@ func TestGLXFile_Merge_PreservesExistingData(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates) + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts) // Verify original data is preserved require.Equal(t, "Original Name", g1.Persons["person-1"].Properties["primary_name"]) @@ -496,13 +496,13 @@ func TestGLXFile_Merge_DuplicateReporting(t *testing.T) { Media: map[string]*Media{"media-1": {}}, } - duplicates, _ := g1.Merge(g2) - require.Len(t, duplicates, 9, "should detect 9 duplicates") + conflicts, _ := g1.Merge(g2) + require.Len(t, conflicts, 9, "should detect 9 duplicates") // Verify messages include the entity type and ID duplicateStr := "" var duplicateStrSb476 strings.Builder - for _, d := range duplicates { + for _, d := range conflicts { duplicateStrSb476.WriteString(d + "\n") } duplicateStr += duplicateStrSb476.String() @@ -530,8 +530,8 @@ func TestGLXFile_Merge_Metadata_AdoptFromOther(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.NotNil(t, g1.ImportMetadata, "metadata should be adopted from other") require.Equal(t, "MyApp", g1.ImportMetadata.SourceSystem) require.Equal(t, DateString("2026-01-15"), g1.ImportMetadata.ExportDate) @@ -551,9 +551,9 @@ func TestGLXFile_Merge_Metadata_DuplicateDetected(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Len(t, duplicates, 1, "should detect one metadata duplicate") - require.Contains(t, duplicates[0], "duplicate metadata") + conflicts, _ := g1.Merge(g2) + require.Len(t, conflicts, 1, "should detect one metadata duplicate") + require.Contains(t, conflicts[0], "duplicate metadata") // Original metadata is preserved (first one wins) require.Equal(t, "AppA", g1.ImportMetadata.SourceSystem) @@ -568,8 +568,8 @@ func TestGLXFile_Merge_Metadata_EmptyMetadataIgnored(t *testing.T) { ImportMetadata: &Metadata{}, // all fields empty } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates") require.Nil(t, g1.ImportMetadata, "empty metadata should not be adopted") } @@ -582,8 +582,8 @@ func TestGLXFile_Merge_Metadata_NilMetadataBothSides(t *testing.T) { Persons: map[string]*Person{}, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates) + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts) require.Nil(t, g1.ImportMetadata) } @@ -600,8 +600,8 @@ func TestGLXFile_Merge_Metadata_ExistingEmptyDoesNotConflict(t *testing.T) { }, } - duplicates, _ := g1.Merge(g2) - require.Empty(t, duplicates, "should have no duplicates since g1 metadata has no content") + conflicts, _ := g1.Merge(g2) + require.Empty(t, conflicts, "should have no duplicates since g1 metadata has no content") require.NotNil(t, g1.ImportMetadata) require.Equal(t, "NewApp", g1.ImportMetadata.SourceSystem) }