Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions glx/merge_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ import (

// mergeResult holds statistics from a merge operation.
type mergeResult struct {
Duplicates []string
Conflicts []string
IdenticalSkipped int
NewPersons int
NewEvents int
NewRelationships int
Expand All @@ -47,13 +48,14 @@ func mergeArchivesInMemory(dest, src *glxlib.GLXFile) *mergeResult {
// Snapshot counts before merge
before := entityCounts(dest)

duplicates := dest.Merge(src)
conflicts, identicalSkipped := dest.Merge(src)

// Compute new entities added
after := entityCounts(dest)

return &mergeResult{
Duplicates: duplicates,
Conflicts: conflicts,
IdenticalSkipped: identicalSkipped,
NewPersons: after.persons - before.persons,
NewEvents: after.events - before.events,
NewRelationships: after.relationships - before.relationships,
Expand Down Expand Up @@ -190,13 +192,17 @@ func mergeArchives(srcPath, destPath string, dryRun bool) error {
fmt.Println(" No new entities to merge.")
}

if len(result.Duplicates) > 0 {
fmt.Printf("\n Duplicates (%d — skipped, destination kept):\n", len(result.Duplicates))
for _, d := range result.Duplicates {
if len(result.Conflicts) > 0 {
fmt.Printf("\n Conflicts (%d — skipped, destination kept):\n", len(result.Conflicts))
for _, d := range result.Conflicts {
fmt.Printf(" %s\n", d)
}
}

if result.IdenticalSkipped > 0 {
fmt.Printf("\n %d identical vocabulary/property entries skipped\n", result.IdenticalSkipped)
}

if dryRun {
fmt.Println("\n(dry run — no files written)")
return nil
Expand Down
10 changes: 5 additions & 5 deletions glx/merge_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ func TestMergeArchives_NewEntities(t *testing.T) {

result := mergeArchivesInMemory(dest, src)

assert.Empty(t, result.Duplicates, "no duplicates expected")
assert.Empty(t, result.Conflicts, "no conflicts expected")
assert.Equal(t, 1, result.NewPersons)
assert.Equal(t, 1, result.NewEvents)
assert.Len(t, dest.Persons, 2)
assert.Contains(t, dest.Persons, "person-b")
}

func TestMergeArchives_Duplicates(t *testing.T) {
func TestMergeArchives_Conflicts(t *testing.T) {
dest := &glxlib.GLXFile{
Persons: map[string]*glxlib.Person{
"person-a": {Properties: map[string]any{"name": "Person A"}},
Expand All @@ -74,8 +74,8 @@ func TestMergeArchives_Duplicates(t *testing.T) {

result := mergeArchivesInMemory(dest, src)

require.Len(t, result.Duplicates, 1)
assert.Contains(t, result.Duplicates[0], "person-a")
require.Len(t, result.Conflicts, 1)
assert.Contains(t, result.Conflicts[0], "person-a")
assert.Equal(t, 1, result.NewPersons)
}

Expand All @@ -90,7 +90,7 @@ func TestMergeArchives_EmptySource(t *testing.T) {

result := mergeArchivesInMemory(dest, src)

assert.Empty(t, result.Duplicates)
assert.Empty(t, result.Conflicts)
assert.Equal(t, 0, result.TotalNew())
assert.Len(t, dest.Persons, 1)
}
Expand Down
8 changes: 4 additions & 4 deletions go-glx/serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte)
SourceProperties: make(map[string]*PropertyDefinition),
}

var allDuplicates []string
var allConflicts []string

// Each file is a GLXFile fragment — the YAML top-level keys (persons:,
// events:, event_types:, etc.) determine what entities it contains,
Expand All @@ -301,8 +301,8 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte)
if err := yaml.Unmarshal(data, &partial); err != nil {
return nil, nil, fmt.Errorf("failed to unmarshal %s: %w", path, err)
}
duplicates := glx.Merge(&partial)
allDuplicates = append(allDuplicates, duplicates...)
conflicts, _ := glx.Merge(&partial)
allConflicts = append(allConflicts, conflicts...)
}

// Validate if requested
Expand All @@ -312,7 +312,7 @@ func (s *DefaultSerializer) DeserializeMultiFileFromMap(files map[string][]byte)
}
}

return glx, allDuplicates, nil
return glx, allConflicts, nil
}

// validateGLXFile validates a GLX archive using the built-in validation system.
Expand Down
119 changes: 74 additions & 45 deletions go-glx/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package glx

import (
"fmt"
"reflect"
)

// Metadata holds import metadata extracted from GEDCOM HEAD and SUBM records.
Expand Down Expand Up @@ -395,56 +396,65 @@ type TemporalValue struct {
Date DateString `yaml:"date,omitempty"` // FamilySearch normalized date string
}

// Merge combines another GLXFile into this one, returning duplicate/conflict messages.
// Messages may report duplicate entity or vocabulary IDs ("duplicate <type> ID: <id>")
// or metadata conflicts ("duplicate metadata: ...").
func (g *GLXFile) Merge(other *GLXFile) []string {
// Merge combines another GLXFile into this one, returning conflict messages and
// a count of identical items silently skipped. Entity duplicates (same ID,
// regardless of value) are always reported as conflicts. Vocabulary/property
// duplicates are only reported when the values differ; identical vocabulary
// entries (e.g., standard vocabularies present in both archives) are silently
// skipped and counted.
func (g *GLXFile) Merge(other *GLXFile) (conflicts []string, identicalSkipped int) {
g.initMaps()
g.validation = nil // invalidate cached validation since maps are being mutated
duplicates := make([]string, 0, 10)
conflicts = make([]string, 0, 10)

// Merge metadata (first one wins; report duplicate if both have content)
if other.ImportMetadata != nil && other.ImportMetadata.hasContent() {
if g.ImportMetadata != nil && g.ImportMetadata.hasContent() {
duplicates = append(duplicates, "duplicate metadata: metadata appears in multiple files")
conflicts = append(conflicts, "duplicate metadata: metadata appears in multiple files")
} else {
g.ImportMetadata = other.ImportMetadata
}
}

// Merge entities (fail on duplicates)
duplicates = append(duplicates, mergeMap("persons", g.Persons, other.Persons)...)
duplicates = append(duplicates, mergeMap("relationships", g.Relationships, other.Relationships)...)
duplicates = append(duplicates, mergeMap("events", g.Events, other.Events)...)
duplicates = append(duplicates, mergeMap("places", g.Places, other.Places)...)
duplicates = append(duplicates, mergeMap("sources", g.Sources, other.Sources)...)
duplicates = append(duplicates, mergeMap("citations", g.Citations, other.Citations)...)
duplicates = append(duplicates, mergeMap("repositories", g.Repositories, other.Repositories)...)
duplicates = append(duplicates, mergeMap("assertions", g.Assertions, other.Assertions)...)
duplicates = append(duplicates, mergeMap("media", g.Media, other.Media)...)

// Merge vocabularies (ALSO fail on duplicates - treat same as entities)
duplicates = append(duplicates, mergeMap("event_types", g.EventTypes, other.EventTypes)...)
duplicates = append(duplicates, mergeMap("relationship_types", g.RelationshipTypes, other.RelationshipTypes)...)
duplicates = append(duplicates, mergeMap("place_types", g.PlaceTypes, other.PlaceTypes)...)
duplicates = append(duplicates, mergeMap("source_types", g.SourceTypes, other.SourceTypes)...)
duplicates = append(duplicates, mergeMap("repository_types", g.RepositoryTypes, other.RepositoryTypes)...)
duplicates = append(duplicates, mergeMap("media_types", g.MediaTypes, other.MediaTypes)...)
duplicates = append(duplicates, mergeMap("gender_types", g.GenderTypes, other.GenderTypes)...)
duplicates = append(duplicates, mergeMap("participant_roles", g.ParticipantRoles, other.ParticipantRoles)...)
duplicates = append(duplicates, mergeMap("confidence_levels", g.ConfidenceLevels, other.ConfidenceLevels)...)

// Merge property vocabularies
duplicates = append(duplicates, mergeMap("person_properties", g.PersonProperties, other.PersonProperties)...)
duplicates = append(duplicates, mergeMap("event_properties", g.EventProperties, other.EventProperties)...)
duplicates = append(duplicates, mergeMap("relationship_properties", g.RelationshipProperties, other.RelationshipProperties)...)
duplicates = append(duplicates, mergeMap("place_properties", g.PlaceProperties, other.PlaceProperties)...)
duplicates = append(duplicates, mergeMap("media_properties", g.MediaProperties, other.MediaProperties)...)
duplicates = append(duplicates, mergeMap("repository_properties", g.RepositoryProperties, other.RepositoryProperties)...)
duplicates = append(duplicates, mergeMap("citation_properties", g.CitationProperties, other.CitationProperties)...)
duplicates = append(duplicates, mergeMap("source_properties", g.SourceProperties, other.SourceProperties)...)

return duplicates
// Merge entities — always report duplicates (entity ID collisions are significant)
conflicts = append(conflicts, mergeMap("persons", g.Persons, other.Persons)...)
conflicts = append(conflicts, mergeMap("relationships", g.Relationships, other.Relationships)...)
conflicts = append(conflicts, mergeMap("events", g.Events, other.Events)...)
conflicts = append(conflicts, mergeMap("places", g.Places, other.Places)...)
conflicts = append(conflicts, mergeMap("sources", g.Sources, other.Sources)...)
conflicts = append(conflicts, mergeMap("citations", g.Citations, other.Citations)...)
conflicts = append(conflicts, mergeMap("repositories", g.Repositories, other.Repositories)...)
conflicts = append(conflicts, mergeMap("assertions", g.Assertions, other.Assertions)...)
conflicts = append(conflicts, mergeMap("media", g.Media, other.Media)...)

// Helper to accumulate mergeMapDedup results
addDedup := func(c []string, s int) {
conflicts = append(conflicts, c...)
identicalSkipped += s
}

// Merge vocabularies — silently skip identical entries, report only true conflicts
addDedup(mergeMapDedup("event_types", g.EventTypes, other.EventTypes))
addDedup(mergeMapDedup("relationship_types", g.RelationshipTypes, other.RelationshipTypes))
addDedup(mergeMapDedup("place_types", g.PlaceTypes, other.PlaceTypes))
addDedup(mergeMapDedup("source_types", g.SourceTypes, other.SourceTypes))
addDedup(mergeMapDedup("repository_types", g.RepositoryTypes, other.RepositoryTypes))
addDedup(mergeMapDedup("media_types", g.MediaTypes, other.MediaTypes))
addDedup(mergeMapDedup("gender_types", g.GenderTypes, other.GenderTypes))
addDedup(mergeMapDedup("participant_roles", g.ParticipantRoles, other.ParticipantRoles))
addDedup(mergeMapDedup("confidence_levels", g.ConfidenceLevels, other.ConfidenceLevels))

// Merge property vocabularies — same dedup behavior
addDedup(mergeMapDedup("person_properties", g.PersonProperties, other.PersonProperties))
addDedup(mergeMapDedup("event_properties", g.EventProperties, other.EventProperties))
addDedup(mergeMapDedup("relationship_properties", g.RelationshipProperties, other.RelationshipProperties))
addDedup(mergeMapDedup("place_properties", g.PlaceProperties, other.PlaceProperties))
addDedup(mergeMapDedup("media_properties", g.MediaProperties, other.MediaProperties))
addDedup(mergeMapDedup("repository_properties", g.RepositoryProperties, other.RepositoryProperties))
addDedup(mergeMapDedup("citation_properties", g.CitationProperties, other.CitationProperties))
addDedup(mergeMapDedup("source_properties", g.SourceProperties, other.SourceProperties))

return conflicts, identicalSkipped
}

// initMaps ensures all entity and vocabulary maps are initialized (non-nil).
Expand Down Expand Up @@ -532,13 +542,11 @@ func (g *GLXFile) initMaps() {
}
}

// mergeMap is used for BOTH entities and vocabularies - duplicates are always errors
// mergeMap merges src into dest, reporting all key collisions as duplicates.
// Used for entity maps where any ID collision is significant.
func mergeMap[T any](mapType string, dest, src map[string]*T) []string {
var duplicates []string
if dest == nil {
return duplicates
}
if src == nil {
if dest == nil || src == nil {
return duplicates
}
for k, v := range src {
Expand All @@ -548,6 +556,27 @@ func mergeMap[T any](mapType string, dest, src map[string]*T) []string {
dest[k] = v
}
}

return duplicates
}

// mergeMapDedup merges src into dest for vocabulary/property maps, where the
// same standard entries often appear in both archives. A key already present
// in dest with a deeply-equal value is silently skipped (counted in skipped);
// a key present with a different value is reported as a conflict and the
// destination value is kept. Keys absent from dest are copied over.
func mergeMapDedup[T any](mapType string, dest, src map[string]*T) (conflicts []string, skipped int) {
	if dest == nil || src == nil {
		return nil, 0
	}
	for key, val := range src {
		current, present := dest[key]
		if !present {
			// New entry — adopt it directly.
			dest[key] = val
			continue
		}
		if reflect.DeepEqual(current, val) {
			// Identical duplicate (e.g., a shared standard vocabulary) — no noise.
			skipped++
			continue
		}
		// Same ID, different definition: genuine conflict; destination wins.
		conflicts = append(conflicts, fmt.Sprintf("conflict %s ID: %s", mapType, key))
	}
	return conflicts, skipped
}
Loading
Loading