diff --git a/checker/checker.go b/checker/checker.go
index d6fe49eb4..e6ab23dfb 100644
--- a/checker/checker.go
+++ b/checker/checker.go
@@ -49,6 +49,16 @@ func New(repo *repository.Repository) *Checker {
 
 const defaultParallelism = 40
 
+// ErrDuplicatePacks is returned when a pack is found in more than one index.
+type ErrDuplicatePacks struct {
+	PackID  backend.ID
+	Indexes backend.IDSet
+}
+
+func (e ErrDuplicatePacks) Error() string {
+	return fmt.Sprintf("pack %v contained in several indexes: %v", e.PackID.Str(), e.Indexes)
+}
+
 // LoadIndex loads all index files.
 func (c *Checker) LoadIndex() error {
 	debug.Log("LoadIndex", "Start")
@@ -97,14 +107,16 @@ func (c *Checker) LoadIndex() error {
 	done := make(chan struct{})
 	defer close(done)
 
+	packToIndex := make(map[backend.ID]backend.IDSet)
+
 	for res := range indexCh {
 		debug.Log("LoadIndex", "process index %v", res.ID)
-		id, err := backend.ParseID(res.ID)
+		idxID, err := backend.ParseID(res.ID)
 		if err != nil {
 			return err
 		}
 
-		c.indexes[id] = res.Index
+		c.indexes[idxID] = res.Index
 		c.masterIndex.Insert(res.Index)
 
 		debug.Log("LoadIndex", "process blobs")
@@ -114,6 +126,11 @@ func (c *Checker) LoadIndex() error {
 			c.blobs[blob.ID] = struct{}{}
 			c.blobRefs.M[blob.ID] = 0
 			cnt++
+
+			if _, ok := packToIndex[blob.PackID]; !ok {
+				packToIndex[blob.PackID] = backend.NewIDSet()
+			}
+			packToIndex[blob.PackID].Insert(idxID)
 		}
 
 		debug.Log("LoadIndex", "%d blobs processed", cnt)
@@ -121,6 +138,17 @@ func (c *Checker) LoadIndex() error {
 
 	debug.Log("LoadIndex", "done, error %v", perr)
 
+	debug.Log("LoadIndex", "checking for duplicate packs")
+	for packID := range c.packs {
+		debug.Log("LoadIndex", " check pack %v: contained in %d indexes", packID.Str(), len(packToIndex[packID]))
+		if len(packToIndex[packID]) > 1 {
+			return ErrDuplicatePacks{
+				PackID:  packID,
+				Indexes: packToIndex[packID],
+			}
+		}
+	}
+
 	c.repo.SetIndex(c.masterIndex)
 
 	return perr
diff --git a/checker/checker_test.go b/checker/checker_test.go
index 3f5362a78..24ac88ec5
100644
--- a/checker/checker_test.go
+++ b/checker/checker_test.go
@@ -140,3 +140,21 @@ func TestUnreferencedBlobs(t *testing.T) {
 		Equals(t, unusedBlobsBySnapshot, blobs)
 	})
 }
+
+var checkerDuplicateIndexTestData = filepath.Join("testdata", "duplicate-packs-in-index-test-repo.tar.gz")
+
+func TestDuplicatePacksInIndex(t *testing.T) {
+	WithTestEnvironment(t, checkerDuplicateIndexTestData, func(repodir string) {
+		repo := OpenLocalRepo(t, repodir)
+
+		chkr := checker.New(repo)
+		err := chkr.LoadIndex()
+		if err == nil {
+			t.Fatalf("did not get expected checker error for duplicate packs in indexes")
+		}
+
+		if _, ok := err.(checker.ErrDuplicatePacks); !ok {
+			t.Fatalf("did not get ErrDuplicatePacks, got %v instead", err)
+		}
+	})
+}
diff --git a/checker/testdata/duplicate-packs-in-index-test-repo.tar.gz b/checker/testdata/duplicate-packs-in-index-test-repo.tar.gz
new file mode 100644
index 000000000..f0e194d8d
Binary files /dev/null and b/checker/testdata/duplicate-packs-in-index-test-repo.tar.gz differ