diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 2e2e233d6..185fb52ce 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -190,6 +190,8 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { } } + c.masterIndex.MergeFinalIndexes() + err = c.repo.SetIndex(c.masterIndex) if err != nil { debug.Log("SetIndex returned error: %v", err) diff --git a/internal/repository/index.go b/internal/repository/index.go index 560696ba5..e6053fa72 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -49,8 +49,8 @@ type Index struct { // only used by Store, StorePacks does not check for already saved packIDs packIDToIndex map[restic.ID]int - final bool // set to true for all indexes read from the backend ("finalized") - id restic.ID // set to the ID of the index when it's finalized + final bool // set to true for all indexes read from the backend ("finalized") + ids restic.IDs // set to the IDs of the contained finalized indexes supersedes restic.IDs created time.Time } @@ -393,17 +393,17 @@ func (idx *Index) Finalize() { idx.packIDToIndex = nil } -// ID returns the ID of the index, if available. If the index is not yet +// IDs returns the IDs of the index, if available. If the index is not yet // finalized, an error is returned. -func (idx *Index) ID() (restic.ID, error) { +func (idx *Index) IDs() (restic.IDs, error) { idx.m.Lock() defer idx.m.Unlock() if !idx.final { - return restic.ID{}, errors.New("index not finalized") + return nil, errors.New("index not finalized") } - return idx.id, nil + return idx.ids, nil } // SetID sets the ID the index has been written to. 
This requires that @@ -416,12 +416,12 @@ func (idx *Index) SetID(id restic.ID) error { return errors.New("index is not final") } - if !idx.id.IsNull() { + if len(idx.ids) > 0 { return errors.New("ID already set") } debug.Log("ID set to %v", id) - idx.id = id + idx.ids = append(idx.ids, id) return nil } @@ -462,6 +462,38 @@ func (idx *Index) TreePacks() restic.IDs { return idx.treePacks } +// merge merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx. +// idx2 is not changed by this method. +func (idx *Index) merge(idx2 *Index) error { + idx.m.Lock() + defer idx.m.Unlock() + idx2.m.Lock() + defer idx2.m.Unlock() + + if !idx2.final { + return errors.New("index to merge is not final!") + } + + packlen := len(idx.packs) + // copy all index entries of idx2 to idx + for typ := range idx2.byType { + m2 := &idx2.byType[typ] + m := &idx.byType[typ] + m2.foreach(func(entry *indexEntry) bool { + // packIndex is changed as idx2.packs is appended to idx.packs, see below + m.add(entry.id, entry.packIndex+packlen, entry.offset, entry.length) + return true + }) + } + + idx.packs = append(idx.packs, idx2.packs...) + idx.treePacks = append(idx.treePacks, idx2.treePacks...) + idx.ids = append(idx.ids, idx2.ids...) + idx.supersedes = append(idx.supersedes, idx2.supersedes...) + + return nil +} + // isErrOldIndex returns true if the error may be caused by an old index // format. 
func isErrOldIndex(err error) bool { @@ -581,7 +613,7 @@ func LoadIndexWithDecoder(ctx context.Context, repo restic.Repository, buf []byt return nil, buf[:0], err } - idx.id = id + idx.ids = append(idx.ids, id) return idx, buf, nil } diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index 513f4c5d1..7b95a13c2 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -135,10 +135,9 @@ func TestIndexSerialize(t *testing.T) { id := restic.NewRandomID() rtest.OK(t, idx.SetID(id)) - id2, err := idx.ID() + ids, err := idx.IDs() rtest.OK(t, err) - rtest.Assert(t, id2.Equal(id), - "wrong ID returned: want %v, got %v", id, id2) + rtest.Equals(t, restic.IDs{id}, ids) idx3, err := repository.DecodeIndex(wr3.Bytes()) rtest.OK(t, err) @@ -336,7 +335,7 @@ var ( ) func initBenchmarkIndexJSON() { - idx, _ := createRandomIndex(rand.New(rand.NewSource(0))) + idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000) var buf bytes.Buffer idx.Encode(&buf) benchmarkIndexJSON = buf.Bytes() @@ -418,11 +417,11 @@ func NewRandomTestID(rng *rand.Rand) restic.ID { return id } -func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.ID) { +func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lookupID restic.ID) { idx = repository.NewIndex() - // create index with 200k pack files - for i := 0; i < 200000; i++ { + // create index with given number of pack files + for i := 0; i < packfiles; i++ { packID := NewRandomTestID(rng) var blobs []restic.Blob offset := 0 @@ -449,7 +448,7 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I } func BenchmarkIndexHasUnknown(b *testing.B) { - idx, _ := createRandomIndex(rand.New(rand.NewSource(0))) + idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000) lookupID := restic.NewRandomID() b.ResetTimer() @@ -460,7 +459,7 @@ func BenchmarkIndexHasUnknown(b *testing.B) { } func BenchmarkIndexHasKnown(b 
*testing.B) { - idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0))) + idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0)), 200000) b.ResetTimer() @@ -474,7 +473,7 @@ func BenchmarkIndexAlloc(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { - createRandomIndex(rng) + createRandomIndex(rng, 200000) } } @@ -484,7 +483,7 @@ func BenchmarkIndexAllocParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { rng := rand.New(rand.NewSource(0)) for pb.Next() { - createRandomIndex(rng) + createRandomIndex(rng, 200000) } }) } diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 24762ffb2..2705b5994 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -18,7 +18,12 @@ type MasterIndex struct { // NewMasterIndex creates a new master index. func NewMasterIndex() *MasterIndex { - return &MasterIndex{pendingBlobs: restic.NewBlobSet()} + // Always add an empty final index, such that MergeFinalIndexes can merge into this. + // Note that removing this index could lead to a race condition in the rare + // situation that only two indexes exist which are saved and merged concurrently. + idx := []*Index{NewIndex()} + idx[0].Finalize() + return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()} } // Lookup queries all known Indexes for the ID and returns the first match. @@ -237,6 +242,31 @@ func (mi *MasterIndex) Each(ctx context.Context) <-chan restic.PackedBlob { return ch } +// MergeFinalIndexes merges all final indexes together. +// After calling, there will be only one big final index in MasterIndex +// containing all final index contents. +// Indexes that are not final are left untouched. +// This merging can only be called after all index files are loaded - as +// removing of superseded index contents is only possible for unmerged indexes. 
+func (mi *MasterIndex) MergeFinalIndexes() { + mi.idxMutex.Lock() + defer mi.idxMutex.Unlock() + + // The first index is always final and the one to merge into + newIdx := mi.idx[:1] + for i := 1; i < len(mi.idx); i++ { + idx := mi.idx[i] + // clear reference in masterindex as it may become stale + mi.idx[i] = nil + if !idx.Final() { + newIdx = append(newIdx, idx) + } else { + mi.idx[0].merge(idx) + } + } + mi.idx = newIdx +} + // RebuildIndex combines all known indexes to a new index, leaving out any // packs whose ID is contained in packBlacklist. The new index contains the IDs // of all known indexes in the "supersedes" field. @@ -267,15 +297,15 @@ func (mi *MasterIndex) RebuildIndex(packBlacklist restic.IDSet) (*Index, error) continue } - id, err := idx.ID() + ids, err := idx.IDs() if err != nil { debug.Log("index %d does not have an ID: %v", err) return nil, err } - debug.Log("adding index id %v to supersedes field", id) + debug.Log("adding index ids %v to supersedes field", ids) - err = newIndex.AddToSupersedes(id) + err = newIndex.AddToSupersedes(ids...) 
if err != nil { return nil, err } diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 3b858253a..e16d55bd5 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -57,11 +57,75 @@ func TestMasterIndexLookup(t *testing.T) { rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") } -func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { - idx1, lookupID := createRandomIndex(rand.New(rand.NewSource(0))) +func TestMasterMergeFinalIndexes(t *testing.T) { + idInIdx1 := restic.NewRandomID() + idInIdx2 := restic.NewRandomID() + + blob1 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx1, + Length: 10, + Offset: 0, + }, + } + + blob2 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx2, + Length: 100, + Offset: 10, + }, + } + + idx1 := repository.NewIndex() + idx1.Store(blob1) + + idx2 := repository.NewIndex() + idx2.Store(blob2) mIdx := repository.NewMasterIndex() mIdx.Insert(idx1) + mIdx.Insert(idx2) + + finalIndexes := mIdx.FinalizeNotFinalIndexes() + rtest.Equals(t, []*repository.Index{idx1, idx2}, finalIndexes) + + mIdx.MergeFinalIndexes() + + blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1) + rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) + + blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2) + rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) + + blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) + rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id") + rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") +} + +func createRandomMasterIndex(rng *rand.Rand, num, size int) 
(*repository.MasterIndex, restic.ID) { + mIdx := repository.NewMasterIndex() + for i := 0; i < num-1; i++ { + idx, _ := createRandomIndex(rng, size) + mIdx.Insert(idx) + } + idx1, lookupID := createRandomIndex(rng, size) + mIdx.Insert(idx1) + + mIdx.FinalizeNotFinalIndexes() + mIdx.MergeFinalIndexes() + + return mIdx, lookupID +} + +func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { + mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000) b.ResetTimer() @@ -71,16 +135,7 @@ func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { } func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) { - rng := rand.New(rand.NewSource(0)) - mIdx := repository.NewMasterIndex() - - for i := 0; i < 5; i++ { - idx, _ := createRandomIndex(rng) - mIdx.Insert(idx) - } - - idx1, lookupID := createRandomIndex(rng) - mIdx.Insert(idx1) + mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 100, 10000) b.ResetTimer() @@ -90,11 +145,9 @@ func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) { } func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) { - lookupID := restic.NewRandomID() - idx1, _ := createRandomIndex(rand.New(rand.NewSource(0))) - mIdx := repository.NewMasterIndex() - mIdx.Insert(idx1) + lookupID := restic.NewRandomID() + mIdx, _ := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000) b.ResetTimer() @@ -104,14 +157,8 @@ func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) { } func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) { - rng := rand.New(rand.NewSource(0)) lookupID := restic.NewRandomID() - mIdx := repository.NewMasterIndex() - - for i := 0; i < 6; i++ { - idx, _ := createRandomIndex(rng) - mIdx.Insert(idx) - } + mIdx, _ := createRandomMasterIndex(rand.New(rand.NewSource(0)), 100, 10000) b.ResetTimer() @@ -123,16 +170,12 @@ func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) { func BenchmarkMasterIndexLookupParallel(b *testing.B) { mIdx := 
repository.NewMasterIndex() - for _, numindices := range []int{5, 10, 20} { + for _, numindices := range []int{25, 50, 100} { var lookupID restic.ID b.StopTimer() rng := rand.New(rand.NewSource(0)) - for i := 0; i < numindices; i++ { - var idx *repository.Index - idx, lookupID = createRandomIndex(rng) - mIdx.Insert(idx) - } + mIdx, lookupID = createRandomMasterIndex(rng, numindices, 10000) b.StartTimer() name := fmt.Sprintf("known,indices=%d", numindices) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 64781f9e4..0d5242022 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -361,12 +361,14 @@ func (r *Repository) SetIndex(i restic.Index) error { ids := restic.NewIDSet() for _, idx := range r.idx.All() { - id, err := idx.ID() + indexIDs, err := idx.IDs() if err != nil { debug.Log("not using index, ID() returned error %v", err) continue } - ids.Insert(id) + for _, id := range indexIDs { + ids.Insert(id) + } } return r.PrepareCache(ids) @@ -396,6 +398,7 @@ func (r *Repository) saveIndex(ctx context.Context, indexes ...*Index) error { debug.Log("Saved index %d as %v", i, sid) } + r.idx.MergeFinalIndexes() return nil } @@ -479,12 +482,16 @@ func (r *Repository) LoadIndex(ctx context.Context) error { validIndex := restic.NewIDSet() wg.Go(func() error { for idx := range indexCh { - id, err := idx.ID() + ids, err := idx.IDs() if err == nil { - validIndex.Insert(id) + for _, id := range ids { + validIndex.Insert(id) + } } + r.idx.Insert(idx) } + r.idx.MergeFinalIndexes() return nil })