From d40f566e419fed34d108338e263af3221ec78d57 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 15 Jan 2017 15:45:52 +0100 Subject: [PATCH] Index: Use slices instead of maps, reduce data --- src/cmds/restic/cmd_prune.go | 21 +++++---- src/restic/index/index.go | 80 ++++++++-------------------------- src/restic/index/index_test.go | 6 +-- 3 files changed, 33 insertions(+), 74 deletions(-) diff --git a/src/cmds/restic/cmd_prune.go b/src/cmds/restic/cmd_prune.go index 98c32f784..00f74e559 100644 --- a/src/cmds/restic/cmd_prune.go +++ b/src/cmds/restic/cmd_prune.go @@ -103,11 +103,13 @@ func runPrune(gopts GlobalOptions) error { return err } + blobs := 0 for _, pack := range idx.Packs { stats.bytes += pack.Size + blobs += len(pack.Entries) } Verbosef("repository contains %v packs (%v blobs) with %v bytes\n", - len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes))) + len(idx.Packs), blobs, formatBytes(uint64(stats.bytes))) blobCount := make(map[restic.BlobHandle]int) duplicateBlobs := 0 @@ -164,14 +166,17 @@ func runPrune(gopts GlobalOptions) error { // find packs that need a rewrite rewritePacks := restic.NewIDSet() - for h, blob := range idx.Blobs { - if !usedBlobs.Has(h) { - rewritePacks.Merge(blob.Packs) - continue - } + for _, pack := range idx.Packs { + for _, blob := range pack.Entries { + h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} + if !usedBlobs.Has(h) { + rewritePacks.Insert(pack.ID) + continue + } - if blobCount[h] > 1 { - rewritePacks.Merge(blob.Packs) + if blobCount[h] > 1 { + rewritePacks.Insert(pack.ID) + } } } diff --git a/src/restic/index/index.go b/src/restic/index/index.go index 4481d0d5d..7d8b72836 100644 --- a/src/restic/index/index.go +++ b/src/restic/index/index.go @@ -14,27 +14,20 @@ import ( // Pack contains information about the contents of a pack. type Pack struct { + ID restic.ID Size int64 Entries []restic.Blob } -// Blob contains information about a blob. -type Blob struct { - Size int64 - Packs restic.IDSet -} - // Index contains information about blobs and packs stored in a repo. type Index struct { Packs map[restic.ID]Pack - Blobs map[restic.BlobHandle]Blob IndexIDs restic.IDSet } func newIndex() *Index { return &Index{ Packs: make(map[restic.ID]Pack), - Blobs: make(map[restic.BlobHandle]Blob), IndexIDs: restic.NewIDSet(), } } @@ -70,7 +63,7 @@ func New(repo restic.Repository, p *restic.Progress) (*Index, error) { return nil, err } - p := Pack{Entries: j.Entries(), Size: j.Size()} + p := Pack{ID: packID, Entries: j.Entries(), Size: j.Size()} idx.Packs[packID] = p } @@ -181,18 +174,6 @@ func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error idx.Packs[id] = Pack{Size: size, Entries: entries} - for _, entry := range entries { - h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} - if _, ok := idx.Blobs[h]; !ok { - idx.Blobs[h] = Blob{ - Size: int64(entry.Length), - Packs: restic.NewIDSet(), - } - } - - idx.Blobs[h].Packs.Insert(id) - } - return nil } @@ -202,15 +183,6 @@ func (idx *Index) RemovePack(id restic.ID) error { return errors.Errorf("pack %v not found in the index", id.Str()) } - for _, blob := range idx.Packs[id].Entries { - h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} - idx.Blobs[h].Packs.Delete(id) - - if len(idx.Blobs[h].Packs) == 0 { - delete(idx.Blobs, h) - } - } - delete(idx.Packs, id) return nil @@ -239,14 +211,11 @@ func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) { func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) { packs = restic.NewIDSet() - for h := range blobs { - blob, ok := idx.Blobs[h] - if !ok { - continue - } - - for id := range blob.Packs { - packs.Insert(id) + for id, p := range idx.Packs { + for _, entry := range p.Entries { + if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) { + packs.Insert(id) + } } } @@ -264,31 +233,20 @@ type Location struct { var ErrBlobNotFound = errors.New("blob not found in index") // FindBlob returns a list of packs and positions the blob can be found in. -func (idx *Index) FindBlob(h restic.BlobHandle) ([]Location, error) { - blob, ok := idx.Blobs[h] - if !ok { - return nil, ErrBlobNotFound +func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) { + for id, p := range idx.Packs { + for _, entry := range p.Entries { + if entry.ID.Equal(h.ID) && entry.Type == h.Type { + result = append(result, Location{ + PackID: id, + Blob: entry, + }) + } + } } - result := make([]Location, 0, len(blob.Packs)) - for packID := range blob.Packs { - pack, ok := idx.Packs[packID] - if !ok { - return nil, errors.Errorf("pack %v not found in index", packID.Str()) - } - - for _, entry := range pack.Entries { - if entry.Type != h.Type { - continue - } - - if !entry.ID.Equal(h.ID) { - continue - } - - loc := Location{PackID: packID, Blob: entry} - result = append(result, loc) - } + if len(result) == 0 { + return nil, ErrBlobNotFound } return result, nil diff --git a/src/restic/index/index_test.go b/src/restic/index/index_test.go index 7905f7368..a7de094d4 100644 --- a/src/restic/index/index_test.go +++ b/src/restic/index/index_test.go @@ -151,7 +151,7 @@ func TestIndexDuplicateBlobs(t *testing.T) { if len(dups) == 0 { t.Errorf("no duplicate blobs found") } - t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs)) + t.Logf("%d packs, %d duplicate blobs", len(idx.Packs), len(dups)) packs := idx.PacksForBlobs(dups) if len(packs) == 0 { @@ -249,10 +249,6 @@ func TestIndexAddRemovePack(t *testing.T) { if err == nil { t.Errorf("removed blob %v found in index", h) } - - if _, ok := idx.Blobs[h]; ok { - t.Errorf("removed blob %v found in index.Blobs", h) - } } }