From 184103647ad5e8ce2cf87d1efa5985cf4d4b2560 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 1 Feb 2020 21:09:52 +0100 Subject: [PATCH] FindUsedBlobs: merge seen into blobs BlobSet The seen BlobSet always contained a subset of the entries in blobs. Thus use blobs instead and avoid the memory overhead of the second set. Suggested-by: Alexander Weiss --- cmd/restic/cmd_prune.go | 3 +-- cmd/restic/cmd_stats.go | 9 ++++----- internal/restic/find.go | 11 ++++------- internal/restic/find_test.go | 5 ++--- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 6a3af8332..7d374b1b8 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -189,14 +189,13 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { Verbosef("find data that is still in use for %d snapshots\n", stats.snapshots) usedBlobs := restic.NewBlobSet() - seenBlobs := restic.NewBlobSet() bar = newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots") bar.Start() for _, sn := range snapshots { debug.Log("process snapshot %v", sn.ID()) - err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs, seenBlobs) + err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs) if err != nil { if repo.Backend().IsNotExist(err) { return errors.Fatal("unable to load a tree from the repo: " + err.Error()) diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index 7228bf6b0..a779447b4 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -93,7 +93,6 @@ func runStats(gopts GlobalOptions, args []string) error { uniqueInodes: make(map[uint64]struct{}), fileBlobs: make(map[string]restic.IDSet), blobs: restic.NewBlobSet(), - blobsSeen: restic.NewBlobSet(), } if snapshotIDString != "" { @@ -183,7 +182,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest if countMode == countModeRawData { // count just the sizes of unique blobs; we don't need to walk the tree // ourselves in this case, since a nifty function does it for us - return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs, stats.blobsSeen) + return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs) } err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, stats)) @@ -318,9 +317,9 @@ type statsContainer struct { // blobs that have been seen as a part of the file fileBlobs map[string]restic.IDSet - // blobs and blobsSeen are used to count individual - // unique blobs, independent of references to files - blobs, blobsSeen restic.BlobSet + // blobs is used to count individual unique blobs, + // independent of references to files + blobs restic.BlobSet } // fileID is a 256-bit hash that distinguishes unique files. diff --git a/internal/restic/find.go b/internal/restic/find.go index 4b118abb0..09654f938 100644 --- a/internal/restic/find.go +++ b/internal/restic/find.go @@ -3,9 +3,8 @@ package restic import "context" // FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data -// blobs) to the set blobs. The tree blobs in the `seen` BlobSet will not be visited -// again. -func FindUsedBlobs(ctx context.Context, repo Repository, treeID ID, blobs BlobSet, seen BlobSet) error { +// blobs) to the set blobs. Already seen tree blobs will not be visited again. +func FindUsedBlobs(ctx context.Context, repo Repository, treeID ID, blobs BlobSet) error { blobs.Insert(BlobHandle{ID: treeID, Type: TreeBlob}) tree, err := repo.LoadTree(ctx, treeID) @@ -22,13 +21,11 @@ func FindUsedBlobs(ctx context.Context, repo Repository, treeID ID, blobs BlobSe case "dir": subtreeID := *node.Subtree h := BlobHandle{ID: subtreeID, Type: TreeBlob} - if seen.Has(h) { + if blobs.Has(h) { continue } - seen.Insert(h) - - err := FindUsedBlobs(ctx, repo, subtreeID, blobs, seen) + err := FindUsedBlobs(ctx, repo, subtreeID, blobs) if err != nil { return err } diff --git a/internal/restic/find_test.go b/internal/restic/find_test.go index d3620b472..7e52dd681 100644 --- a/internal/restic/find_test.go +++ b/internal/restic/find_test.go @@ -93,7 +93,7 @@ func TestFindUsedBlobs(t *testing.T) { for i, sn := range snapshots { usedBlobs := restic.NewBlobSet() - err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, usedBlobs, restic.NewBlobSet()) + err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, usedBlobs) if err != nil { t.Errorf("FindUsedBlobs returned error: %v", err) continue @@ -127,9 +127,8 @@ func BenchmarkFindUsedBlobs(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - seen := restic.NewBlobSet() blobs := restic.NewBlobSet() - err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, blobs, seen) + err := restic.FindUsedBlobs(context.TODO(), repo, *sn.Tree, blobs) if err != nil { b.Error(err) }