From f2a1b125cb6859161b20957b6ec083ff0229424d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 7 Nov 2020 01:12:07 +0100 Subject: [PATCH] restic: Actually parallelize FindUsedBlobs --- internal/restic/find.go | 57 +++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/internal/restic/find.go b/internal/restic/find.go index b5bef0720..b797cac6b 100644 --- a/internal/restic/find.go +++ b/internal/restic/find.go @@ -1,6 +1,11 @@ package restic -import "context" +import ( + "context" + "sync" + + "golang.org/x/sync/errgroup" +) // TreeLoader loads a tree from a repository. type TreeLoader interface { @@ -10,30 +15,38 @@ type TreeLoader interface { // FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data // blobs) to the set blobs. Already seen tree blobs will not be visited again. func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error { - h := BlobHandle{ID: treeID, Type: TreeBlob} - if blobs.Has(h) { - return nil - } - blobs.Insert(h) + var lock sync.Mutex - tree, err := repo.LoadTree(ctx, treeID) - if err != nil { - return err - } + wg, ctx := errgroup.WithContext(ctx) + treeStream := StreamTrees(ctx, wg, repo, IDs{treeID}, func(treeID ID) bool { + // locking is necessary because the goroutine below concurrently adds data blobs + lock.Lock() + h := BlobHandle{ID: treeID, Type: TreeBlob} + blobReferenced := blobs.Has(h) + // noop if already referenced + blobs.Insert(h) + lock.Unlock() + return blobReferenced + }) - for _, node := range tree.Nodes { - switch node.Type { - case "file": - for _, blob := range node.Content { - blobs.Insert(BlobHandle{ID: blob, Type: DataBlob}) + wg.Go(func() error { + for tree := range treeStream { + if tree.Error != nil { + return tree.Error } - case "dir": - err := FindUsedBlobs(ctx, repo, *node.Subtree, blobs) - if err != nil { - return err + + lock.Lock() + for _, node := range tree.Nodes { + switch node.Type { + case "file": + 
for _, blob := range node.Content { + blobs.Insert(BlobHandle{ID: blob, Type: DataBlob}) + } + } } + lock.Unlock() } - } - - return nil + return nil + }) + return wg.Wait() }