restic: Actually parallelize FindUsedBlobs

This commit is contained in:
Michael Eischer 2020-11-07 01:12:07 +01:00 committed by Alexander Neumann
parent 6e03f80ca2
commit f2a1b125cb
1 changed files with 35 additions and 22 deletions

View File

@ -1,6 +1,11 @@
package restic package restic
import "context" import (
"context"
"sync"
"golang.org/x/sync/errgroup"
)
// TreeLoader loads a tree from a repository. // TreeLoader loads a tree from a repository.
type TreeLoader interface { type TreeLoader interface {
@ -10,30 +15,38 @@ type TreeLoader interface {
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data // FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
// blobs) to the set blobs. Already seen tree blobs will not be visited again. // blobs) to the set blobs. Already seen tree blobs will not be visited again.
func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error { func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error {
var lock sync.Mutex
wg, ctx := errgroup.WithContext(ctx)
treeStream := StreamTrees(ctx, wg, repo, IDs{treeID}, func(treeID ID) bool {
// locking is necessary the goroutine below concurrently adds data blobs
lock.Lock()
h := BlobHandle{ID: treeID, Type: TreeBlob} h := BlobHandle{ID: treeID, Type: TreeBlob}
if blobs.Has(h) { blobReferenced := blobs.Has(h)
return nil // noop if already referenced
}
blobs.Insert(h) blobs.Insert(h)
lock.Unlock()
return blobReferenced
})
tree, err := repo.LoadTree(ctx, treeID) wg.Go(func() error {
if err != nil { for tree := range treeStream {
return err if tree.Error != nil {
return tree.Error
} }
lock.Lock()
for _, node := range tree.Nodes { for _, node := range tree.Nodes {
switch node.Type { switch node.Type {
case "file": case "file":
for _, blob := range node.Content { for _, blob := range node.Content {
blobs.Insert(BlobHandle{ID: blob, Type: DataBlob}) blobs.Insert(BlobHandle{ID: blob, Type: DataBlob})
} }
case "dir":
err := FindUsedBlobs(ctx, repo, *node.Subtree, blobs)
if err != nil {
return err
} }
} }
lock.Unlock()
} }
return nil return nil
})
return wg.Wait()
} }