From 0b39940fdb8b39a138a3d58764efa80f10c94838 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 6 Jan 2024 13:59:47 +0100 Subject: [PATCH 1/2] walker: Remove ignoreTrees functionality It was only used in two places: - stats: apparently as a minor performance optimization, which is unlikely to be important - find: filtered directories would be ignored. However, this optimization missed that it is possible that two directories have the exact same content. Such directories would be incorrectly ignored too. Example: ``` mkdir test test/a test/b restic backup test restic find latest test/b -> incorrectly does not return anything ``` Thus, remove the functionality as it's apparently too complex to use correctly. --- changelog/unreleased/pull-4615 | 6 ++ cmd/restic/cmd_find.go | 56 +++++----- cmd/restic/cmd_ls.go | 14 +-- cmd/restic/cmd_stats.go | 14 ++- internal/dump/common.go | 12 +-- internal/walker/walker.go | 74 +++----------- internal/walker/walker_test.go | 182 +++------------------------------ 7 files changed, 77 insertions(+), 281 deletions(-) create mode 100644 changelog/unreleased/pull-4615 diff --git a/changelog/unreleased/pull-4615 b/changelog/unreleased/pull-4615 new file mode 100644 index 000000000..7e2d4a017 --- /dev/null +++ b/changelog/unreleased/pull-4615 @@ -0,0 +1,6 @@ +Bugfix: `find` ignored directories in some cases + +In some cases, the `find` command ignored empty or moved directories. This has +been fixed. + +https://github.com/restic/restic/pull/4615 diff --git a/cmd/restic/cmd_find.go b/cmd/restic/cmd_find.go index c30650823..33fff864f 100644 --- a/cmd/restic/cmd_find.go +++ b/cmd/restic/cmd_find.go @@ -244,13 +244,12 @@ func (s *statefulOutput) Finish() { // Finder bundles information needed to find a file or directory. type Finder struct { - repo restic.Repository - pat findPattern - out statefulOutput - ignoreTrees restic.IDSet - blobIDs map[string]struct{} - treeIDs map[string]struct{} - itemsFound int + repo restic.Repository + pat findPattern + out statefulOutput + blobIDs map[string]struct{} + treeIDs map[string]struct{} + itemsFound int } func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error { @@ -261,17 +260,17 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error } f.out.newsn = sn - return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { + return walker.Walk(ctx, f.repo, *sn.Tree, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error { if err != nil { debug.Log("Error loading tree %v: %v", parentTreeID, err) Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID()) - return false, walker.ErrSkipNode + return walker.ErrSkipNode } if node == nil { - return false, nil + return nil } normalizedNodepath := nodepath @@ -284,7 +283,7 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error for _, pat := range f.pat.pattern { found, err := filter.Match(pat, normalizedNodepath) if err != nil { - return false, err + return err } if found { foundMatch = true @@ -292,16 +291,13 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error } } - var ( - ignoreIfNoMatch = true - errIfNoMatch error - ) + var errIfNoMatch error if node.Type == "dir" { var childMayMatch bool for _, pat := range f.pat.pattern { mayMatch, err := filter.ChildMatch(pat, normalizedNodepath) if err != nil { - return false, err + return err } if mayMatch { childMayMatch = true @@ -310,30 +306,27 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error } if !childMayMatch { - ignoreIfNoMatch = true errIfNoMatch = walker.ErrSkipNode - } else { - ignoreIfNoMatch = false } } if !foundMatch { - return ignoreIfNoMatch, errIfNoMatch + return errIfNoMatch } if !f.pat.oldest.IsZero() && node.ModTime.Before(f.pat.oldest) { debug.Log(" ModTime is older than %s\n", f.pat.oldest) - return ignoreIfNoMatch, errIfNoMatch + return errIfNoMatch } if !f.pat.newest.IsZero() && node.ModTime.After(f.pat.newest) { debug.Log(" ModTime is newer than %s\n", f.pat.newest) - return ignoreIfNoMatch, errIfNoMatch + return errIfNoMatch } debug.Log(" found match\n") f.out.PrintPattern(nodepath, node) - return false, nil + return nil }) } @@ -345,17 +338,17 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error { } f.out.newsn = sn - return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { + return walker.Walk(ctx, f.repo, *sn.Tree, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error { if err != nil { debug.Log("Error loading tree %v: %v", parentTreeID, err) Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID()) - return false, walker.ErrSkipNode + return walker.ErrSkipNode } if node == nil { - return false, nil + return nil } if node.Type == "dir" && f.treeIDs != nil { @@ -373,7 +366,7 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error { // looking for blobs) if f.itemsFound >= len(f.treeIDs) && f.blobIDs == nil { // Return an error to terminate the Walk - return true, errors.New("OK") + return errors.New("OK") } } } @@ -394,7 +387,7 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error { } } - return false, nil + return nil }) } @@ -593,10 +586,9 @@ func runFind(ctx context.Context, opts FindOptions, gopts GlobalOptions, args [] } f := &Finder{ - repo: repo, - pat: pat, - out: statefulOutput{ListLong: opts.ListLong, HumanReadable: opts.HumanReadable, JSON: gopts.JSON}, - ignoreTrees: restic.NewIDSet(), + repo: repo, + pat: pat, + out: statefulOutput{ListLong: opts.ListLong, HumanReadable: opts.HumanReadable, JSON: gopts.JSON}, } if opts.BlobID { diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go index 83a03559d..07c49d60f 100644 --- a/cmd/restic/cmd_ls.go +++ b/cmd/restic/cmd_ls.go @@ -230,12 +230,12 @@ func runLs(ctx context.Context, opts LsOptions, gopts GlobalOptions, args []stri printSnapshot(sn) - err = walker.Walk(ctx, repo, *sn.Tree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { + err = walker.Walk(ctx, repo, *sn.Tree, func(_ restic.ID, nodepath string, node *restic.Node, err error) error { if err != nil { - return false, err + return err } if node == nil { - return false, nil + return nil } if withinDir(nodepath) { @@ -245,22 +245,22 @@ func runLs(ctx context.Context, opts LsOptions, gopts GlobalOptions, args []stri // if recursive listing is requested, signal the walker that it // should continue walking recursively if opts.Recursive { - return false, nil + return nil } } // if there's an upcoming match deeper in the tree (but we're not // there yet), signal the walker to descend into any subdirs if approachingMatchingTree(nodepath) { - return false, nil + return nil } // otherwise, signal the walker to not walk recursively into any // subdirs if node.Type == "dir" { - return false, walker.ErrSkipNode + return walker.ErrSkipNode } - return false, nil + return nil }) if err != nil { diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index a3e0cefc7..b0837510d 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -203,7 +203,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest } hardLinkIndex := restorer.NewHardlinkIndex[struct{}]() - err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, hardLinkIndex)) + err := walker.Walk(ctx, repo, *snapshot.Tree, statsWalkTree(repo, opts, stats, hardLinkIndex)) if err != nil { return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err) } @@ -212,12 +212,12 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest } func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, hardLinkIndex *restorer.HardlinkIndex[struct{}]) walker.WalkFunc { - return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) (bool, error) { + return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) error { if nodeErr != nil { - return true, nodeErr + return nodeErr } if node == nil { - return true, nil + return nil } if opts.countMode == countModeUniqueFilesByContents || opts.countMode == countModeBlobsPerFile { @@ -247,7 +247,7 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai // is always a data blob since we're accessing it via a file's Content array blobSize, found := repo.LookupBlobSize(blobID, restic.DataBlob) if !found { - return true, fmt.Errorf("blob %s not found for tree %s", blobID, parentTreeID) + return fmt.Errorf("blob %s not found for tree %s", blobID, parentTreeID) } // count the blob's size, then add this blob by this @@ -274,11 +274,9 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{}) stats.TotalSize += node.Size } - - return false, nil } - return true, nil + return nil } } diff --git a/internal/dump/common.go b/internal/dump/common.go index c3ba69431..3ca1ced82 100644 --- a/internal/dump/common.go +++ b/internal/dump/common.go @@ -70,27 +70,27 @@ func sendNodes(ctx context.Context, repo restic.Repository, root *restic.Node, c return nil } - err := walker.Walk(ctx, repo, *root.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) { + err := walker.Walk(ctx, repo, *root.Subtree, func(_ restic.ID, nodepath string, node *restic.Node, err error) error { if err != nil { - return false, err + return err } if node == nil { - return false, nil + return nil } node.Path = path.Join(root.Path, nodepath) if !IsFile(node) && !IsDir(node) && !IsLink(node) { - return false, nil + return nil } select { case ch <- node: case <-ctx.Done(): - return false, ctx.Err() + return ctx.Err() } - return false, nil + return nil }) return err diff --git a/internal/walker/walker.go b/internal/walker/walker.go index 4c4e7f5ab..aba2e39e5 100644 --- a/internal/walker/walker.go +++ b/internal/walker/walker.go @@ -21,21 +21,14 @@ var ErrSkipNode = errors.New("skip this node") // When the special value ErrSkipNode is returned and node is a dir node, it is // not walked. When the node is not a dir node, the remaining items in this // tree are skipped. -// -// Setting ignore to true tells Walk that it should not visit the node again. -// For tree nodes, this means that the function is not called for the -// referenced tree. If the node is not a tree, and all nodes in the current -// tree have ignore set to true, the current tree will not be visited again. -// When err is not nil and different from ErrSkipNode, the value returned for -// ignore is ignored. -type WalkFunc func(parentTreeID restic.ID, path string, node *restic.Node, nodeErr error) (ignore bool, err error) +type WalkFunc func(parentTreeID restic.ID, path string, node *restic.Node, nodeErr error) (err error) // Walk calls walkFn recursively for each node in root. If walkFn returns an // error, it is passed up the call stack. The trees in ignoreTrees are not // walked. If walkFn ignores trees, these are added to the set. -func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, ignoreTrees restic.IDSet, walkFn WalkFunc) error { +func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, walkFn WalkFunc) error { tree, err := restic.LoadTree(ctx, repo, root) - _, err = walkFn(root, "/", nil, err) + err = walkFn(root, "/", nil, err) if err != nil { if err == ErrSkipNode { @@ -44,24 +37,13 @@ func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, ignoreTre return err } - if ignoreTrees == nil { - ignoreTrees = restic.NewIDSet() - } - - _, err = walk(ctx, repo, "/", root, tree, ignoreTrees, walkFn) - return err + return walk(ctx, repo, "/", root, tree, walkFn) } // walk recursively traverses the tree, ignoring subtrees when the ID of the // subtree is in ignoreTrees. If err is nil and ignore is true, the subtree ID // will be added to ignoreTrees by walk. -func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, ignoreTrees restic.IDSet, walkFn WalkFunc) (ignore bool, err error) { - var allNodesIgnored = true - - if len(tree.Nodes) == 0 { - allNodesIgnored = false - } - +func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, walkFn WalkFunc) (err error) { sort.Slice(tree.Nodes, func(i, j int) bool { return tree.Nodes[i].Name < tree.Nodes[j].Name }) @@ -70,68 +52,40 @@ func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTree p := path.Join(prefix, node.Name) if node.Type == "" { - return false, errors.Errorf("node type is empty for node %q", node.Name) + return errors.Errorf("node type is empty for node %q", node.Name) } if node.Type != "dir" { - ignore, err := walkFn(parentTreeID, p, node, nil) + err := walkFn(parentTreeID, p, node, nil) if err != nil { if err == ErrSkipNode { // skip the remaining entries in this tree - return allNodesIgnored, nil + return nil } - return false, err - } - - if !ignore { - allNodesIgnored = false + return err } continue } if node.Subtree == nil { - return false, errors.Errorf("subtree for node %v in tree %v is nil", node.Name, p) - } - - if ignoreTrees.Has(*node.Subtree) { - continue + return errors.Errorf("subtree for node %v in tree %v is nil", node.Name, p) } subtree, err := restic.LoadTree(ctx, repo, *node.Subtree) - ignore, err := walkFn(parentTreeID, p, node, err) + err = walkFn(parentTreeID, p, node, err) if err != nil { if err == ErrSkipNode { - if ignore { - ignoreTrees.Insert(*node.Subtree) - } continue } - return false, err } - if ignore { - ignoreTrees.Insert(*node.Subtree) - } - - if !ignore { - allNodesIgnored = false - } - - ignore, err = walk(ctx, repo, p, *node.Subtree, subtree, ignoreTrees, walkFn) + err = walk(ctx, repo, p, *node.Subtree, subtree, walkFn) if err != nil { - return false, err - } - - if ignore { - ignoreTrees.Insert(*node.Subtree) - } - - if !ignore { - allNodesIgnored = false + return err } } - return allNodesIgnored, nil + return nil } diff --git a/internal/walker/walker_test.go b/internal/walker/walker_test.go index 54cc69792..cf9dd4168 100644 --- a/internal/walker/walker_test.go +++ b/internal/walker/walker_test.go @@ -99,22 +99,22 @@ type checkFunc func(t testing.TB) (walker WalkFunc, final func(testing.TB)) func checkItemOrder(want []string) checkFunc { pos := 0 return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) { - walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) { + walker = func(treeID restic.ID, path string, node *restic.Node, err error) error { if err != nil { t.Errorf("error walking %v: %v", path, err) - return false, err + return err } if pos >= len(want) { t.Errorf("additional unexpected path found: %v", path) - return false, nil + return nil } if path != want[pos] { t.Errorf("wrong path found, want %q, got %q", want[pos], path) } pos++ - return false, nil + return nil } final = func(t testing.TB) { @@ -131,22 +131,22 @@ func checkItemOrder(want []string) checkFunc { func checkParentTreeOrder(want []string) checkFunc { pos := 0 return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) { - walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) { + walker = func(treeID restic.ID, path string, node *restic.Node, err error) error { if err != nil { t.Errorf("error walking %v: %v", path, err) - return false, err + return err } if pos >= len(want) { t.Errorf("additional unexpected parent tree ID found: %v", treeID) - return false, nil + return nil } if treeID.String() != want[pos] { t.Errorf("wrong parent tree ID found, want %q, got %q", want[pos], treeID.String()) } pos++ - return false, nil + return nil } final = func(t testing.TB) { @@ -165,15 +165,15 @@ func checkSkipFor(skipFor map[string]struct{}, wantPaths []string) checkFunc { var pos int return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) { - walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) { + walker = func(treeID restic.ID, path string, node *restic.Node, err error) error { if err != nil { t.Errorf("error walking %v: %v", path, err) - return false, err + return err } if pos >= len(wantPaths) { t.Errorf("additional unexpected path found: %v", path) - return false, nil + return nil } if path != wantPaths[pos] { @@ -182,50 +182,10 @@ func checkSkipFor(skipFor map[string]struct{}, wantPaths []string) checkFunc { pos++ if _, ok := skipFor[path]; ok { - return false, ErrSkipNode + return ErrSkipNode } - return false, nil - } - - final = func(t testing.TB) { - if pos != len(wantPaths) { - t.Errorf("wrong number of paths returned, want %d, got %d", len(wantPaths), pos) - } - } - - return walker, final - } -} - -// checkIgnore returns ErrSkipNode if path is in skipFor and sets ignore according -// to ignoreFor. It checks that the paths the walk func is called for are exactly -// the ones in wantPaths. -func checkIgnore(skipFor map[string]struct{}, ignoreFor map[string]bool, wantPaths []string) checkFunc { - var pos int - - return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) { - walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) { - if err != nil { - t.Errorf("error walking %v: %v", path, err) - return false, err - } - - if pos >= len(wantPaths) { - t.Errorf("additional unexpected path found: %v", path) - return ignoreFor[path], nil - } - - if path != wantPaths[pos] { - t.Errorf("wrong path found, want %q, got %q", wantPaths[pos], path) - } - pos++ - - if _, ok := skipFor[path]; ok { - return ignoreFor[path], ErrSkipNode - } - - return ignoreFor[path], nil + return nil } final = func(t testing.TB) { @@ -272,16 +232,6 @@ func TestWalker(t *testing.T) { "/subdir", }, ), - checkIgnore( - map[string]struct{}{}, map[string]bool{ - "/subdir": true, - }, []string{ - "/", - "/foo", - "/subdir", - "/subdir/subfile", - }, - ), }, }, { @@ -409,81 +359,6 @@ func TestWalker(t *testing.T) { "57ee8960c7a86859b090a76e5d013f83d10c0ce11d5460076ca8468706f784ab", // tree /subdir3 "c2efeff7f217a4dfa12a16e8bb3cefedd37c00873605c29e5271c6061030672f", // tree / }), - checkIgnore( - map[string]struct{}{ - "/subdir1": {}, - }, map[string]bool{ - "/subdir1": true, - }, []string{ - "/", - "/foo", - "/subdir1", - "/zzz other", - }, - ), - checkIgnore( - map[string]struct{}{}, map[string]bool{ - "/subdir1": true, - }, []string{ - "/", - "/foo", - "/subdir1", - "/subdir1/subfile1", - "/subdir1/subfile2", - "/subdir1/subfile3", - "/zzz other", - }, - ), - checkIgnore( - map[string]struct{}{ - "/subdir2": {}, - }, map[string]bool{ - "/subdir2": true, - }, []string{ - "/", - "/foo", - "/subdir1", - "/subdir1/subfile1", - "/subdir1/subfile2", - "/subdir1/subfile3", - "/subdir2", - "/zzz other", - }, - ), - checkIgnore( - map[string]struct{}{}, map[string]bool{ - "/subdir1/subfile1": true, - "/subdir1/subfile2": true, - "/subdir1/subfile3": true, - }, []string{ - "/", - "/foo", - "/subdir1", - "/subdir1/subfile1", - "/subdir1/subfile2", - "/subdir1/subfile3", - "/zzz other", - }, - ), - checkIgnore( - map[string]struct{}{}, map[string]bool{ - "/subdir2/subfile1": true, - "/subdir2/subfile2": true, - "/subdir2/subfile3": true, - }, []string{ - "/", - "/foo", - "/subdir1", - "/subdir1/subfile1", - "/subdir1/subfile2", - "/subdir1/subfile3", - "/subdir2", - "/subdir2/subfile1", - "/subdir2/subfile2", - "/subdir2/subfile3", - "/zzz other", - }, - ), }, }, { @@ -513,35 +388,6 @@ func TestWalker(t *testing.T) { }), }, }, - { - tree: TestTree{ - "subdir1": TestTree{}, - "subdir2": TestTree{}, - "subdir3": TestTree{ - "file": TestFile{}, - }, - "subdir4": TestTree{}, - "subdir5": TestTree{ - "file": TestFile{}, - }, - "subdir6": TestTree{}, - }, - checks: []checkFunc{ - checkIgnore( - map[string]struct{}{}, map[string]bool{ - "/subdir2": true, - }, []string{ - "/", - "/subdir1", - "/subdir2", - "/subdir3", - "/subdir3/file", - "/subdir5", - "/subdir5/file", - }, - ), - }, - }, } for _, test := range tests { @@ -553,7 +399,7 @@ func TestWalker(t *testing.T) { defer cancel() fn, last := check(t) - err := Walk(ctx, repo, root, restic.NewIDSet(), fn) + err := Walk(ctx, repo, root, fn) if err != nil { t.Error(err) } From fdcbb53017438bbcc572d7af021899b33d6a59c7 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 19 Jan 2024 21:14:31 +0100 Subject: [PATCH 2/2] walker: test skipping for root node --- internal/walker/walker_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/walker/walker_test.go b/internal/walker/walker_test.go index cf9dd4168..786570e02 100644 --- a/internal/walker/walker_test.go +++ b/internal/walker/walker_test.go @@ -232,6 +232,13 @@ func TestWalker(t *testing.T) { "/subdir", }, ), + checkSkipFor( + map[string]struct{}{ + "/": {}, + }, []string{ + "/", + }, + ), }, }, {