diff --git a/changelog/unreleased/issue-1665 b/changelog/unreleased/issue-1665 new file mode 100644 index 000000000..ba956edfb --- /dev/null +++ b/changelog/unreleased/issue-1665 @@ -0,0 +1,27 @@ +Enhancement: Improve cache handling for `restic check` + +For safety reasons, restic does not use a local metadata cache for the `restic +check` command, so that data is loaded from the repository and restic can check +it's in good condition. When the cache is disabled, restic will fetch each tiny +blob needed for checking the integrity using a separate backend request. For +non-local backends, that will take a long time, and depending on the backend +(e.g. B2) may also be much more expensive. + +This PR adds a few commits which will change the behavior as follows: + + * When `restic check` is called without any additional parameters, it will + build a new cache in a temporary directory, which is removed at the end of + the check. This way, we'll get readahead for metadata files (so restic will + fetch the whole file when the first blob from the file is requested), but + all data is freshly fetched from the storage backend. This is the default + behavior and will work for almost all users. + + * When `restic check` is called with `--with-cache`, the default on-disc cache + is used. This behavior hasn't changed since the cache was introduced. + + * When `--no-cache` is specified, restic falls back to the old behavior, and + read all tiny blobs in separate requests. + +https://github.com/restic/restic/issues/1665 +https://github.com/restic/restic/issues/1694 +https://github.com/restic/restic/pull/1696 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index ac669c6c7..cee676766 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "io/ioutil" "os" "strconv" "strings" @@ -11,6 +12,7 @@ import ( "github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/restic" ) @@ -117,15 +119,52 @@ func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress { return readProgress } +// prepareCheckCache configures a special cache directory for check. +// +// * if --with-cache is specified, the default cache is used +// * if the user explicitely requested --no-cache, we don't use any cache +// * by default, we use a cache in a temporary directory that is deleted after the check +func prepareCheckCache(opts CheckOptions, gopts *GlobalOptions) (cleanup func()) { + cleanup = func() {} + if opts.WithCache { + // use the default cache, no setup needed + return cleanup + } + + if gopts.NoCache { + // don't use any cache, no setup needed + return cleanup + } + + // use a cache in a temporary directory + tempdir, err := ioutil.TempDir("", "restic-check-cache-") + if err != nil { + // if an error occurs, don't use any cache + Warnf("unable to create temporary directory for cache during check, disabling cache: %v\n", err) + gopts.NoCache = true + return cleanup + } + + gopts.CacheDir = tempdir + Verbosef("using temporary cache in %v\n", tempdir) + + cleanup = func() { + err := fs.RemoveAll(tempdir) + if err != nil { + Warnf("error removing temporary cache directory: %v\n", err) + } + } + + return cleanup +} + func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { if len(args) != 0 { return errors.Fatal("check has no arguments") } - if !opts.WithCache { - // do not use a cache for the checker - gopts.NoCache = true - } + cleanup := prepareCheckCache(opts, &gopts) + defer cleanup() repo, err := OpenRepository(gopts) if err != nil { diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 81c0d8347..ca289be10 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -164,7 +164,11 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { } } - c.repo.SetIndex(c.masterIndex) + err := c.repo.SetIndex(c.masterIndex) + if err != nil { + debug.Log("SetIndex returned error: %v", err) + errs = append(errs, err) + } return hints, errs } diff --git a/internal/mock/repository.go b/internal/mock/repository.go index a6a68988a..c3a9f0f9f 100644 --- a/internal/mock/repository.go +++ b/internal/mock/repository.go @@ -11,7 +11,7 @@ type Repository struct { KeyFn func() *crypto.Key - SetIndexFn func(restic.Index) + SetIndexFn func(restic.Index) error IndexFn func() restic.Index SaveFullIndexFn func() error @@ -51,8 +51,8 @@ func (repo Repository) Key() *crypto.Key { } // SetIndex is a stub method. -func (repo Repository) SetIndex(idx restic.Index) { - repo.SetIndexFn(idx) +func (repo Repository) SetIndex(idx restic.Index) error { + return repo.SetIndexFn(idx) } // Index is a stub method. diff --git a/internal/repository/repository.go b/internal/repository/repository.go index fc31c8392..3a1ce40c2 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -331,8 +331,20 @@ func (r *Repository) Index() restic.Index { } // SetIndex instructs the repository to use the given index. -func (r *Repository) SetIndex(i restic.Index) { +func (r *Repository) SetIndex(i restic.Index) error { r.idx = i.(*MasterIndex) + + ids := restic.NewIDSet() + for _, idx := range r.idx.All() { + id, err := idx.ID() + if err != nil { + debug.Log("not using index, ID() returned error %v", err) + continue + } + ids.Insert(id) + } + + return r.PrepareCache(ids) } // SaveIndex saves an index in the repository. @@ -413,52 +425,75 @@ func (r *Repository) LoadIndex(ctx context.Context) error { r.idx.Insert(idx) } - if r.Cache != nil { - // clear old index files - err := r.Cache.Clear(restic.IndexFile, validIndex) - if err != nil { - fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err) - } - - packs := restic.NewIDSet() - for _, idx := range r.idx.All() { - for id := range idx.Packs() { - packs.Insert(id) - } - } - - // clear old data files - err = r.Cache.Clear(restic.DataFile, packs) - if err != nil { - fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err) - } - - treePacks := restic.NewIDSet() - for _, idx := range r.idx.All() { - for _, id := range idx.TreePacks() { - treePacks.Insert(id) - } - } - - // use readahead - cache := r.Cache.(*cache.Cache) - cache.PerformReadahead = func(h restic.Handle) bool { - if h.Type != restic.DataFile { - return false - } - - id, err := restic.ParseID(h.Name) - if err != nil { - return false - } - - return treePacks.Has(id) - } + err := r.PrepareCache(validIndex) + if err != nil { + return err } return <-errCh } +// PrepareCache initializes the local cache. indexIDs is the list of IDs of +// index files still present in the repo. +func (r *Repository) PrepareCache(indexIDs restic.IDSet) error { + if r.Cache == nil { + return nil + } + + debug.Log("prepare cache with %d index files", len(indexIDs)) + + // clear old index files + err := r.Cache.Clear(restic.IndexFile, indexIDs) + if err != nil { + fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err) + } + + packs := restic.NewIDSet() + for _, idx := range r.idx.All() { + for id := range idx.Packs() { + packs.Insert(id) + } + } + + // clear old data files + err = r.Cache.Clear(restic.DataFile, packs) + if err != nil { + fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err) + } + + treePacks := restic.NewIDSet() + for _, idx := range r.idx.All() { + for _, id := range idx.TreePacks() { + treePacks.Insert(id) + } + } + + // use readahead + debug.Log("using readahead") + cache := r.Cache.(*cache.Cache) + cache.PerformReadahead = func(h restic.Handle) bool { + if h.Type != restic.DataFile { + debug.Log("no readahead for %v, is not data file", h) + return false + } + + id, err := restic.ParseID(h.Name) + if err != nil { + debug.Log("no readahead for %v, invalid ID", h) + return false + } + + if treePacks.Has(id) { + debug.Log("perform readahead for %v", h) + return true + } + debug.Log("no readahead for %v, not tree file", h) + return false + } + + return nil +} + // LoadIndex loads the index id from backend and returns it. func LoadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*Index, error) { idx, err := LoadIndexWithDecoder(ctx, repo, id, DecodeIndex) diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 5a3a01a8a..ff8f38034 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -15,7 +15,7 @@ type Repository interface { Key() *crypto.Key - SetIndex(Index) + SetIndex(Index) error Index() Index SaveFullIndex(context.Context) error