diff --git a/changelog/unreleased/issue-1153 b/changelog/unreleased/issue-1153 new file mode 100644 index 000000000..c6eaa4ad2 --- /dev/null +++ b/changelog/unreleased/issue-1153 @@ -0,0 +1,9 @@ +Enhancement: Support pruning even after running out of disk space + +When running out of disk space it was no longer possible to add or remove +data from a repository. To help with recovering from such a deadlock, the +prune command now supports an `--unsafe-recover-no-free-space` option to +recover from such situations. Make sure to read the documentation first! + +https://github.com/restic/restic/issues/1153 +https://github.com/restic/restic/pull/3481 diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 9447f8145..a6a8d0bde 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -39,7 +39,10 @@ Exit status is 0 if the command was successful, and non-zero if there was any er // PruneOptions collects all options for the cleanup command. type PruneOptions struct { - DryRun bool + DryRun bool + UnsafeNoSpaceRecovery string + + unsafeRecovery bool MaxUnused string maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused @@ -56,6 +59,7 @@ func init() { cmdRoot.AddCommand(cmdPrune) f := cmdPrune.Flags() f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done") + f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.") addPruneOptions(cmdPrune) } @@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error { } opts.MaxRepackBytes = uint64(size) } + if opts.UnsafeNoSpaceRecovery != "" { + // prevent repacking data to make sure users cannot get stuck. 
+ opts.MaxRepackBytes = 0 + } maxUnused := strings.TrimSpace(opts.MaxUnused) if maxUnused == "" { @@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error { return errors.Fatal("prune requires a backend connection limit of at least two") } + if opts.UnsafeNoSpaceRecovery != "" { + repoID := repo.Config().ID + if opts.UnsafeNoSpaceRecovery != repoID { + return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID) + } + opts.unsafeRecovery = true + } + lock, err := lockRepoExclusive(gopts.ctx, repo) defer unlockRepo(lock) if err != nil { @@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB ignorePacks.Merge(removePacks) } - if len(ignorePacks) != 0 { + if opts.unsafeRecovery { + Verbosef("deleting index files\n") + indexFiles := repo.Index().(*repository.MasterIndex).IDs() + err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile) + if err != nil { + return errors.Fatalf("%s", err) + } + } else if len(ignorePacks) != 0 { err = rebuildIndexFiles(gopts, repo, ignorePacks, nil) if err != nil { return errors.Fatalf("%s", err) @@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB DeleteFiles(gopts, repo, removePacks, restic.PackFile) } + if opts.unsafeRecovery { + _, err = writeIndexFiles(gopts, repo, ignorePacks, nil) + if err != nil { + return errors.Fatalf("%s", err) + } + } + Verbosef("done\n") return nil } -func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error { +func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) { Verbosef("rebuilding index\n") idx := (repo.Index()).(*repository.MasterIndex) @@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks bar := newProgressMax(!gopts.Quiet, packcount, "packs processed") 
obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar) bar.Done() + return obsoleteIndexes, err +} + +func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error { + obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete) if err != nil { return err } diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index ebf63e930..792b825c2 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) { } func TestPrune(t *testing.T) { - t.Run("0", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "0%"} + testPruneVariants(t, false) + testPruneVariants(t, true) +} + +func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) { + suffix := "" + if unsafeNoSpaceRecovery { + suffix = "-recovery" + } + t.Run("0"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true, CheckUnused: true} testPrune(t, opts, checkOpts) }) - t.Run("50", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "50%"} + t.Run("50"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) - t.Run("unlimited", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "unlimited"} + t.Run("unlimited"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) - t.Run("CachableOnly", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true} + t.Run("CachableOnly"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := 
CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) diff --git a/doc/060_forget.rst b/doc/060_forget.rst index 55acc42ea..ab67368aa 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -444,3 +444,31 @@ The ``prune`` command accepts the following options: - ``--dry-run`` only show what ``prune`` would do. - ``--verbose`` increased verbosity shows additional statistics for ``prune``. + + +Recovering from "no free space" errors +************************************** + +In some cases, when a repository has grown large enough to fill up all disk space or the +allocated quota, ``prune`` might fail to free space. ``prune`` works in such a way +that a repository remains usable no matter at which point the command is interrupted. +However, this also means that ``prune`` requires some scratch space to work. + +In most cases it is sufficient to instruct ``prune`` to use as little scratch space as +possible by running it as ``prune --max-repack-size 0``. Note that for restic versions +before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work +if several snapshots have been removed using ``forget`` before. This then allows the +``prune`` command to actually remove data from the repository. If the command succeeds, +but there is still little free space, then remove a few more snapshots and run ``prune`` again. + +If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID`` +is available as a method of last resort. It allows ``prune`` to work with little to no free +space. However, a **failed** ``prune`` run can cause the repository to become +**temporarily unusable**. Therefore, make sure that you have a stable connection to the +repository storage before running this command. In case the command fails, it may become +necessary to manually remove all files from the ``index/`` folder of the repository and +run ``rebuild-index`` afterwards. 
+ +To prevent accidental use of the ``--unsafe-recover-no-free-space`` option, it is +necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace +``SOME-ID`` with the ID requested in the resulting error message.