From 0a6fa602c8c46a947bac415d538e80b296f7c753 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 2 Jul 2022 23:30:26 +0200 Subject: [PATCH 01/15] add option for setting min pack size --- cmd/restic/cmd_init.go | 8 ++++++- cmd/restic/global.go | 14 ++++++++++- internal/checker/checker_test.go | 3 ++- internal/repository/packer_manager.go | 18 +++++++-------- internal/repository/packer_manager_test.go | 6 ++--- internal/repository/repository.go | 27 ++++++++++++++++++---- internal/repository/testing.go | 12 +++++++--- internal/restic/repository.go | 1 + 8 files changed, 67 insertions(+), 22 deletions(-) diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index 058f1ed07..ee3ec4b10 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -86,7 +86,13 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err) } - s := repository.New(be, repository.Options{Compression: gopts.Compression}) + s, err := repository.New(be, repository.Options{ + Compression: gopts.Compression, + PackSize: gopts.MinPackSize * 1024 * 1024, + }) + if err != nil { + return err + } err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial) if err != nil { diff --git a/cmd/restic/global.go b/cmd/restic/global.go index 876e6e614..d52dee34f 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "runtime" + "strconv" "strings" "syscall" "time" @@ -62,6 +63,7 @@ type GlobalOptions struct { NoCache bool CleanupCache bool Compression repository.CompressionMode + MinPackSize uint backend.TransportOptions limiter.Limits @@ -102,6 +104,9 @@ func init() { return nil }) + // parse min pack size from env, on error the default value will be used + minPackSize, _ := strconv.ParseUint(os.Getenv("RESTIC_MIN_PACKSIZE"), 10, 32) + f := cmdRoot.PersistentFlags() f.StringVarP(&globalOptions.Repo, "repo", "r", os.Getenv("RESTIC_REPOSITORY"), "`repository` to backup to or restore from (default: $RESTIC_REPOSITORY)") f.StringVarP(&globalOptions.RepositoryFile, "repository-file", "", os.Getenv("RESTIC_REPOSITORY_FILE"), "`file` to read the repository location from (default: $RESTIC_REPOSITORY_FILE)") @@ -121,6 +126,7 @@ func init() { f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repository format version 2), one of (auto|off|max)") f.IntVar(&globalOptions.Limits.UploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.Limits.DownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)") + f.UintVar(&globalOptions.MinPackSize, "min-packsize", uint(minPackSize), "set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") // Use our "generate" command instead of the cobra provided "completion" command cmdRoot.CompletionOptions.DisableDefaultCmd = true @@ -440,7 +446,13 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { } } - s := repository.New(be, repository.Options{Compression: opts.Compression}) + s, err := repository.New(be, repository.Options{ + Compression: opts.Compression, + PackSize: opts.MinPackSize * 1024 * 1024, + }) + if err != nil { + return nil, err + } passwordTriesLeft := 1 if stdinIsTerminal() && opts.password == "" { diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index c82375e3c..b3a736152 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -348,7 +348,8 @@ func TestCheckerModifiedData(t *testing.T) { t.Logf("archived as %v", sn.ID().Str()) beError := &errorBackend{Backend: repo.Backend()} - checkRepo := repository.New(beError, repository.Options{}) + checkRepo, err := repository.New(beError, repository.Options{}) + test.OK(t, err) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) chkr := checker.New(checkRepo, false) diff --git a/internal/repository/packer_manager.go b/internal/repository/packer_manager.go index 32b2c9b7a..6179aab5c 100644 --- a/internal/repository/packer_manager.go +++ b/internal/repository/packer_manager.go @@ -34,19 +34,19 @@ type packerManager struct { key *crypto.Key queueFn func(ctx context.Context, t restic.BlobType, p *Packer) error - pm sync.Mutex - packer *Packer + pm sync.Mutex + packer *Packer + packSize uint } -const minPackSize = 4 * 1024 * 1024 - // newPackerManager returns an new packer manager which writes temporary files // to a temporary directory -func newPackerManager(key *crypto.Key, tpe restic.BlobType, queueFn func(ctx context.Context, t restic.BlobType, p *Packer) error) *packerManager { +func newPackerManager(key *crypto.Key, tpe restic.BlobType, packSize uint, queueFn func(ctx context.Context, t restic.BlobType, p *Packer) error) *packerManager { return &packerManager{ - tpe: tpe, - key: key, - queueFn: queueFn, + tpe: tpe, + key: key, + queueFn: queueFn, + packSize: packSize, } } @@ -88,7 +88,7 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest } // if the pack is not full enough, put back to the list - if packer.Size() < minPackSize { + if packer.Size() < r.packSize { debug.Log("pack is not full enough (%d bytes)", packer.Size()) return size, nil } diff --git a/internal/repository/packer_manager_test.go b/internal/repository/packer_manager_test.go index 67a33c757..90f716e0d 100644 --- a/internal/repository/packer_manager_test.go +++ b/internal/repository/packer_manager_test.go @@ -31,7 +31,7 @@ func min(a, b int) int { } func fillPacks(t testing.TB, rnd *rand.Rand, pm *packerManager, buf []byte) (bytes int) { - for i := 0; i < 100; i++ { + for i := 0; i < 102; i++ { l := rnd.Intn(maxBlobSize) id := randomID(rnd) buf = buf[:l] @@ -70,7 +70,7 @@ func testPackerManager(t testing.TB) int64 { rnd := rand.New(rand.NewSource(randomSeed)) savedBytes := int(0) - pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, func(ctx context.Context, tp restic.BlobType, p *Packer) error { + pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, DefaultPackSize, func(ctx context.Context, tp restic.BlobType, p *Packer) error { err := p.Finalize() if err != nil { return err @@ -104,7 +104,7 @@ func BenchmarkPackerManager(t *testing.B) { for i := 0; i < t.N; i++ { rnd.Seed(randomSeed) - pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, func(ctx context.Context, t restic.BlobType, p *Packer) error { + pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, DefaultPackSize, func(ctx context.Context, t restic.BlobType, p *Packer) error { return nil }) fillPacks(t, rnd, pm, blobBuf) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index c35bf1b76..2b7750648 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -28,6 +28,10 @@ import ( const MaxStreamBufferSize = 4 * 1024 * 1024 +const MinPackSize = 4 * 1024 * 1024 +const DefaultPackSize = 16 * 1024 * 1024 +const MaxPackSize = 128 * 1024 * 1024 + // Repository is used to access a repository in a backend. type Repository struct { be restic.Backend @@ -54,6 +58,7 @@ type Repository struct { type Options struct { Compression CompressionMode + PackSize uint } // CompressionMode configures if data should be compressed. @@ -100,14 +105,23 @@ func (c *CompressionMode) Type() string { } // New returns a new repository with backend be. -func New(be restic.Backend, opts Options) *Repository { +func New(be restic.Backend, opts Options) (*Repository, error) { + if opts.PackSize == 0 { + opts.PackSize = DefaultPackSize + } + if opts.PackSize > MaxPackSize { + return nil, errors.Fatalf("pack size larger than limit of %v MiB", MaxPackSize/1024/1024) + } else if opts.PackSize < MinPackSize { + return nil, errors.Fatalf("pack size smaller than minimum of %v MiB", MinPackSize/1024/1024) + } + repo := &Repository{ be: be, opts: opts, idx: NewMasterIndex(), } - return repo + return repo, nil } // DisableAutoIndexUpdate deactives the automatic finalization and upload of new @@ -129,6 +143,11 @@ func (r *Repository) Config() restic.Config { return r.cfg } +// MinPackSize return the minimum size of a pack file before uploading +func (r *Repository) MinPackSize() uint { + return r.opts.PackSize +} + // UseCache replaces the backend with the wrapped cache. func (r *Repository) UseCache(c *cache.Cache) { if c == nil { @@ -497,8 +516,8 @@ func (r *Repository) StartPackUploader(ctx context.Context, wg *errgroup.Group) innerWg, ctx := errgroup.WithContext(ctx) r.packerWg = innerWg r.uploader = newPackerUploader(ctx, innerWg, r, r.be.Connections()) - r.treePM = newPackerManager(r.key, restic.TreeBlob, r.uploader.QueuePacker) - r.dataPM = newPackerManager(r.key, restic.DataBlob, r.uploader.QueuePacker) + r.treePM = newPackerManager(r.key, restic.TreeBlob, r.MinPackSize(), r.uploader.QueuePacker) + r.dataPM = newPackerManager(r.key, restic.DataBlob, r.MinPackSize(), r.uploader.QueuePacker) wg.Go(func() error { return innerWg.Wait() diff --git a/internal/repository/testing.go b/internal/repository/testing.go index b9b38b1f4..380a47d04 100644 --- a/internal/repository/testing.go +++ b/internal/repository/testing.go @@ -52,10 +52,13 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r be, beCleanup = TestBackend(t) } - repo := New(be, Options{}) + repo, err := New(be, Options{}) + if err != nil { + t.Fatalf("TestRepository(): new repo failed: %v", err) + } cfg := restic.TestCreateConfig(t, TestChunkerPol, version) - err := repo.init(context.TODO(), test.TestPassword, cfg) + err = repo.init(context.TODO(), test.TestPassword, cfg) if err != nil { t.Fatalf("TestRepository(): initialize repo failed: %v", err) } @@ -104,7 +107,10 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) { t.Fatal(err) } - repo := New(be, Options{}) + repo, err := New(be, Options{}) + if err != nil { + t.Fatal(err) + } err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "") if err != nil { t.Fatal(err) diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 2bf12503f..1e64289e5 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -25,6 +25,7 @@ type Repository interface { LookupBlobSize(ID, BlobType) (uint, bool) Config() Config + MinPackSize() uint // List calls the function fn for each file of type t in the repository. // When an error is returned by fn, processing stops and List() returns the From 0269381b8dbe9b65916a4f3681a3a80f4907a46a Mon Sep 17 00:00:00 2001 From: Kyle Brennan Date: Sat, 30 Apr 2022 15:02:01 -0700 Subject: [PATCH 02/15] prune: add repack-small parameter --- cmd/restic/cmd_prune.go | 14 ++++++++++++-- cmd/restic/integration_test.go | 5 +++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index f6553e9ac..c22d0b7dc 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -52,6 +52,7 @@ type PruneOptions struct { MaxRepackBytes uint64 RepackCachableOnly bool + RepackSmall bool RepackUncompressed bool } @@ -70,6 +71,7 @@ func addPruneOptions(c *cobra.Command) { f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')") f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)") f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable") + f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "also repack small packs") f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data") } @@ -423,6 +425,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption var repackCandidates []packInfoWithID repoVersion := repo.Config().Version + minPackSize := repo.MinPackSize() // loop over all packs and decide what to do bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") @@ -464,6 +467,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // use a flag that pack must be compressed p.uncompressed = mustCompress + packIsLargeEnough := !opts.RepackSmall || packSize >= int64(minPackSize) + // decide what to do switch { case p.usedBlobs == 0: @@ -476,7 +481,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // if this is a data pack and --repack-cacheable-only is set => keep pack! stats.packs.keep++ - case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress: + case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress && packIsLargeEnough: // All blobs in pack are used and not mixed => keep pack! stats.packs.keep++ @@ -530,6 +535,10 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption pi := repackCandidates[i].packInfo pj := repackCandidates[j].packInfo switch { + case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(minPackSize) && pj.unusedSize+pj.usedSize >= uint64(minPackSize): + return true + case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(minPackSize) && pi.unusedSize+pi.usedSize >= uint64(minPackSize): + return false case pi.tpe != restic.DataBlob && pj.tpe == restic.DataBlob: return true case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob: @@ -552,6 +561,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption for _, p := range repackCandidates { reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter) reachedRepackSize := stats.size.repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes + packIsLargeEnough := !opts.RepackSmall || p.unusedSize+p.usedSize >= uint64(minPackSize) switch { case reachedRepackSize: @@ -561,7 +571,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // repacking non-data packs / uncompressed-trees is only limited by repackSize repack(p.ID, p.packInfo) - case reachedUnusedSizeAfter: + case reachedUnusedSizeAfter && packIsLargeEnough: // for all other packs stop repacking if tolerated unused size is reached. stats.packs.keep++ diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index f15b3d9fd..6f742d2dd 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1611,6 +1611,11 @@ func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) { checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) + t.Run("Small", func(t *testing.T) { + opts := PruneOptions{MaxUnused: "unlimited", RepackSmall: true} + checkOpts := CheckOptions{ReadData: true, CheckUnused: true} + testPrune(t, opts, checkOpts) + }) } func testPrune(t *testing.T, pruneOpts PruneOptions, checkOpts CheckOptions) { From 6a6d313c9ab330b74038023ec58277ee7850eea7 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 11 Jun 2022 23:11:02 +0200 Subject: [PATCH 03/15] prune: reduce priority of repacking small packs --- cmd/restic/cmd_prune.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index c22d0b7dc..dc7fa5a74 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -535,14 +535,14 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption pi := repackCandidates[i].packInfo pj := repackCandidates[j].packInfo switch { - case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(minPackSize) && pj.unusedSize+pj.usedSize >= uint64(minPackSize): - return true - case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(minPackSize) && pi.unusedSize+pi.usedSize >= uint64(minPackSize): - return false case pi.tpe != restic.DataBlob && pj.tpe == restic.DataBlob: return true case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob: return false + case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(minPackSize) && pj.unusedSize+pj.usedSize >= uint64(minPackSize): + return true + case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(minPackSize) && pi.unusedSize+pi.usedSize >= uint64(minPackSize): + return false } return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize }) From 1e3f05c3f100f31c6b46a98d2c7a00832c46db7c Mon Sep 17 00:00:00 2001 From: Kyle Brennan Date: Sat, 30 Apr 2022 15:05:20 -0700 Subject: [PATCH 04/15] repository: prevent header overfill --- internal/pack/pack.go | 7 +++++++ internal/repository/packer_manager.go | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 7ac06db72..11be41697 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -157,6 +157,13 @@ func (p *Packer) Count() int { return len(p.blobs) } +// HeaderFull returns true if the pack header is full. +func (p *Packer) HeaderFull() bool { + p.m.Lock() + defer p.m.Unlock() + return headerSize+uint(len(p.blobs)+1)*entrySize > MaxHeaderSize +} + // Blobs returns the slice of blobs that have been written. func (p *Packer) Blobs() []restic.Blob { p.m.Lock() diff --git a/internal/repository/packer_manager.go b/internal/repository/packer_manager.go index 6179aab5c..e83bf8769 100644 --- a/internal/repository/packer_manager.go +++ b/internal/repository/packer_manager.go @@ -87,8 +87,8 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest return 0, err } - // if the pack is not full enough, put back to the list - if packer.Size() < r.packSize { + // if the pack and header is not full enough, put back to the list + if packer.Size() < r.packSize && !packer.HeaderFull() { debug.Log("pack is not full enough (%d bytes)", packer.Size()) return size, nil } From e43be84eb8062b8d882b960b5f841dc751ad98e8 Mon Sep 17 00:00:00 2001 From: Kyle Brennan Date: Sat, 30 Apr 2022 15:16:00 -0700 Subject: [PATCH 05/15] document minPackSize --- doc/047_tuning_backup_parameters.rst | 20 ++++++++++++++++++++ doc/manual_rest.rst | 3 +++ 2 files changed, 23 insertions(+) diff --git a/doc/047_tuning_backup_parameters.rst b/doc/047_tuning_backup_parameters.rst index 78001bee6..dff992f1e 100644 --- a/doc/047_tuning_backup_parameters.rst +++ b/doc/047_tuning_backup_parameters.rst @@ -8,6 +8,7 @@ - for subsections ^ for subsubsections " for paragraphs + ######################## Tuning Backup Parameters ######################## @@ -48,3 +49,22 @@ which will compress very fast), ``max`` (which will trade backup speed and CPU u slightly better compression), or ``off`` (which disables compression). Each setting is only applied for the single run of restic. The option can also be set via the environment variable ``RESTIC_COMPRESSION``. + + +Pack Size +========= + +In certain instances, such as very large repositories, it is desired to have larger pack +sizes to reduce the number of files in the repository. Notable examples are OpenStack +Swift and some Google Drive Team accounts, where there are hard limits on the total +number of files. This can be achieved by either using the ``--min-packsize`` flag +or defining the ``$RESTIC_MIN_PACKSIZE`` environment variable. Restic currently defaults +to a 16MB minimum pack size. + +The side effect of increasing the pack size is increased client memory usage. A bit of +tuning may be required to strike a balance between memory usage and number of pack files. + +Restic uses the majority of it's memory according to the pack size, multiplied by the number +of parallel writers. For example, if you have 4 parallel writers (restic creates one per +available CPU), With a minimum pack size of 64 (Megabytes), you'll get a *minimum* of 256MB +of memory usage. diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index 040c091fa..ba9ab8990 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -56,6 +56,7 @@ Usage help is available: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) + --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE or 16) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) @@ -128,6 +129,7 @@ command: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) + --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE or 16) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) @@ -440,3 +442,4 @@ time it is used, so by looking at the timestamps of the sub directories of the cache directory it can decide which sub directories are old and probably not needed any more. You can either remove these directories manually, or run a restic command with the ``--cleanup-cache`` flag. + From 420ddc03c9f489b909e77ed06f405f9f235f62be Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 12 Jun 2022 13:03:59 +0200 Subject: [PATCH 06/15] rework pack size parameter documentation --- doc/047_tuning_backup_parameters.rst | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/doc/047_tuning_backup_parameters.rst b/doc/047_tuning_backup_parameters.rst index dff992f1e..6d98813b5 100644 --- a/doc/047_tuning_backup_parameters.rst +++ b/doc/047_tuning_backup_parameters.rst @@ -54,17 +54,21 @@ variable ``RESTIC_COMPRESSION``. Pack Size ========= -In certain instances, such as very large repositories, it is desired to have larger pack -sizes to reduce the number of files in the repository. Notable examples are OpenStack +In certain instances, such as very large repositories (in the TiB range) or very fast +upload connections, it is desirable to use larger pack sizes to reduce the number of +files in the repository and improve upload performance. Notable examples are OpenStack Swift and some Google Drive Team accounts, where there are hard limits on the total -number of files. This can be achieved by either using the ``--min-packsize`` flag +number of files. Larger pack size can also improve the backup speed for a repository +stored on a local HDD. This can be achieved by either using the ``--min-packsize`` flag or defining the ``$RESTIC_MIN_PACKSIZE`` environment variable. Restic currently defaults -to a 16MB minimum pack size. +to a 16MiB pack size. -The side effect of increasing the pack size is increased client memory usage. A bit of -tuning may be required to strike a balance between memory usage and number of pack files. - -Restic uses the majority of it's memory according to the pack size, multiplied by the number -of parallel writers. For example, if you have 4 parallel writers (restic creates one per -available CPU), With a minimum pack size of 64 (Megabytes), you'll get a *minimum* of 256MB -of memory usage. +The side effect of increasing the pack size is requiring more disk space for temporary pack +files created before uploading. The space must be available in the system default temp +directory, unless overwritten by setting the ``$TMPDIR`` environment variable. In addition, +depending on the backend the memory usage can also increase by a similar amount. Restic +requires temporary space according to the pack size, multiplied by the number +of backend connections plus one. For example, if the backend uses 5 connections (the default +for most backends), with a target pack size of 64MiB, you'll need a *minimum* of 384MiB +of space in the temp directory. A bit of tuning may be required to strike a balance between +resource usage at the backup client and the number of pack files in the repository. From 8a44258b6f348f7d6887a3d9271ca9519e4420e7 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 12 Jun 2022 13:07:42 +0200 Subject: [PATCH 07/15] update restic help snippets in documentation --- doc/047_tuning_backup_parameters.rst | 8 ++++---- doc/manual_rest.rst | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/doc/047_tuning_backup_parameters.rst b/doc/047_tuning_backup_parameters.rst index 6d98813b5..b3d19512c 100644 --- a/doc/047_tuning_backup_parameters.rst +++ b/doc/047_tuning_backup_parameters.rst @@ -58,10 +58,10 @@ In certain instances, such as very large repositories (in the TiB range) or very upload connections, it is desirable to use larger pack sizes to reduce the number of files in the repository and improve upload performance. Notable examples are OpenStack Swift and some Google Drive Team accounts, where there are hard limits on the total -number of files. Larger pack size can also improve the backup speed for a repository -stored on a local HDD. This can be achieved by either using the ``--min-packsize`` flag +number of files. Larger pack sizes can also improve the backup speed for a repository +stored on a local HDD. This can be achieved by either using the ``--min-packsize`` option or defining the ``$RESTIC_MIN_PACKSIZE`` environment variable. Restic currently defaults -to a 16MiB pack size. +to a 16 MiB pack size. The side effect of increasing the pack size is requiring more disk space for temporary pack files created before uploading. The space must be available in the system default temp @@ -69,6 +69,6 @@ directory, unless overwritten by setting the ``$TMPDIR`` environment variable. depending on the backend the memory usage can also increase by a similar amount. Restic requires temporary space according to the pack size, multiplied by the number of backend connections plus one. For example, if the backend uses 5 connections (the default -for most backends), with a target pack size of 64MiB, you'll need a *minimum* of 384MiB +for most backends), with a target pack size of 64 MiB, you'll need a *minimum* of 384 MiB of space in the temp directory. A bit of tuning may be required to strike a balance between resource usage at the backup client and the number of pack files in the repository. diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index ba9ab8990..317fb51d4 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -56,7 +56,7 @@ Usage help is available: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) - --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE or 16) + --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) @@ -129,7 +129,7 @@ command: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) - --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE or 16) + --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) @@ -442,4 +442,3 @@ time it is used, so by looking at the timestamps of the sub directories of the cache directory it can decide which sub directories are old and probably not needed any more. You can either remove these directories manually, or run a restic command with the ``--cleanup-cache`` flag. - From d7e2892048c1d8982860933810acf7416c64c74b Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 2 Jul 2022 23:45:13 +0200 Subject: [PATCH 08/15] Add changelog for packsize option --- changelog/unreleased/issue-2291 | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 changelog/unreleased/issue-2291 diff --git a/changelog/unreleased/issue-2291 b/changelog/unreleased/issue-2291 new file mode 100644 index 000000000..0ee9a7196 --- /dev/null +++ b/changelog/unreleased/issue-2291 @@ -0,0 +1,12 @@ +Enhancement: Allow pack size customization + +Restic now uses a target pack size of 16 MiB by default. It can be customized +using the `--packsize size` option. Supported pack sizes range between 4 and +128 MiB. + +It is possible to migrate an existing repository to _larger_ pack files using +`prune --repack-small`. This will rewrite every pack file which is +significantly smaller than the target size. + +https://github.com/restic/restic/issues/2291 +https://github.com/restic/restic/pull/3731 From 1b076cda977694c447b4a479d1a2ed69341e44e4 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 2 Jul 2022 23:52:02 +0200 Subject: [PATCH 09/15] rename option to --pack-size --- changelog/unreleased/issue-2291 | 2 +- cmd/restic/cmd_init.go | 2 +- cmd/restic/cmd_prune.go | 10 +++++----- cmd/restic/global.go | 10 +++++----- doc/040_backup.rst | 1 + doc/047_tuning_backup_parameters.rst | 4 ++-- doc/manual_rest.rst | 4 ++-- internal/repository/repository.go | 8 ++++---- internal/restic/repository.go | 2 +- 9 files changed, 22 insertions(+), 21 deletions(-) diff --git a/changelog/unreleased/issue-2291 b/changelog/unreleased/issue-2291 index 0ee9a7196..407e01765 100644 --- a/changelog/unreleased/issue-2291 +++ b/changelog/unreleased/issue-2291 @@ -1,7 +1,7 @@ Enhancement: Allow pack size customization Restic now uses a target pack size of 16 MiB by default. It can be customized -using the `--packsize size` option. Supported pack sizes range between 4 and +using the `--pack-size size` option. Supported pack sizes range between 4 and 128 MiB. It is possible to migrate an existing repository to _larger_ pack files using diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index ee3ec4b10..8742990f4 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -88,7 +88,7 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { s, err := repository.New(be, repository.Options{ Compression: gopts.Compression, - PackSize: gopts.MinPackSize * 1024 * 1024, + PackSize: gopts.PackSize * 1024 * 1024, }) if err != nil { return err diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index dc7fa5a74..7a55ccc88 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -425,7 +425,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption var repackCandidates []packInfoWithID repoVersion := repo.Config().Version - minPackSize := repo.MinPackSize() + targetPackSize := repo.PackSize() // loop over all packs and decide what to do bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") @@ -467,7 +467,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // use a flag that pack must be compressed p.uncompressed = mustCompress - packIsLargeEnough := !opts.RepackSmall || packSize >= int64(minPackSize) + packIsLargeEnough := !opts.RepackSmall || packSize >= int64(targetPackSize) // decide what to do switch { @@ -539,9 +539,9 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption return true case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob: return false - case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(minPackSize) && pj.unusedSize+pj.usedSize >= uint64(minPackSize): + case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(targetPackSize) && pj.unusedSize+pj.usedSize >= uint64(targetPackSize): return true - case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(minPackSize) && pi.unusedSize+pi.usedSize >= uint64(minPackSize): + case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(targetPackSize) && pi.unusedSize+pi.usedSize >= uint64(targetPackSize): return false } return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize @@ -561,7 +561,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption for _, p := range repackCandidates { reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter) reachedRepackSize := stats.size.repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes - packIsLargeEnough := !opts.RepackSmall || p.unusedSize+p.usedSize >= uint64(minPackSize) + packIsLargeEnough := !opts.RepackSmall || p.unusedSize+p.usedSize >= uint64(targetPackSize) switch { case reachedRepackSize: diff --git a/cmd/restic/global.go b/cmd/restic/global.go index d52dee34f..2173a22b7 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -63,7 +63,7 @@ type GlobalOptions struct { NoCache bool CleanupCache bool Compression repository.CompressionMode - MinPackSize uint + PackSize uint backend.TransportOptions limiter.Limits @@ -104,8 +104,8 @@ func init() { return nil }) - // parse min pack size from env, on error the default value will be used - minPackSize, _ := strconv.ParseUint(os.Getenv("RESTIC_MIN_PACKSIZE"), 10, 32) + // parse target pack size from env, on error the default value will be used + targetPackSize, _ := strconv.ParseUint(os.Getenv("RESTIC_PACK_SIZE"), 10, 32) f := cmdRoot.PersistentFlags() f.StringVarP(&globalOptions.Repo, "repo", "r", os.Getenv("RESTIC_REPOSITORY"), "`repository` to backup to or restore from (default: $RESTIC_REPOSITORY)") @@ -126,7 +126,7 @@ func init() { f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repository format version 2), one of (auto|off|max)") f.IntVar(&globalOptions.Limits.UploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.Limits.DownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)") - f.UintVar(&globalOptions.MinPackSize, "min-packsize", uint(minPackSize), "set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE)") + f.UintVar(&globalOptions.PackSize, "pack-size", uint(targetPackSize), "set target pack size in MiB. (default: $RESTIC_PACK_SIZE)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") // Use our "generate" command instead of the cobra provided "completion" command cmdRoot.CompletionOptions.DisableDefaultCmd = true @@ -448,7 +448,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { s, err := repository.New(be, repository.Options{ Compression: opts.Compression, - PackSize: opts.MinPackSize * 1024 * 1024, + PackSize: opts.PackSize * 1024 * 1024, }) if err != nil { return nil, err diff --git a/doc/040_backup.rst b/doc/040_backup.rst index 1a126f841..7ae04cde5 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -554,6 +554,7 @@ environment variables. The following lists these environment variables: RESTIC_CACHE_DIR Location of the cache directory RESTIC_COMPRESSION Compression mode (only available for repository format version 2) RESTIC_PROGRESS_FPS Frames per second by which the progress bar is updated + RESTIC_PACK_SIZE Target size for pack files TMPDIR Location for temporary files diff --git a/doc/047_tuning_backup_parameters.rst b/doc/047_tuning_backup_parameters.rst index b3d19512c..642847b22 100644 --- a/doc/047_tuning_backup_parameters.rst +++ b/doc/047_tuning_backup_parameters.rst @@ -59,8 +59,8 @@ upload connections, it is desirable to use larger pack sizes to reduce the numbe files in the repository and improve upload performance. Notable examples are OpenStack Swift and some Google Drive Team accounts, where there are hard limits on the total number of files. Larger pack sizes can also improve the backup speed for a repository -stored on a local HDD. This can be achieved by either using the ``--min-packsize`` option -or defining the ``$RESTIC_MIN_PACKSIZE`` environment variable. Restic currently defaults +stored on a local HDD. This can be achieved by either using the ``--pack-size`` option +or defining the ``$RESTIC_PACK_SIZE`` environment variable. Restic currently defaults to a 16 MiB pack size. The side effect of increasing the pack size is requiring more disk space for temporary pack diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index 317fb51d4..e17e5cd8e 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -56,7 +56,7 @@ Usage help is available: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) - --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE) + --pack-size uint set target pack size in MiB. (default: $RESTIC_PACK_SIZE) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) @@ -129,7 +129,7 @@ command: --key-hint key key ID of key to try decrypting first (default: $RESTIC_KEY_HINT) --limit-download int limits downloads to a maximum rate in KiB/s. (default: unlimited) --limit-upload int limits uploads to a maximum rate in KiB/s. (default: unlimited) - --min-packsize uint set min pack size in MiB. (default: $RESTIC_MIN_PACKSIZE) + --pack-size uint set target pack size in MiB. (default: $RESTIC_PACK_SIZE) --no-cache do not use a local cache --no-lock do not lock the repository, this allows some operations on read-only repositories -o, --option key=value set extended option (key=value, can be specified multiple times) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 2b7750648..872b2d71e 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -143,8 +143,8 @@ func (r *Repository) Config() restic.Config { return r.cfg } -// MinPackSize return the minimum size of a pack file before uploading -func (r *Repository) MinPackSize() uint { +// PackSize return the target size of a pack file when uploading +func (r *Repository) PackSize() uint { return r.opts.PackSize } @@ -516,8 +516,8 @@ func (r *Repository) StartPackUploader(ctx context.Context, wg *errgroup.Group) innerWg, ctx := errgroup.WithContext(ctx) r.packerWg = innerWg r.uploader = newPackerUploader(ctx, innerWg, r, r.be.Connections()) - r.treePM = newPackerManager(r.key, restic.TreeBlob, r.MinPackSize(), r.uploader.QueuePacker) - r.dataPM = newPackerManager(r.key, restic.DataBlob, r.MinPackSize(), r.uploader.QueuePacker) + r.treePM = newPackerManager(r.key, restic.TreeBlob, r.PackSize(), r.uploader.QueuePacker) + r.dataPM = newPackerManager(r.key, restic.DataBlob, r.PackSize(), r.uploader.QueuePacker) wg.Go(func() error { return innerWg.Wait() diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 1e64289e5..36f5a73bf 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -25,7 +25,7 @@ type Repository interface { LookupBlobSize(ID, BlobType) (uint, bool) Config() Config - MinPackSize() uint + PackSize() uint // List calls the function fn for each file of type t in the repository. // When an error is returned by fn, processing stops and List() returns the From 324935cb8077a699700f7c8467b8f3349b548b3f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 3 Jul 2022 00:16:04 +0200 Subject: [PATCH 10/15] Only repack small files if there are multiple of them --- cmd/restic/cmd_prune.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 7a55ccc88..a86a9fde7 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -71,7 +71,7 @@ func addPruneOptions(c *cobra.Command) { f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')") f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)") f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable") - f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "also repack small packs") + f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "repack too small packs") f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data") } @@ -424,8 +424,10 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption repackPacks := restic.NewIDSet() var repackCandidates []packInfoWithID + var repackSmallCandidates []packInfoWithID repoVersion := repo.Config().Version - targetPackSize := repo.PackSize() + // consider files with at least 80% of the target size as large enough + targetPackSize := repo.PackSize() / 5 * 4 // loop over all packs and decide what to do bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") @@ -482,8 +484,12 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption stats.packs.keep++ case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress && packIsLargeEnough: - // All blobs in pack are used and not mixed => keep pack! - stats.packs.keep++ + if packIsLargeEnough { + // All blobs in pack are used and not mixed => keep pack! + stats.packs.keep++ + } else { + repackSmallCandidates = append(repackSmallCandidates, packInfoWithID{ID: id, packInfo: p}) + } default: // all other packs are candidates for repacking @@ -526,6 +532,14 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption } } + if len(repackSmallCandidates) < 10 { + // too few small files to be worth the trouble, this also prevents endlessly repacking + // if there is just a single pack file below the target size + stats.packs.keep += uint(len(repackSmallCandidates)) + } else { + repackCandidates = append(repackCandidates, repackSmallCandidates...) + } + // Sort repackCandidates such that packs with highest ratio unused/used space are picked first. // This is equivalent to sorting by unused / total space. // Instead of unused[i] / used[i] > unused[j] / used[j] we use From 176b387d981f6f01f3b1ff80e38d5c24491c0c79 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 3 Jul 2022 00:18:44 +0200 Subject: [PATCH 11/15] Always repack very small pack files --- cmd/restic/cmd_prune.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index a86a9fde7..c3c135836 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -426,8 +426,12 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption var repackCandidates []packInfoWithID var repackSmallCandidates []packInfoWithID repoVersion := repo.Config().Version - // consider files with at least 80% of the target size as large enough - targetPackSize := repo.PackSize() / 5 * 4 + // only repack very small files by default + targetPackSize := repo.PackSize() / 25 + if opts.RepackSmall { + // consider files with at least 80% of the target size as large enough + targetPackSize = repo.PackSize() / 5 * 4 + } // loop over all packs and decide what to do bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed") @@ -469,8 +473,6 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // use a flag that pack must be compressed p.uncompressed = mustCompress - packIsLargeEnough := !opts.RepackSmall || packSize >= int64(targetPackSize) - // decide what to do switch { case p.usedBlobs == 0: @@ -483,8 +485,8 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // if this is a data pack and --repack-cacheable-only is set => keep pack! stats.packs.keep++ - case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress && packIsLargeEnough: - if packIsLargeEnough { + case p.unusedBlobs == 0 && p.tpe != restic.InvalidBlob && !mustCompress: + if packSize >= int64(targetPackSize) { // All blobs in pack are used and not mixed => keep pack! stats.packs.keep++ } else { @@ -544,7 +546,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption // This is equivalent to sorting by unused / total space. // Instead of unused[i] / used[i] > unused[j] / used[j] we use // unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64 - // Morover packs containing trees are sorted to the beginning + // Moreover packs containing trees and too small packs are sorted to the beginning sort.Slice(repackCandidates, func(i, j int) bool { pi := repackCandidates[i].packInfo pj := repackCandidates[j].packInfo @@ -553,9 +555,9 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption return true case pj.tpe != restic.DataBlob && pi.tpe == restic.DataBlob: return false - case opts.RepackSmall && pi.unusedSize+pi.usedSize < uint64(targetPackSize) && pj.unusedSize+pj.usedSize >= uint64(targetPackSize): + case pi.unusedSize+pi.usedSize < uint64(targetPackSize) && pj.unusedSize+pj.usedSize >= uint64(targetPackSize): return true - case opts.RepackSmall && pj.unusedSize+pj.usedSize < uint64(targetPackSize) && pi.unusedSize+pi.usedSize >= uint64(targetPackSize): + case pj.unusedSize+pj.usedSize < uint64(targetPackSize) && pi.unusedSize+pi.usedSize >= uint64(targetPackSize): return false } return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize @@ -575,7 +577,7 @@ func decidePackAction(ctx context.Context, opts PruneOptions, gopts GlobalOption for _, p := range repackCandidates { reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter) reachedRepackSize := stats.size.repack+p.unusedSize+p.usedSize >= opts.MaxRepackBytes - packIsLargeEnough := !opts.RepackSmall || p.unusedSize+p.usedSize >= uint64(targetPackSize) + packIsLargeEnough := p.unusedSize+p.usedSize >= uint64(targetPackSize) switch { case reachedRepackSize: From 7f3b2be1e8d855f64742a0c3fdf8417c66ac8f8e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 13 Jul 2022 18:00:17 +0200 Subject: [PATCH 12/15] s3: Disable multipart uploads below 200MB --- internal/backend/s3/s3.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/backend/s3/s3.go b/internal/backend/s3/s3.go index 85c82161c..0b3816c06 100644 --- a/internal/backend/s3/s3.go +++ b/internal/backend/s3/s3.go @@ -291,6 +291,8 @@ func (be *Backend) Save(ctx context.Context, h restic.Handle, rd restic.RewindRe opts.ContentType = "application/octet-stream" // the only option with the high-level api is to let the library handle the checksum computation opts.SendContentMd5 = true + // only use multipart uploads for very large files + opts.PartSize = 200 * 1024 * 1024 debug.Log("PutObject(%v, %v, %v)", be.cfg.Bucket, objName, rd.Length()) info, err := be.client.PutObject(ctx, be.cfg.Bucket, objName, ioutil.NopCloser(rd), int64(rd.Length()), opts) From eaf43607f941b5d184750a671c3110743ba0cb1f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 16 Jul 2022 19:52:11 +0200 Subject: [PATCH 13/15] Add note that pack-size is not an exact limit --- cmd/restic/global.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/restic/global.go b/cmd/restic/global.go index 2173a22b7..45ddd74f7 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -126,7 +126,7 @@ func init() { f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repository format version 2), one of (auto|off|max)") f.IntVar(&globalOptions.Limits.UploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)") f.IntVar(&globalOptions.Limits.DownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)") - f.UintVar(&globalOptions.PackSize, "pack-size", uint(targetPackSize), "set target pack size in MiB. (default: $RESTIC_PACK_SIZE)") + f.UintVar(&globalOptions.PackSize, "pack-size", uint(targetPackSize), "set target pack size in MiB, created pack files may be larger (default: $RESTIC_PACK_SIZE)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") // Use our "generate" command instead of the cobra provided "completion" command cmdRoot.CompletionOptions.DisableDefaultCmd = true From 55a11c13965b4f7213967fe8a0f0732dd2890aad Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 16 Jul 2022 19:54:04 +0200 Subject: [PATCH 14/15] Reword prune --repack-small description --- cmd/restic/cmd_prune.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index c3c135836..676166f94 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -71,7 +71,7 @@ func addPruneOptions(c *cobra.Command) { f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')") f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)") f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable") - f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "repack too small packs") + f.BoolVar(&pruneOptions.RepackSmall, "repack-small", false, "repack pack files below 80%% of target pack size") f.BoolVar(&pruneOptions.RepackUncompressed, "repack-uncompressed", false, "repack all uncompressed data") } From 7266f07c87c39b1edf9a706635c05564d1eeabd2 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 23 Jul 2022 22:40:15 +0200 Subject: [PATCH 15/15] repository: StreamPack in parts if there are too large gaps For large pack sizes we might be only interested in the first and last blob of a pack file. Thus stream a pack file in multiple parts if the gaps between requested blobs grow too large. --- internal/repository/repository.go | 26 ++++++++++++++++++++++++++ internal/repository/repository_test.go | 21 ++++++++++++++------- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 872b2d71e..625ad9b16 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -831,6 +831,9 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte type BackendLoadFn func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error +// Skip sections with more than 4MB unused blobs +const maxUnusedRange = 4 * 1024 * 1024 + // StreamPack loads the listed blobs from the specified pack file. The plaintext blob is passed to // the handleBlobFn callback or an error if decryption failed or the blob hash does not match. In // case of download errors handleBlobFn might be called multiple times for the same blob. If the @@ -844,6 +847,29 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack sort.Slice(blobs, func(i, j int) bool { return blobs[i].Offset < blobs[j].Offset }) + + lowerIdx := 0 + lastPos := blobs[0].Offset + for i := 0; i < len(blobs); i++ { + if blobs[i].Offset < lastPos { + // don't wait for streamPackPart to fail + return errors.Errorf("overlapping blobs in pack %v", packID) + } + if blobs[i].Offset-lastPos > maxUnusedRange { + // load everything up to the skipped file section + err := streamPackPart(ctx, beLoad, key, packID, blobs[lowerIdx:i], handleBlobFn) + if err != nil { + return err + } + lowerIdx = i + } + lastPos = blobs[i].Offset + blobs[i].Length + } + // load remainder + return streamPackPart(ctx, beLoad, key, packID, blobs[lowerIdx:], handleBlobFn) +} + +func streamPackPart(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error { h := restic.Handle{Type: restic.PackFile, Name: packID.String(), ContainedBlobType: restic.DataBlob} dataStart := blobs[0].Offset diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index c0f96f7ad..b5b0ff92d 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -455,17 +455,19 @@ func testStreamPack(t *testing.T, version uint) { } blobSizes := []int{ + 5522811, 10, 5231, 18812, 123123, + 13522811, 12301, 892242, 28616, 13351, 252287, 188883, - 2522811, + 3522811, 18883, } @@ -481,6 +483,7 @@ func testStreamPack(t *testing.T, version uint) { packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress) + loadCalls := 0 load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error { data := packfile @@ -495,6 +498,7 @@ func testStreamPack(t *testing.T, version uint) { } data = data[:length] + loadCalls++ return fn(bytes.NewReader(data)) @@ -504,19 +508,20 @@ func testStreamPack(t *testing.T, version uint) { t.Run("regular", func(t *testing.T) { tests := []struct { blobs []restic.Blob + calls int }{ - {packfileBlobs[1:2]}, - {packfileBlobs[2:5]}, - {packfileBlobs[2:8]}, + {packfileBlobs[1:2], 1}, + {packfileBlobs[2:5], 1}, + {packfileBlobs[2:8], 1}, {[]restic.Blob{ packfileBlobs[0], - packfileBlobs[8], packfileBlobs[4], - }}, + packfileBlobs[2], + }, 1}, {[]restic.Blob{ packfileBlobs[0], packfileBlobs[len(packfileBlobs)-1], - }}, + }, 2}, } for _, test := range tests { @@ -542,6 +547,7 @@ func testStreamPack(t *testing.T, version uint) { wantBlobs[blob.ID] = 1 } + loadCalls = 0 err = repository.StreamPack(ctx, load, &key, restic.ID{}, test.blobs, handleBlob) if err != nil { t.Fatal(err) @@ -550,6 +556,7 @@ func testStreamPack(t *testing.T, version uint) { if !cmp.Equal(wantBlobs, gotBlobs) { t.Fatal(cmp.Diff(wantBlobs, gotBlobs)) } + rtest.Equals(t, test.calls, loadCalls) }) } })