From 3e0acf13956c63bae371873b19504a726152c264 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Wed, 18 Nov 2020 12:36:06 +0100 Subject: [PATCH 1/5] restore: Don't save (part of) pack in memory --- internal/restorer/filerestorer.go | 121 +++++++++++++++++------------- 1 file changed, 67 insertions(+), 54 deletions(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 709301d82..6a2dfbd20 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -1,11 +1,12 @@ package restorer import ( - "bytes" + "bufio" "context" "io" "math" "path/filepath" + "sort" "sync" "github.com/restic/restic/internal/crypto" @@ -179,6 +180,8 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { return nil } +const maxBufferSize = 4 * 1024 * 1024 + func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { // calculate pack byte range and blob->[]files->[]offsets mappings @@ -226,18 +229,12 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { } } - packData := make([]byte, int(end-start)) - - h := restic.Handle{Type: restic.PackFile, Name: pack.id.String()} - err := r.packLoader(ctx, h, int(end-start), start, func(rd io.Reader) error { - l, err := io.ReadFull(rd, packData) - if err != nil { - return err - } - if l != len(packData) { - return errors.Errorf("unexpected pack size: expected %d but got %d", len(packData), l) - } - return nil + sortedBlobs := make([]restic.ID, 0, len(blobs)) + for blobID := range blobs { + sortedBlobs = append(sortedBlobs, blobID) + } + sort.Slice(sortedBlobs, func(i, j int) bool { + return blobs[sortedBlobs[i]].offset < blobs[sortedBlobs[j]].offset }) markFileError := func(file *fileInfo, err error) { @@ -248,6 +245,61 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { } } + h := restic.Handle{Type: restic.PackFile, Name: pack.id.String()} + err := r.packLoader(ctx, h, int(end-start), start, func(rd io.Reader) error { + bufferSize := int(end - start) + if bufferSize > maxBufferSize { + bufferSize = maxBufferSize + } + BufRd := bufio.NewReaderSize(rd, bufferSize) + currentBlobEnd := start + for _, blobID := range sortedBlobs { + blob := blobs[blobID] + _, err := BufRd.Discard(int(blob.offset - currentBlobEnd)) + if err != nil { + return err + } + blobData, err := r.loadBlob(BufRd, blobID, blob.length) + if err != nil { + for file := range blob.files { + markFileError(file, err) + } + continue + } + currentBlobEnd = blob.offset + int64(blob.length) + for file, offsets := range blob.files { + for _, offset := range offsets { + writeToFile := func() error { + // this looks overly complicated and needs explanation + // two competing requirements: + // - must create the file once and only once + // - should allow concurrent writes to the file + // so write the first blob while holding file lock + // write other blobs after releasing the lock + file.lock.Lock() + create := file.flags&fileProgress == 0 + createSize := int64(-1) + if create { + defer file.lock.Unlock() + file.flags |= fileProgress + createSize = file.size + } else { + file.lock.Unlock() + } + return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) + } + err := writeToFile() + if err != nil { + markFileError(file, err) + break + } + } + } + } + + return nil + }) + if err != nil { for file := range pack.files { markFileError(file, err) @@ -255,53 +307,14 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { return } - rd := bytes.NewReader(packData) - - for blobID, blob := range blobs { - blobData, err := r.loadBlob(rd, blobID, blob.offset-start, blob.length) - if err != nil { - for file := range blob.files { - markFileError(file, err) - } - continue - } - for file, offsets := range blob.files { - for _, offset := range offsets { - writeToFile := func() error { - // this looks overly complicated and needs explanation - // two competing requirements: - // - must create the file once and only once - // - should allow concurrent writes to the file - // so write the first blob while holding file lock - // write other blobs after releasing the lock - file.lock.Lock() - create := file.flags&fileProgress == 0 - createSize := int64(-1) - if create { - defer file.lock.Unlock() - file.flags |= fileProgress - createSize = file.size - } else { - file.lock.Unlock() - } - return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) - } - err := writeToFile() - if err != nil { - markFileError(file, err) - break - } - } - } - } } -func (r *fileRestorer) loadBlob(rd io.ReaderAt, blobID restic.ID, offset int64, length int) ([]byte, error) { +func (r *fileRestorer) loadBlob(rd io.Reader, blobID restic.ID, length int) ([]byte, error) { // TODO reconcile with Repository#loadBlob implementation buf := make([]byte, length) - n, err := rd.ReadAt(buf, offset) + n, err := rd.Read(buf) if err != nil { return nil, err } From 07b3f65a6f229949bebf1fc89460e089a25b9095 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Thu, 19 Nov 2020 13:29:44 +0100 Subject: [PATCH 2/5] filesrestorer: Re-use buffer --- internal/restorer/filerestorer.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 6a2dfbd20..7e9149e36 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -253,13 +253,14 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { } BufRd := bufio.NewReaderSize(rd, bufferSize) currentBlobEnd := start + var blobData, buf []byte for _, blobID := range sortedBlobs { blob := blobs[blobID] _, err := BufRd.Discard(int(blob.offset - currentBlobEnd)) if err != nil { return err } - blobData, err := r.loadBlob(BufRd, blobID, blob.length) + blobData, buf, err = r.loadBlob(BufRd, blobID, blob.length, buf) if err != nil { for file := range blob.files { markFileError(file, err) @@ -309,31 +310,35 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { } -func (r *fileRestorer) loadBlob(rd io.Reader, blobID restic.ID, length int) ([]byte, error) { +func (r *fileRestorer) loadBlob(rd io.Reader, blobID restic.ID, length int, buf []byte) ([]byte, []byte, error) { // TODO reconcile with Repository#loadBlob implementation - buf := make([]byte, length) + if cap(buf) < length { + buf = make([]byte, length) + } else { + buf = buf[:length] + } n, err := rd.Read(buf) if err != nil { - return nil, err + return nil, nil, err } if n != length { - return nil, errors.Errorf("error loading blob %v: wrong length returned, want %d, got %d", blobID.Str(), length, n) + return nil, nil, errors.Errorf("error loading blob %v: wrong length returned, want %d, got %d", blobID.Str(), length, n) } // decrypt nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():] plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil) if err != nil { - return nil, errors.Errorf("decrypting blob %v failed: %v", blobID, err) + return nil, nil, errors.Errorf("decrypting blob %v failed: %v", blobID, err) } // check hash if !restic.Hash(plaintext).Equal(blobID) { - return nil, errors.Errorf("blob %v returned invalid hash", blobID) + return nil, nil, errors.Errorf("blob %v returned invalid hash", blobID) } - return plaintext, nil + return plaintext, buf, nil } From 7409225fa886c4241e50173b72c979aadba9771b Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Fri, 20 Nov 2020 13:15:25 +0100 Subject: [PATCH 3/5] Add filerestorer test where only parts of pack are used --- internal/restorer/filerestorer_test.go | 48 +++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index 2058a6c4d..75433cdef 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -152,16 +152,25 @@ func newTestRepo(content []TestFile) *TestRepo { return repo } -func restoreAndVerify(t *testing.T, tempdir string, content []TestFile) { +func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) { repo := newTestRepo(content) r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup) - r.files = repo.files + + if files == nil { + r.files = repo.files + } else { + for _, file := range repo.files { + if files[file.location] { + r.files = append(r.files, file) + } + } + } err := r.restoreFiles(context.TODO()) rtest.OK(t, err) - for _, file := range repo.files { + for _, file := range r.files { target := r.targetPath(file.location) data, err := ioutil.ReadFile(target) if err != nil { @@ -203,5 +212,36 @@ func TestFileRestorerBasic(t *testing.T) { {"data3-1", "pack3-1"}, }, }, - }) + }, nil) +} + +func TestFileRestorerPackSkip(t *testing.T) { + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + files := make(map[string]bool) + files["file2"] = true + + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1"}, + {"data1-2", "pack1"}, + {"data1-3", "pack1"}, + {"data1-4", "pack1"}, + {"data1-5", "pack1"}, + {"data1-6", "pack1"}, + }, + }, + { + name: "file2", + blobs: []TestBlob{ + // file is contained in pack1 but need pack parts to be skipped + {"data1-2", "pack1"}, + {"data1-4", "pack1"}, + {"data1-6", "pack1"}, + }, + }, + }, files) } From 34a33565c846cd84d3b580181d9b4bf367366f60 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Fri, 1 Jan 2021 08:06:04 +0100 Subject: [PATCH 4/5] Fix loadBlob in filerestorer --- internal/restorer/filerestorer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 7e9149e36..0b245569c 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -319,7 +319,7 @@ func (r *fileRestorer) loadBlob(rd io.Reader, blobID restic.ID, length int, buf buf = buf[:length] } - n, err := rd.Read(buf) + n, err := io.ReadFull(rd, buf) if err != nil { return nil, nil, err } From 69d5b4c36b7ceeac8a3b4d5653fc17b25de6e67c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 3 Jan 2021 13:55:59 +0100 Subject: [PATCH 5/5] restorer: lower-case variable name --- internal/restorer/filerestorer.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 0b245569c..92ba39e77 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -251,16 +251,16 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { if bufferSize > maxBufferSize { bufferSize = maxBufferSize } - BufRd := bufio.NewReaderSize(rd, bufferSize) + bufRd := bufio.NewReaderSize(rd, bufferSize) currentBlobEnd := start var blobData, buf []byte for _, blobID := range sortedBlobs { blob := blobs[blobID] - _, err := BufRd.Discard(int(blob.offset - currentBlobEnd)) + _, err := bufRd.Discard(int(blob.offset - currentBlobEnd)) if err != nil { return err } - blobData, buf, err = r.loadBlob(BufRd, blobID, blob.length, buf) + blobData, buf, err = r.loadBlob(bufRd, blobID, blob.length, buf) if err != nil { for file := range blob.files { markFileError(file, err)