diff --git a/src/cmds/restic/cmd_cat.go b/src/cmds/restic/cmd_cat.go index d95e22740..47a7e06a2 100644 --- a/src/cmds/restic/cmd_cat.go +++ b/src/cmds/restic/cmd_cat.go @@ -99,7 +99,7 @@ func runCat(gopts GlobalOptions, args []string) error { return nil case "key": h := restic.Handle{Type: restic.KeyFile, Name: id.String()} - buf, err := backend.LoadAll(repo.Backend(), h, nil) + buf, err := backend.LoadAll(repo.Backend(), h) if err != nil { return err } @@ -150,7 +150,7 @@ func runCat(gopts GlobalOptions, args []string) error { switch tpe { case "pack": h := restic.Handle{Type: restic.DataFile, Name: id.String()} - buf, err := backend.LoadAll(repo.Backend(), h, nil) + buf, err := backend.LoadAll(repo.Backend(), h) if err != nil { return err } diff --git a/src/restic/archiver/archiver_duplication_test.go b/src/restic/archiver/archiver_duplication_test.go index ef59bb08c..c27045bc9 100644 --- a/src/restic/archiver/archiver_duplication_test.go +++ b/src/restic/archiver/archiver_duplication_test.go @@ -43,11 +43,11 @@ func forgetfulBackend() restic.Backend { return false, nil } - be.LoadFn = func(h restic.Handle, p []byte, off int64) (int, error) { - return 0, errors.New("not found") + be.LoadFn = func(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + return nil, errors.New("not found") } - be.SaveFn = func(h restic.Handle, p []byte) error { + be.SaveFn = func(h restic.Handle, rd io.Reader) error { return nil } diff --git a/src/restic/backend.go b/src/restic/backend.go index 37a840412..0db225d59 100644 --- a/src/restic/backend.go +++ b/src/restic/backend.go @@ -1,5 +1,7 @@ package restic +import "io" + // Backend is used to store and access data. type Backend interface { // Location returns a string that describes the type and location of the @@ -15,14 +17,14 @@ type Backend interface { // Close the backend Close() error - // Load returns the data stored in the backend for h at the given offset - // and saves it in p. Load has the same semantics as io.ReaderAt, except - // that a negative offset is also allowed. In this case it references a - // position relative to the end of the file (similar to Seek()). - Load(h Handle, p []byte, off int64) (int, error) - // Save stores the data in the backend under the given handle. - Save(h Handle, p []byte) error + Save(h Handle, rd io.Reader) error + + // Load returns a reader that yields the contents of the file at h at the + // given offset. If length is larger than zero, only a portion of the file + // is returned. rd must be closed after use. If an error is returned, the + // ReadCloser must be nil. + Load(h Handle, length int, offset int64) (io.ReadCloser, error) // Stat returns information about the File identified by h. Stat(h Handle) (FileInfo, error) diff --git a/src/restic/backend/local/backend_test.go b/src/restic/backend/local/backend_test.go index 8954dc83d..8607f01b7 100644 --- a/src/restic/backend/local/backend_test.go +++ b/src/restic/backend/local/backend_test.go @@ -51,13 +51,6 @@ func TestLocalBackendLoad(t *testing.T) { test.TestLoad(t) } -func TestLocalBackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestLocalBackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/local/local.go b/src/restic/backend/local/local.go index e09650e5c..4f7dbfdb8 100644 --- a/src/restic/backend/local/local.go +++ b/src/restic/backend/local/local.go @@ -101,58 +101,18 @@ func dirname(base string, t restic.FileType, name string) string { return filepath.Join(base, n) } -// Load returns the data stored in the backend for h at the given offset and -// saves it in p. Load has the same semantics as io.ReaderAt, with one -// exception: when off is lower than zero, it is treated as an offset relative -// to the end of the file. -func (b *Local) Load(h restic.Handle, p []byte, off int64) (n int, err error) { - debug.Log("Load %v, length %v at %v", h, len(p), off) - if err := h.Valid(); err != nil { - return 0, err - } - - f, err := fs.Open(filename(b.p, h.Type, h.Name)) - if err != nil { - return 0, errors.Wrap(err, "Open") - } - - defer func() { - e := f.Close() - if err == nil { - err = errors.Wrap(e, "Close") - } - }() - - switch { - case off > 0: - _, err = f.Seek(off, 0) - case off < 0: - _, err = f.Seek(off, 2) - } - - if err != nil { - return 0, errors.Wrap(err, "Seek") - } - - return io.ReadFull(f, p) -} - -// writeToTempfile saves p into a tempfile in tempdir. -func writeToTempfile(tempdir string, p []byte) (filename string, err error) { +// copyToTempfile saves p into a tempfile in tempdir. +func copyToTempfile(tempdir string, rd io.Reader) (filename string, err error) { tmpfile, err := ioutil.TempFile(tempdir, "temp-") if err != nil { return "", errors.Wrap(err, "TempFile") } - n, err := tmpfile.Write(p) + _, err = io.Copy(tmpfile, rd) if err != nil { return "", errors.Wrap(err, "Write") } - if n != len(p) { - return "", errors.New("not all bytes writen") - } - if err = tmpfile.Sync(); err != nil { return "", errors.Wrap(err, "Syncn") } @@ -166,14 +126,14 @@ func writeToTempfile(tempdir string, p []byte) (filename string, err error) { } // Save stores data in the backend at the handle. -func (b *Local) Save(h restic.Handle, p []byte) (err error) { - debug.Log("Save %v, length %v", h, len(p)) +func (b *Local) Save(h restic.Handle, rd io.Reader) (err error) { + debug.Log("Save %v", h) if err := h.Valid(); err != nil { return err } - tmpfile, err := writeToTempfile(filepath.Join(b.p, backend.Paths.Temp), p) - debug.Log("saved %v (%d bytes) to %v", h, len(p), tmpfile) + tmpfile, err := copyToTempfile(filepath.Join(b.p, backend.Paths.Temp), rd) + debug.Log("saved %v to %v", h, tmpfile) if err != nil { return err } @@ -210,6 +170,39 @@ func (b *Local) Save(h restic.Handle, p []byte) (err error) { return setNewFileMode(filename, fi) } +// Load returns a reader that yields the contents of the file at h at the +// given offset. If length is nonzero, only a portion of the file is +// returned. rd must be closed after use. +func (b *Local) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + debug.Log("Load %v, length %v, offset %v", h, length, offset) + if err := h.Valid(); err != nil { + return nil, err + } + + if offset < 0 { + return nil, errors.New("offset is negative") + } + + f, err := os.Open(filename(b.p, h.Type, h.Name)) + if err != nil { + return nil, err + } + + if offset > 0 { + _, err = f.Seek(offset, 0) + if err != nil { + f.Close() + return nil, err + } + } + + if length > 0 { + return backend.LimitReadCloser(f, int64(length)), nil + } + + return f, nil +} + // Stat returns information about a blob. func (b *Local) Stat(h restic.Handle) (restic.FileInfo, error) { debug.Log("Stat %v", h) diff --git a/src/restic/backend/mem/backend_test.go b/src/restic/backend/mem/backend_test.go index 6bf19580f..13e95f115 100644 --- a/src/restic/backend/mem/backend_test.go +++ b/src/restic/backend/mem/backend_test.go @@ -51,13 +51,6 @@ func TestMemBackendLoad(t *testing.T) { test.TestLoad(t) } -func TestMemBackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestMemBackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/mem/mem_backend.go b/src/restic/backend/mem/mem_backend.go index a40ad9936..76a00b7fe 100644 --- a/src/restic/backend/mem/mem_backend.go +++ b/src/restic/backend/mem/mem_backend.go @@ -1,10 +1,13 @@ package mem import ( + "bytes" "io" + "io/ioutil" "restic" "sync" + "restic/backend" "restic/errors" "restic/debug" @@ -52,48 +55,8 @@ func (be *MemoryBackend) Test(t restic.FileType, name string) (bool, error) { return false, nil } -// Load reads data from the backend. -func (be *MemoryBackend) Load(h restic.Handle, p []byte, off int64) (int, error) { - if err := h.Valid(); err != nil { - return 0, err - } - - be.m.Lock() - defer be.m.Unlock() - - if h.Type == restic.ConfigFile { - h.Name = "" - } - - debug.Log("get %v offset %v len %v", h, off, len(p)) - - if _, ok := be.data[entry{h.Type, h.Name}]; !ok { - return 0, errors.New("no such data") - } - - buf := be.data[entry{h.Type, h.Name}] - switch { - case off > int64(len(buf)): - return 0, errors.New("offset beyond end of file") - case off < -int64(len(buf)): - off = 0 - case off < 0: - off = int64(len(buf)) + off - } - - buf = buf[off:] - - n := copy(p, buf) - - if len(p) > len(buf) { - return n, io.ErrUnexpectedEOF - } - - return n, nil -} - // Save adds new Data to the backend. -func (be *MemoryBackend) Save(h restic.Handle, p []byte) error { +func (be *MemoryBackend) Save(h restic.Handle, rd io.Reader) error { if err := h.Valid(); err != nil { return err } @@ -109,14 +72,55 @@ func (be *MemoryBackend) Save(h restic.Handle, p []byte) error { return errors.New("file already exists") } - debug.Log("save %v bytes at %v", len(p), h) - buf := make([]byte, len(p)) - copy(buf, p) + buf, err := ioutil.ReadAll(rd) + if err != nil { + return err + } + be.data[entry{h.Type, h.Name}] = buf + debug.Log("saved %v bytes at %v", len(buf), h) return nil } +// Load returns a reader that yields the contents of the file at h at the +// given offset. If length is nonzero, only a portion of the file is +// returned. rd must be closed after use. +func (be *MemoryBackend) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + if err := h.Valid(); err != nil { + return nil, err + } + + be.m.Lock() + defer be.m.Unlock() + + if h.Type == restic.ConfigFile { + h.Name = "" + } + + debug.Log("Load %v offset %v len %v", h, offset, length) + + if offset < 0 { + return nil, errors.New("offset is negative") + } + + if _, ok := be.data[entry{h.Type, h.Name}]; !ok { + return nil, errors.New("no such data") + } + + buf := be.data[entry{h.Type, h.Name}] + if offset > int64(len(buf)) { + return nil, errors.New("offset beyond end of file") + } + + buf = buf[offset:] + if length > 0 && len(buf) > length { + buf = buf[:length] + } + + return backend.Closer{bytes.NewReader(buf)}, nil +} + // Stat returns information about a file in the backend. func (be *MemoryBackend) Stat(h restic.Handle) (restic.FileInfo, error) { be.m.Lock() diff --git a/src/restic/backend/rest/backend_test.go b/src/restic/backend/rest/backend_test.go index 9605396d6..4274bfcb1 100644 --- a/src/restic/backend/rest/backend_test.go +++ b/src/restic/backend/rest/backend_test.go @@ -51,13 +51,6 @@ func TestRestBackendLoad(t *testing.T) { test.TestLoad(t) } -func TestRestBackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestRestBackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/rest/rest.go b/src/restic/backend/rest/rest.go index f01854fd6..6ed05965f 100644 --- a/src/restic/backend/rest/rest.go +++ b/src/restic/backend/rest/rest.go @@ -1,7 +1,6 @@ package rest import ( - "bytes" "encoding/json" "fmt" "io" @@ -12,6 +11,7 @@ import ( "restic" "strings" + "restic/debug" "restic/errors" "restic/backend" @@ -19,6 +19,9 @@ import ( const connLimit = 10 +// make sure the rest backend implements restic.Backend +var _ restic.Backend = &restBackend{} + // restPath returns the path to the given resource. func restPath(url *url.URL, h restic.Handle) string { u := *url @@ -71,65 +74,14 @@ func (b *restBackend) Location() string { return b.url.String() } -// Load returns the data stored in the backend for h at the given offset -// and saves it in p. Load has the same semantics as io.ReaderAt. -func (b *restBackend) Load(h restic.Handle, p []byte, off int64) (n int, err error) { - if err := h.Valid(); err != nil { - return 0, err - } - - // invert offset - if off < 0 { - info, err := b.Stat(h) - if err != nil { - return 0, errors.Wrap(err, "Stat") - } - - if -off > info.Size { - off = 0 - } else { - off = info.Size + off - } - } - - req, err := http.NewRequest("GET", restPath(b.url, h), nil) - if err != nil { - return 0, errors.Wrap(err, "http.NewRequest") - } - req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", off, off+int64(len(p)))) - <-b.connChan - resp, err := b.client.Do(req) - b.connChan <- struct{}{} - - if resp != nil { - defer func() { - io.Copy(ioutil.Discard, resp.Body) - e := resp.Body.Close() - - if err == nil { - err = errors.Wrap(e, "Close") - } - }() - } - - if err != nil { - return 0, errors.Wrap(err, "client.Do") - } - if resp.StatusCode != 200 && resp.StatusCode != 206 { - return 0, errors.Errorf("unexpected HTTP response code %v", resp.StatusCode) - } - - return io.ReadFull(resp.Body, p) -} - // Save stores data in the backend at the handle. -func (b *restBackend) Save(h restic.Handle, p []byte) (err error) { +func (b *restBackend) Save(h restic.Handle, rd io.Reader) (err error) { if err := h.Valid(); err != nil { return err } <-b.connChan - resp, err := b.client.Post(restPath(b.url, h), "binary/octet-stream", bytes.NewReader(p)) + resp, err := b.client.Post(restPath(b.url, h), "binary/octet-stream", rd) b.connChan <- struct{}{} if resp != nil { @@ -154,6 +106,56 @@ func (b *restBackend) Save(h restic.Handle, p []byte) (err error) { return nil } +// Load returns a reader that yields the contents of the file at h at the +// given offset. If length is nonzero, only a portion of the file is +// returned. rd must be closed after use. +func (b *restBackend) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + debug.Log("Load %v, length %v, offset %v", h, length, offset) + if err := h.Valid(); err != nil { + return nil, err + } + + if offset < 0 { + return nil, errors.New("offset is negative") + } + + if length < 0 { + return nil, errors.Errorf("invalid length %d", length) + } + + req, err := http.NewRequest("GET", restPath(b.url, h), nil) + if err != nil { + return nil, errors.Wrap(err, "http.NewRequest") + } + + byteRange := fmt.Sprintf("bytes=%d-", offset) + if length > 0 { + byteRange = fmt.Sprintf("bytes=%d-%d", offset, offset+int64(length)-1) + } + req.Header.Add("Range", byteRange) + debug.Log("Load(%v) send range %v", h, byteRange) + + <-b.connChan + resp, err := b.client.Do(req) + b.connChan <- struct{}{} + + if err != nil { + if resp != nil { + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + } + return nil, errors.Wrap(err, "client.Do") + } + + if resp.StatusCode != 200 && resp.StatusCode != 206 { + io.Copy(ioutil.Discard, resp.Body) + resp.Body.Close() + return nil, errors.Errorf("unexpected HTTP response code %v", resp.StatusCode) + } + + return resp.Body, nil +} + // Stat returns information about a blob. func (b *restBackend) Stat(h restic.Handle) (restic.FileInfo, error) { if err := h.Valid(); err != nil { diff --git a/src/restic/backend/s3/backend_test.go b/src/restic/backend/s3/backend_test.go index 9fb4dd3fa..82eca2631 100644 --- a/src/restic/backend/s3/backend_test.go +++ b/src/restic/backend/s3/backend_test.go @@ -51,13 +51,6 @@ func TestS3BackendLoad(t *testing.T) { test.TestLoad(t) } -func TestS3BackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestS3BackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 56a6833cc..00ca758eb 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -7,6 +7,7 @@ import ( "restic" "strings" + "restic/backend" "restic/errors" "github.com/minio/minio-go" @@ -73,86 +74,13 @@ func (be *s3) Location() string { return be.bucketname } -// Load returns the data stored in the backend for h at the given offset -// and saves it in p. Load has the same semantics as io.ReaderAt. -func (be s3) Load(h restic.Handle, p []byte, off int64) (n int, err error) { - var obj *minio.Object - - debug.Log("%v, offset %v, len %v", h, off, len(p)) - objName := be.s3path(h.Type, h.Name) - - <-be.connChan - defer func() { - be.connChan <- struct{}{} - }() - - obj, err = be.client.GetObject(be.bucketname, objName) - if err != nil { - debug.Log(" err %v", err) - return 0, errors.Wrap(err, "client.GetObject") - } - - // make sure that the object is closed properly. - defer func() { - e := obj.Close() - if err == nil { - err = errors.Wrap(e, "Close") - } - }() - - info, err := obj.Stat() - if err != nil { - return 0, errors.Wrap(err, "obj.Stat") - } - - // handle negative offsets - if off < 0 { - // if the negative offset is larger than the object itself, read from - // the beginning. - if -off > info.Size { - off = 0 - } else { - // otherwise compute the offset from the end of the file. - off = info.Size + off - } - } - - // return an error if the offset is beyond the end of the file - if off > info.Size { - return 0, errors.Wrap(io.EOF, "") - } - - var nextError error - - // manually create an io.ErrUnexpectedEOF - if off+int64(len(p)) > info.Size { - newlen := info.Size - off - p = p[:newlen] - - nextError = io.ErrUnexpectedEOF - - debug.Log(" capped buffer to %v byte", len(p)) - } - - n, err = obj.ReadAt(p, off) - if int64(n) == info.Size-off && errors.Cause(err) == io.EOF { - err = nil - } - - if err == nil { - err = nextError - } - - return n, err -} - // Save stores data in the backend at the handle. -func (be s3) Save(h restic.Handle, p []byte) (err error) { +func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { if err := h.Valid(); err != nil { return err } - debug.Log("%v with %d bytes", h, len(p)) + debug.Log("Save %v", h) objName := be.s3path(h.Type, h.Name) @@ -168,16 +96,94 @@ func (be s3) Save(h restic.Handle, p []byte) (err error) { be.connChan <- struct{}{} }() - debug.Log("PutObject(%v, %v, %v, %v)", - be.bucketname, objName, int64(len(p)), "binary/octet-stream") - n, err := be.client.PutObject(be.bucketname, objName, bytes.NewReader(p), "binary/octet-stream") + debug.Log("PutObject(%v, %v)", + be.bucketname, objName) + n, err := be.client.PutObject(be.bucketname, objName, rd, "binary/octet-stream") debug.Log("%v -> %v bytes, err %#v", objName, n, err) return errors.Wrap(err, "client.PutObject") } +// Load returns a reader that yields the contents of the file at h at the +// given offset. If length is nonzero, only a portion of the file is +// returned. rd must be closed after use. +func (be *s3) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + debug.Log("Load %v, length %v, offset %v", h, length, offset) + if err := h.Valid(); err != nil { + return nil, err + } + + if offset < 0 { + return nil, errors.New("offset is negative") + } + + if length < 0 { + return nil, errors.Errorf("invalid length %d", length) + } + + var obj *minio.Object + + objName := be.s3path(h.Type, h.Name) + + <-be.connChan + defer func() { + be.connChan <- struct{}{} + }() + + obj, err := be.client.GetObject(be.bucketname, objName) + if err != nil { + debug.Log(" err %v", err) + return nil, errors.Wrap(err, "client.GetObject") + } + + // if we're going to read the whole object, just pass it on. + if length == 0 { + debug.Log("Load %v: pass on object", h) + _, err = obj.Seek(offset, 0) + if err != nil { + _ = obj.Close() + return nil, errors.Wrap(err, "obj.Seek") + } + + return obj, nil + } + + // otherwise use a buffer with ReadAt + info, err := obj.Stat() + if err != nil { + _ = obj.Close() + return nil, errors.Wrap(err, "obj.Stat") + } + + if offset > info.Size { + _ = obj.Close() + return nil, errors.Errorf("offset larger than file size") + } + + l := int64(length) + if offset+l > info.Size { + l = info.Size - offset + } + + buf := make([]byte, l) + n, err := obj.ReadAt(buf, offset) + debug.Log("Load %v: use buffer with ReadAt: %v, %v", h, n, err) + if err == io.EOF { + debug.Log("Load %v: shorten buffer %v -> %v", h, len(buf), n) + buf = buf[:n] + err = nil + } + + if err != nil { + _ = obj.Close() + return nil, errors.Wrap(err, "obj.ReadAt") + } + + return backend.Closer{Reader: bytes.NewReader(buf)}, nil +} + // Stat returns information about a blob. -func (be s3) Stat(h restic.Handle) (bi restic.FileInfo, err error) { +func (be *s3) Stat(h restic.Handle) (bi restic.FileInfo, err error) { debug.Log("%v", h) objName := be.s3path(h.Type, h.Name) diff --git a/src/restic/backend/sftp/backend_test.go b/src/restic/backend/sftp/backend_test.go index c28dd8c99..a812f8cd0 100644 --- a/src/restic/backend/sftp/backend_test.go +++ b/src/restic/backend/sftp/backend_test.go @@ -51,13 +51,6 @@ func TestSftpBackendLoad(t *testing.T) { test.TestLoad(t) } -func TestSftpBackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestSftpBackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/sftp/sftp.go b/src/restic/backend/sftp/sftp.go index 95555441f..b7f0922b6 100644 --- a/src/restic/backend/sftp/sftp.go +++ b/src/restic/backend/sftp/sftp.go @@ -326,47 +326,9 @@ func (r *SFTP) dirname(t restic.FileType, name string) string { return Join(r.p, n) } -// Load returns the data stored in the backend for h at the given offset -// and saves it in p. Load has the same semantics as io.ReaderAt. -func (r *SFTP) Load(h restic.Handle, p []byte, off int64) (n int, err error) { - debug.Log("load %v, %d bytes, offset %v", h, len(p), off) - if err := r.clientError(); err != nil { - return 0, err - } - - if err := h.Valid(); err != nil { - return 0, err - } - - f, err := r.c.Open(r.filename(h.Type, h.Name)) - if err != nil { - return 0, errors.Wrap(err, "Open") - } - - defer func() { - e := f.Close() - if err == nil { - err = errors.Wrap(e, "Close") - } - }() - - switch { - case off > 0: - _, err = f.Seek(off, 0) - case off < 0: - _, err = f.Seek(off, 2) - } - - if err != nil { - return 0, errors.Wrap(err, "Seek") - } - - return io.ReadFull(f, p) -} - // Save stores data in the backend at the handle. -func (r *SFTP) Save(h restic.Handle, p []byte) (err error) { - debug.Log("save %v bytes to %v", h, len(p)) +func (r *SFTP) Save(h restic.Handle, rd io.Reader) (err error) { + debug.Log("save to %v", h) if err := r.clientError(); err != nil { return err } @@ -380,16 +342,12 @@ func (r *SFTP) Save(h restic.Handle, p []byte) (err error) { return err } - debug.Log("save %v (%d bytes) to %v", h, len(p), filename) - - n, err := tmpfile.Write(p) + n, err := io.Copy(tmpfile, rd) if err != nil { return errors.Wrap(err, "Write") } - if n != len(p) { - return errors.New("not all bytes writen") - } + debug.Log("saved %v (%d bytes) to %v", h, n, filename) err = tmpfile.Close() if err != nil { @@ -402,6 +360,39 @@ func (r *SFTP) Save(h restic.Handle, p []byte) (err error) { return err } +// Load returns a reader that yields the contents of the file at h at the +// given offset. If length is nonzero, only a portion of the file is +// returned. rd must be closed after use. +func (r *SFTP) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + debug.Log("Load %v, length %v, offset %v", h, length, offset) + if err := h.Valid(); err != nil { + return nil, err + } + + if offset < 0 { + return nil, errors.New("offset is negative") + } + + f, err := r.c.Open(r.filename(h.Type, h.Name)) + if err != nil { + return nil, err + } + + if offset > 0 { + _, err = f.Seek(offset, 0) + if err != nil { + _ = f.Close() + return nil, err + } + } + + if length > 0 { + return backend.LimitReadCloser(f, int64(length)), nil + } + + return f, nil +} + // Stat returns information about a blob. func (r *SFTP) Stat(h restic.Handle) (restic.FileInfo, error) { debug.Log("stat %v", h) diff --git a/src/restic/backend/test/backend_test.go b/src/restic/backend/test/backend_test.go index c577092fb..b495ce663 100644 --- a/src/restic/backend/test/backend_test.go +++ b/src/restic/backend/test/backend_test.go @@ -51,13 +51,6 @@ func TestTestBackendLoad(t *testing.T) { test.TestLoad(t) } -func TestTestBackendLoadNegativeOffset(t *testing.T) { - if SkipMessage != "" { - t.Skip(SkipMessage) - } - test.TestLoadNegativeOffset(t) -} - func TestTestBackendSave(t *testing.T) { if SkipMessage != "" { t.Skip(SkipMessage) diff --git a/src/restic/backend/test/tests.go b/src/restic/backend/test/tests.go index c79d45041..54583b0ce 100644 --- a/src/restic/backend/test/tests.go +++ b/src/restic/backend/test/tests.go @@ -4,13 +4,14 @@ import ( "bytes" "fmt" "io" + "io/ioutil" "math/rand" "reflect" "restic" "sort" + "strings" "testing" - "restic/errors" "restic/test" "restic/backend" @@ -152,12 +153,12 @@ func TestConfig(t testing.TB) { var testString = "Config" // create config and read it back - _, err := backend.LoadAll(b, restic.Handle{Type: restic.ConfigFile}, nil) + _, err := backend.LoadAll(b, restic.Handle{Type: restic.ConfigFile}) if err == nil { t.Fatalf("did not get expected error for non-existing config") } - err = b.Save(restic.Handle{Type: restic.ConfigFile}, []byte(testString)) + err = b.Save(restic.Handle{Type: restic.ConfigFile}, strings.NewReader(testString)) if err != nil { t.Fatalf("Save() error: %v", err) } @@ -166,7 +167,7 @@ func TestConfig(t testing.TB) { // same config for _, name := range []string{"", "foo", "bar", "0000000000000000000000000000000000000000000000000000000000000000"} { h := restic.Handle{Type: restic.ConfigFile, Name: name} - buf, err := backend.LoadAll(b, h, nil) + buf, err := backend.LoadAll(b, h) if err != nil { t.Fatalf("unable to read config with name %q: %v", name, err) } @@ -182,12 +183,12 @@ func TestLoad(t testing.TB) { b := open(t) defer close(t) - _, err := b.Load(restic.Handle{}, nil, 0) + _, err := b.Load(restic.Handle{}, 0, 0) if err == nil { t.Fatalf("Load() did not return an error for invalid handle") } - _, err = b.Load(restic.Handle{Type: restic.DataFile, Name: "foobar"}, nil, 0) + _, err = b.Load(restic.Handle{Type: restic.DataFile, Name: "foobar"}, 0, 0) if err == nil { t.Fatalf("Load() did not return an error for non-existing blob") } @@ -198,11 +199,20 @@ func TestLoad(t testing.TB) { id := restic.Hash(data) handle := restic.Handle{Type: restic.DataFile, Name: id.String()} - err = b.Save(handle, data) + err = b.Save(handle, bytes.NewReader(data)) if err != nil { t.Fatalf("Save() error: %v", err) } + rd, err := b.Load(handle, 100, -1) + if err == nil { + t.Fatalf("Load() returned no error for negative offset!") + } + + if rd != nil { + t.Fatalf("Load() returned a non-nil reader for negative offset!") + } + for i := 0; i < 50; i++ { l := rand.Intn(length + 2000) o := rand.Intn(length + 2000) @@ -215,154 +225,47 @@ func TestLoad(t testing.TB) { d = d[:0] } - if l > 0 && l < len(d) { - d = d[:l] - } - - buf := make([]byte, l) - n, err := b.Load(handle, buf, int64(o)) - - // if we requested data beyond the end of the file, require - // ErrUnexpectedEOF error - if l > len(d) { - if errors.Cause(err) != io.ErrUnexpectedEOF { - t.Errorf("Load(%d, %d) did not return io.ErrUnexpectedEOF", len(buf), int64(o)) - } - err = nil - buf = buf[:len(d)] - } - - if err != nil { - t.Errorf("Load(%d, %d): unexpected error: %v", len(buf), int64(o), err) - continue - } - - if n != len(buf) { - t.Errorf("Load(%d, %d): wrong length returned, want %d, got %d", - len(buf), int64(o), len(buf), n) - continue - } - - buf = buf[:n] - if !bytes.Equal(buf, d) { - t.Errorf("Load(%d, %d) returned wrong bytes", len(buf), int64(o)) - continue - } - } - - // test with negative offset - for i := 0; i < 50; i++ { - l := rand.Intn(length + 2000) - o := rand.Intn(length + 2000) - - d := data - if o < len(d) { - d = d[len(d)-o:] - } else { - o = 0 + getlen := l + if l >= len(d) && rand.Float32() >= 0.5 { + getlen = 0 } if l > 0 && l < len(d) { d = d[:l] } - buf := make([]byte, l) - n, err := b.Load(handle, buf, -int64(o)) - - // if we requested data beyond the end of the file, require - // ErrUnexpectedEOF error - if l > len(d) { - if errors.Cause(err) != io.ErrUnexpectedEOF { - t.Errorf("Load(%d, %d) did not return io.ErrUnexpectedEOF", len(buf), int64(o)) - continue - } - err = nil - buf = buf[:len(d)] - } - + rd, err := b.Load(handle, getlen, int64(o)) if err != nil { - t.Errorf("Load(%d, %d): unexpected error: %v", len(buf), int64(o), err) + t.Errorf("Load(%d, %d) returned unexpected error: %v", l, o, err) continue } - if n != len(buf) { - t.Errorf("Load(%d, %d): wrong length returned, want %d, got %d", - len(buf), int64(o), len(buf), n) + buf, err := ioutil.ReadAll(rd) + if err != nil { + t.Errorf("Load(%d, %d) ReadAll() returned unexpected error: %v", l, o, err) + continue + } + + if l <= len(d) && len(buf) != l { + t.Errorf("Load(%d, %d) wrong number of bytes read: want %d, got %d", l, o, l, len(buf)) + continue + } + + if l > len(d) && len(buf) != len(d) { + t.Errorf("Load(%d, %d) wrong number of bytes read for overlong read: want %d, got %d", l, o, l, len(buf)) continue } - buf = buf[:n] if !bytes.Equal(buf, d) { - t.Errorf("Load(%d, %d) returned wrong bytes", len(buf), int64(o)) + t.Errorf("Load(%d, %d) returned wrong bytes", l, o) continue } - } - - // load with a too-large buffer, this should return io.ErrUnexpectedEOF - buf := make([]byte, length+100) - n, err := b.Load(handle, buf, 0) - if n != length { - t.Errorf("wrong length for larger buffer returned, want %d, got %d", length, n) - } - - if errors.Cause(err) != io.ErrUnexpectedEOF { - t.Errorf("wrong error returned for larger buffer: want io.ErrUnexpectedEOF, got %#v", err) - } - - test.OK(t, b.Remove(restic.DataFile, id.String())) -} - -// TestLoadNegativeOffset tests the backend's Load function with negative offsets. -func TestLoadNegativeOffset(t testing.TB) { - b := open(t) - defer close(t) - - length := rand.Intn(1<<24) + 2000 - - data := test.Random(23, length) - id := restic.Hash(data) - - handle := restic.Handle{Type: restic.DataFile, Name: id.String()} - err := b.Save(handle, data) - if err != nil { - t.Fatalf("Save() error: %v", err) - } - - // test normal reads - for i := 0; i < 50; i++ { - l := rand.Intn(length + 2000) - o := -rand.Intn(length + 2000) - - buf := make([]byte, l) - n, err := b.Load(handle, buf, int64(o)) - - // if we requested data beyond the end of the file, require - // ErrUnexpectedEOF error - if len(buf) > -o { - if errors.Cause(err) != io.ErrUnexpectedEOF { - t.Errorf("Load(%d, %d) did not return io.ErrUnexpectedEOF", len(buf), o) - continue - } - err = nil - buf = buf[:-o] - } + err = rd.Close() if err != nil { - t.Errorf("Load(%d, %d) returned error: %v", len(buf), o, err) + t.Errorf("Load(%d, %d) rd.Close() returned unexpected error: %v", l, o, err) continue } - - if n != len(buf) { - t.Errorf("Load(%d, %d) returned short read, only got %d bytes", len(buf), o, n) - continue - } - - p := len(data) + o - if !bytes.Equal(buf, data[p:p+len(buf)]) { - t.Errorf("Load(%d, %d) returned wrong bytes", len(buf), o) - continue - } - } test.OK(t, b.Remove(restic.DataFile, id.String())) @@ -384,10 +287,10 @@ func TestSave(t testing.TB) { Type: restic.DataFile, Name: fmt.Sprintf("%s-%d", id, i), } - err := b.Save(h, data) + err := b.Save(h, bytes.NewReader(data)) test.OK(t, err) - buf, err := backend.LoadAll(b, h, nil) + buf, err := backend.LoadAll(b, h) test.OK(t, err) if len(buf) != len(data) { t.Fatalf("number of bytes does not match, want %v, got %v", len(data), len(buf)) @@ -430,13 +333,13 @@ func TestSaveFilenames(t testing.TB) { for i, test := range filenameTests { h := restic.Handle{Name: test.name, Type: restic.DataFile} - err := b.Save(h, []byte(test.data)) + err := b.Save(h, strings.NewReader(test.data)) if err != nil { t.Errorf("test %d failed: Save() returned %v", i, err) continue } - buf, err := backend.LoadAll(b, h, nil) + buf, err := backend.LoadAll(b, h) if err != nil { t.Errorf("test %d failed: Load() returned %v", i, err) continue @@ -466,7 +369,7 @@ var testStrings = []struct { func store(t testing.TB, b restic.Backend, tpe restic.FileType, data []byte) { id := restic.Hash(data) - err := b.Save(restic.Handle{Name: id.String(), Type: tpe}, data) + err := b.Save(restic.Handle{Name: id.String(), Type: tpe}, bytes.NewReader(data)) test.OK(t, err) } @@ -495,7 +398,7 @@ func TestBackend(t testing.TB) { test.Assert(t, err != nil, "blob data could be extracted before creation") // try to read not existing blob - _, err = b.Load(h, nil, 0) + _, err = b.Load(h, 0, 0) test.Assert(t, err != nil, "blob reader could be obtained before creation") // try to get string out, should fail @@ -510,7 +413,7 @@ func TestBackend(t testing.TB) { // test Load() h := restic.Handle{Type: tpe, Name: ts.id} - buf, err := backend.LoadAll(b, h, nil) + buf, err := backend.LoadAll(b, h) test.OK(t, err) test.Equals(t, ts.data, string(buf)) @@ -520,9 +423,18 @@ func TestBackend(t testing.TB) { length := end - start buf2 := make([]byte, length) - n, err := b.Load(h, buf2, int64(start)) + rd, err := b.Load(h, len(buf2), int64(start)) test.OK(t, err) - test.Equals(t, length, n) + n, err := io.ReadFull(rd, buf2) + test.OK(t, err) + test.Equals(t, len(buf2), n) + + remaining, err := io.Copy(ioutil.Discard, rd) + test.OK(t, err) + test.Equals(t, int64(0), remaining) + + test.OK(t, rd.Close()) + test.Equals(t, ts.data[start:end], string(buf2)) } @@ -530,7 +442,7 @@ func TestBackend(t testing.TB) { ts := testStrings[0] // create blob - err := b.Save(restic.Handle{Type: tpe, Name: ts.id}, []byte(ts.data)) + err := b.Save(restic.Handle{Type: tpe, Name: ts.id}, strings.NewReader(ts.data)) test.Assert(t, err != nil, "expected error, got %v", err) // remove and recreate @@ -543,7 +455,7 @@ func TestBackend(t testing.TB) { test.Assert(t, ok == false, "removed blob still present") // create blob - err = b.Save(restic.Handle{Type: tpe, Name: ts.id}, []byte(ts.data)) + err = b.Save(restic.Handle{Type: tpe, Name: ts.id}, strings.NewReader(ts.data)) test.OK(t, err) // list items diff --git a/src/restic/backend/utils.go b/src/restic/backend/utils.go index 82a899515..27d2b9ad5 100644 --- a/src/restic/backend/utils.go +++ b/src/restic/backend/utils.go @@ -2,29 +2,51 @@ package backend import ( "io" + "io/ioutil" "restic" - - "restic/errors" ) -// LoadAll reads all data stored in the backend for the handle. The buffer buf -// is resized to accomodate all data in the blob. Errors returned by be.Load() -// are passed on, except io.ErrUnexpectedEOF is silenced and nil returned -// instead, since it means this function is working properly. -func LoadAll(be restic.Backend, h restic.Handle, buf []byte) ([]byte, error) { - fi, err := be.Stat(h) +// LoadAll reads all data stored in the backend for the handle. +func LoadAll(be restic.Backend, h restic.Handle) (buf []byte, err error) { + rd, err := be.Load(h, 0, 0) if err != nil { - return nil, errors.Wrap(err, "Stat") + return nil, err } - if fi.Size > int64(len(buf)) { - buf = make([]byte, int(fi.Size)) - } + defer func() { + io.Copy(ioutil.Discard, rd) + e := rd.Close() + if err == nil { + err = e + } + }() - n, err := be.Load(h, buf, 0) - if errors.Cause(err) == io.ErrUnexpectedEOF { - err = nil - } - buf = buf[:n] - return buf, err + return ioutil.ReadAll(rd) +} + +// Closer wraps an io.Reader and adds a Close() method that does nothing. +type Closer struct { + io.Reader +} + +// Close is a no-op. +func (c Closer) Close() error { + return nil +} + +// LimitedReadCloser wraps io.LimitedReader and exposes the Close() method. +type LimitedReadCloser struct { + io.ReadCloser + io.Reader +} + +// Read reads data from the limited reader. +func (l *LimitedReadCloser) Read(p []byte) (int, error) { + return l.Reader.Read(p) +} + +// LimitReadCloser returns a new reader wraps r in an io.LimitReader, but also +// exposes the Close() method. +func LimitReadCloser(r io.ReadCloser, n int64) *LimitedReadCloser { + return &LimitedReadCloser{ReadCloser: r, Reader: io.LimitReader(r, n)} } diff --git a/src/restic/backend/utils_test.go b/src/restic/backend/utils_test.go index 59eed7089..2996cf494 100644 --- a/src/restic/backend/utils_test.go +++ b/src/restic/backend/utils_test.go @@ -21,10 +21,10 @@ func TestLoadAll(t *testing.T) { data := Random(23+i, rand.Intn(MiB)+500*KiB) id := restic.Hash(data) - err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, data) + err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, bytes.NewReader(data)) OK(t, err) - buf, err := backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}, nil) + buf, err := backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}) OK(t, err) if len(buf) != len(data) { @@ -46,11 +46,11 @@ func TestLoadSmallBuffer(t *testing.T) { data := Random(23+i, rand.Intn(MiB)+500*KiB) id := restic.Hash(data) - err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, data) + err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, bytes.NewReader(data)) OK(t, err) buf := make([]byte, len(data)-23) - buf, err = backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}, buf) + buf, err = backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}) OK(t, err) if len(buf) != len(data) { @@ -72,11 +72,11 @@ func TestLoadLargeBuffer(t *testing.T) { data := Random(23+i, rand.Intn(MiB)+500*KiB) id := restic.Hash(data) - err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, data) + err := b.Save(restic.Handle{Name: id.String(), Type: restic.DataFile}, bytes.NewReader(data)) OK(t, err) buf := make([]byte, len(data)+100) - buf, err = backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}, buf) + buf, err = backend.LoadAll(b, restic.Handle{Type: restic.DataFile, Name: id.String()}) OK(t, err) if len(buf) != len(data) { diff --git a/src/restic/checker/checker.go b/src/restic/checker/checker.go index 600ec2e54..3b2f0787b 100644 --- a/src/restic/checker/checker.go +++ b/src/restic/checker/checker.go @@ -658,7 +658,7 @@ func (c *Checker) CountPacks() uint64 { func checkPack(r restic.Repository, id restic.ID) error { debug.Log("checking pack %v", id.Str()) h := restic.Handle{Type: restic.DataFile, Name: id.String()} - buf, err := backend.LoadAll(r.Backend(), h, nil) + buf, err := backend.LoadAll(r.Backend(), h) if err != nil { return err } diff --git a/src/restic/checker/checker_test.go b/src/restic/checker/checker_test.go index 26528d24a..bdd611471 100644 --- a/src/restic/checker/checker_test.go +++ b/src/restic/checker/checker_test.go @@ -1,6 +1,7 @@ package checker_test import ( + "io" "math/rand" "path/filepath" "sort" @@ -208,15 +209,35 @@ type errorBackend struct { ProduceErrors bool } -func (b errorBackend) Load(h restic.Handle, p []byte, off int64) (int, error) { - n, err := b.Backend.Load(h, p, off) +func (b errorBackend) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + rd, err := b.Backend.Load(h, length, offset) + if err != nil { + return rd, err + } if b.ProduceErrors { - induceError(p) + return errorReadCloser{rd}, err + } + + return rd, nil +} + +type errorReadCloser struct { + io.ReadCloser +} + +func (erd errorReadCloser) Read(p []byte) (int, error) { + n, err := erd.ReadCloser.Read(p) + if n > 0 { + induceError(p[:n]) } return n, err } +func (erd errorReadCloser) Close() error { + return erd.ReadCloser.Close() +} + // induceError flips a bit in the slice. func induceError(data []byte) { if rand.Float32() < 0.2 { @@ -266,7 +287,7 @@ func TestCheckerModifiedData(t *testing.T) { } for _, err := range checkData(chkr) { - t.Logf("struct error: %v", err) + t.Logf("data error: %v", err) errFound = true } diff --git a/src/restic/hashing/reader.go b/src/restic/hashing/reader.go new file mode 100644 index 000000000..a499f4a63 --- /dev/null +++ b/src/restic/hashing/reader.go @@ -0,0 +1,29 @@ +package hashing + +import ( + "hash" + "io" +) + +// Reader hashes all data read from the underlying reader. +type Reader struct { + r io.Reader + h hash.Hash +} + +// NewReader returns a new Reader that uses the hash h. +func NewReader(r io.Reader, h hash.Hash) *Reader { + return &Reader{ + h: h, + r: io.TeeReader(r, h), + } +} + +func (h *Reader) Read(p []byte) (int, error) { + return h.r.Read(p) +} + +// Sum returns the hash of the data read so far. +func (h *Reader) Sum(d []byte) []byte { + return h.h.Sum(d) +} diff --git a/src/restic/hashing/reader_test.go b/src/restic/hashing/reader_test.go new file mode 100644 index 000000000..d17f264de --- /dev/null +++ b/src/restic/hashing/reader_test.go @@ -0,0 +1,73 @@ +package hashing + +import ( + "bytes" + "crypto/rand" + "crypto/sha256" + "io" + "io/ioutil" + "testing" +) + +func TestReader(t *testing.T) { + tests := []int{5, 23, 2<<18 + 23, 1 << 20} + + for _, size := range tests { + data := make([]byte, size) + _, err := io.ReadFull(rand.Reader, data) + if err != nil { + t.Fatalf("ReadFull: %v", err) + } + + expectedHash := sha256.Sum256(data) + + rd := NewReader(bytes.NewReader(data), sha256.New()) + n, err := io.Copy(ioutil.Discard, rd) + if err != nil { + t.Fatal(err) + } + + if n != int64(size) { + t.Errorf("Reader: invalid number of bytes written: got %d, expected %d", + n, size) + } + + resultingHash := rd.Sum(nil) + + if !bytes.Equal(expectedHash[:], resultingHash) { + t.Errorf("Reader: hashes do not match: expected %02x, got %02x", + expectedHash, resultingHash) + } + } +} + +func BenchmarkReader(b *testing.B) { + buf := make([]byte, 1<<22) + _, err := io.ReadFull(rand.Reader, buf) + if err != nil { + b.Fatal(err) + } + + expectedHash := sha256.Sum256(buf) + + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + rd := NewReader(bytes.NewReader(buf), sha256.New()) + n, err := io.Copy(ioutil.Discard, rd) + if err != nil { + b.Fatal(err) + } + + if n != int64(len(buf)) { + b.Errorf("Reader: invalid number of bytes written: got %d, expected %d", + n, len(buf)) + } + + resultingHash := rd.Sum(nil) + if !bytes.Equal(expectedHash[:], resultingHash) { + b.Errorf("Reader: hashes do not match: expected %02x, got %02x", + expectedHash, resultingHash) + } + } +} diff --git a/src/restic/hashing/writer.go b/src/restic/hashing/writer.go new file mode 100644 index 000000000..2940a6271 --- /dev/null +++ b/src/restic/hashing/writer.go @@ -0,0 +1,31 @@ +package hashing + +import ( + "hash" + "io" +) + +// Writer transparently hashes all data while writing it to the underlying writer. +type Writer struct { + w io.Writer + h hash.Hash +} + +// NewWriter wraps the writer w and feeds all data written to the hash h. +func NewWriter(w io.Writer, h hash.Hash) *Writer { + return &Writer{ + h: h, + w: io.MultiWriter(w, h), + } +} + +// Write wraps the write method of the underlying writer and also hashes all data. +func (h *Writer) Write(p []byte) (int, error) { + n, err := h.w.Write(p) + return n, err +} + +// Sum returns the hash of all data written so far. +func (h *Writer) Sum(d []byte) []byte { + return h.h.Sum(d) +} diff --git a/src/restic/hashing/writer_test.go b/src/restic/hashing/writer_test.go new file mode 100644 index 000000000..46999f20f --- /dev/null +++ b/src/restic/hashing/writer_test.go @@ -0,0 +1,74 @@ +package hashing + +import ( + "bytes" + "crypto/rand" + "crypto/sha256" + "io" + "io/ioutil" + "testing" +) + +func TestWriter(t *testing.T) { + tests := []int{5, 23, 2<<18 + 23, 1 << 20} + + for _, size := range tests { + data := make([]byte, size) + _, err := io.ReadFull(rand.Reader, data) + if err != nil { + t.Fatalf("ReadFull: %v", err) + } + + expectedHash := sha256.Sum256(data) + + wr := NewWriter(ioutil.Discard, sha256.New()) + + n, err := io.Copy(wr, bytes.NewReader(data)) + if err != nil { + t.Fatal(err) + } + + if n != int64(size) { + t.Errorf("Writer: invalid number of bytes written: got %d, expected %d", + n, size) + } + + resultingHash := wr.Sum(nil) + + if !bytes.Equal(expectedHash[:], resultingHash) { + t.Errorf("Writer: hashes do not match: expected %02x, got %02x", + expectedHash, resultingHash) + } + } +} + +func BenchmarkWriter(b *testing.B) { + buf := make([]byte, 1<<22) + _, err := io.ReadFull(rand.Reader, buf) + if err != nil { + b.Fatal(err) + } + + expectedHash := sha256.Sum256(buf) + + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + wr := NewWriter(ioutil.Discard, sha256.New()) + n, err := io.Copy(wr, bytes.NewReader(buf)) + if err != nil { + b.Fatal(err) + } + + if n != int64(len(buf)) { + b.Errorf("Writer: invalid number of bytes written: got %d, expected %d", + n, len(buf)) + } + + resultingHash := wr.Sum(nil) + if !bytes.Equal(expectedHash[:], resultingHash) { + b.Errorf("Writer: hashes do not match: expected %02x, got %02x", + expectedHash, resultingHash) + } + } +} diff --git a/src/restic/id.go b/src/restic/id.go index 08cb6f64b..c64508a4e 100644 --- a/src/restic/id.go +++ b/src/restic/id.go @@ -114,3 +114,13 @@ func (id *ID) UnmarshalJSON(b []byte) error { return nil } + +// IDFromHash returns the ID for the hash. +func IDFromHash(hash []byte) (id ID) { + if len(hash) != idSize { + panic("invalid hash type, not enough/too many bytes") + } + + copy(id[:], hash) + return id +} diff --git a/src/restic/mock/backend.go b/src/restic/mock/backend.go index 5aadc849d..9c5504f1a 100644 --- a/src/restic/mock/backend.go +++ b/src/restic/mock/backend.go @@ -1,6 +1,7 @@ package mock import ( + "io" "restic" "restic/errors" @@ -9,8 +10,8 @@ import ( // Backend implements a mock backend. type Backend struct { CloseFn func() error - LoadFn func(h restic.Handle, p []byte, off int64) (int, error) - SaveFn func(h restic.Handle, p []byte) error + SaveFn func(h restic.Handle, rd io.Reader) error + LoadFn func(h restic.Handle, length int, offset int64) (io.ReadCloser, error) StatFn func(h restic.Handle) (restic.FileInfo, error) ListFn func(restic.FileType, <-chan struct{}) <-chan string RemoveFn func(restic.FileType, string) error @@ -37,22 +38,22 @@ func (m *Backend) Location() string { return m.LocationFn() } -// Load loads data from the backend. -func (m *Backend) Load(h restic.Handle, p []byte, off int64) (int, error) { - if m.LoadFn == nil { - return 0, errors.New("not implemented") - } - - return m.LoadFn(h, p, off) -} - // Save data in the backend. -func (m *Backend) Save(h restic.Handle, p []byte) error { +func (m *Backend) Save(h restic.Handle, rd io.Reader) error { if m.SaveFn == nil { return errors.New("not implemented") } - return m.SaveFn(h, p) + return m.SaveFn(h, rd) +} + +// Load loads data from the backend. +func (m *Backend) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) { + if m.LoadFn == nil { + return nil, errors.New("not implemented") + } + + return m.LoadFn(h, length, offset) } // Stat an object in the backend. diff --git a/src/restic/pack/pack_test.go b/src/restic/pack/pack_test.go index c797cb1c0..39cdbba66 100644 --- a/src/restic/pack/pack_test.go +++ b/src/restic/pack/pack_test.go @@ -126,7 +126,7 @@ func TestUnpackReadSeeker(t *testing.T) { id := restic.Hash(packData) handle := restic.Handle{Type: restic.DataFile, Name: id.String()} - OK(t, b.Save(handle, packData)) + OK(t, b.Save(handle, bytes.NewReader(packData))) verifyBlobs(t, bufs, k, restic.ReaderAt(b, handle), packSize) } @@ -139,6 +139,6 @@ func TestShortPack(t *testing.T) { id := restic.Hash(packData) handle := restic.Handle{Type: restic.DataFile, Name: id.String()} - OK(t, b.Save(handle, packData)) + OK(t, b.Save(handle, bytes.NewReader(packData))) verifyBlobs(t, bufs, k, restic.ReaderAt(b, handle), packSize) } diff --git a/src/restic/readerat.go b/src/restic/readerat.go index 7d36b3396..320d032a4 100644 --- a/src/restic/readerat.go +++ b/src/restic/readerat.go @@ -9,11 +9,27 @@ type backendReaderAt struct { h Handle } -func (brd backendReaderAt) ReadAt(p []byte, off int64) (n int, err error) { - return brd.be.Load(brd.h, p, off) +func (brd backendReaderAt) ReadAt(p []byte, offset int64) (n int, err error) { + return ReadAt(brd.be, brd.h, offset, p) } // ReaderAt returns an io.ReaderAt for a file in the backend. func ReaderAt(be Backend, h Handle) io.ReaderAt { return backendReaderAt{be: be, h: h} } + +// ReadAt reads from the backend handle h at the given position. +func ReadAt(be Backend, h Handle, offset int64, p []byte) (n int, err error) { + rd, err := be.Load(h, len(p), offset) + if err != nil { + return 0, err + } + + n, err = io.ReadFull(rd, p) + e := rd.Close() + if err == nil { + err = e + } + + return n, err +} diff --git a/src/restic/repository/key.go b/src/restic/repository/key.go index 3deeb9cb2..7ce9757aa 100644 --- a/src/restic/repository/key.go +++ b/src/restic/repository/key.go @@ -1,6 +1,7 @@ package repository import ( + "bytes" "encoding/json" "fmt" "os" @@ -146,7 +147,7 @@ func SearchKey(s *Repository, password string, maxKeys int) (*Key, error) { // LoadKey loads a key from the backend. func LoadKey(s *Repository, name string) (k *Key, err error) { h := restic.Handle{Type: restic.KeyFile, Name: name} - data, err := backend.LoadAll(s.be, h, nil) + data, err := backend.LoadAll(s.be, h) if err != nil { return nil, err } @@ -232,7 +233,7 @@ func AddKey(s *Repository, password string, template *crypto.Key) (*Key, error) Name: restic.Hash(buf).String(), } - err = s.be.Save(h, buf) + err = s.be.Save(h, bytes.NewReader(buf)) if err != nil { return nil, err } diff --git a/src/restic/repository/packer_manager.go b/src/restic/repository/packer_manager.go index c686effe6..8e9327e3e 100644 --- a/src/restic/repository/packer_manager.go +++ b/src/restic/repository/packer_manager.go @@ -1,6 +1,7 @@ package repository import ( + "crypto/sha256" "io" "io/ioutil" "os" @@ -8,6 +9,7 @@ import ( "sync" "restic/errors" + "restic/hashing" "restic/crypto" "restic/debug" @@ -17,15 +19,36 @@ import ( // Saver implements saving data in a backend. type Saver interface { - Save(h restic.Handle, jp []byte) error + Save(restic.Handle, io.Reader) error +} + +// Packer holds a pack.Packer together with a hash writer. +type Packer struct { + *pack.Packer + hw *hashing.Writer + tmpfile *os.File +} + +// Finalize finalizes the pack.Packer and then closes the tempfile. +func (p *Packer) Finalize() (uint, error) { + n, err := p.Packer.Finalize() + if err != nil { + return n, err + } + + if err = p.tmpfile.Close(); err != nil { + return n, err + } + + return n, nil } // packerManager keeps a list of open packs and creates new on demand. type packerManager struct { - be Saver - key *crypto.Key - pm sync.Mutex - packs []*pack.Packer + be Saver + key *crypto.Key + pm sync.Mutex + packers []*Packer pool sync.Pool } @@ -50,18 +73,18 @@ func newPackerManager(be Saver, key *crypto.Key) *packerManager { // findPacker returns a packer for a new blob of size bytes. Either a new one is // created or one is returned that already has some blobs. -func (r *packerManager) findPacker(size uint) (packer *pack.Packer, err error) { +func (r *packerManager) findPacker(size uint) (packer *Packer, err error) { r.pm.Lock() defer r.pm.Unlock() // search for a suitable packer - if len(r.packs) > 0 { + if len(r.packers) > 0 { debug.Log("searching packer for %d bytes\n", size) - for i, p := range r.packs { - if p.Size()+size < maxPackSize { + for i, p := range r.packers { + if p.Packer.Size()+size < maxPackSize { debug.Log("found packer %v", p) // remove from list - r.packs = append(r.packs[:i], r.packs[i+1:]...) + r.packers = append(r.packers[:i], r.packers[i+1:]...) return p, nil } } @@ -74,50 +97,43 @@ func (r *packerManager) findPacker(size uint) (packer *pack.Packer, err error) { return nil, errors.Wrap(err, "ioutil.TempFile") } - return pack.NewPacker(r.key, tmpfile), nil + hw := hashing.NewWriter(tmpfile, sha256.New()) + p := pack.NewPacker(r.key, hw) + packer = &Packer{ + Packer: p, + hw: hw, + tmpfile: tmpfile, + } + + return packer, nil } // insertPacker appends p to s.packs. -func (r *packerManager) insertPacker(p *pack.Packer) { +func (r *packerManager) insertPacker(p *Packer) { r.pm.Lock() defer r.pm.Unlock() - r.packs = append(r.packs, p) - debug.Log("%d packers\n", len(r.packs)) + r.packers = append(r.packers, p) + debug.Log("%d packers\n", len(r.packers)) } // savePacker stores p in the backend. -func (r *Repository) savePacker(p *pack.Packer) error { - debug.Log("save packer with %d blobs\n", p.Count()) - n, err := p.Finalize() +func (r *Repository) savePacker(p *Packer) error { + debug.Log("save packer with %d blobs\n", p.Packer.Count()) + _, err := p.Packer.Finalize() if err != nil { return err } - tmpfile := p.Writer().(*os.File) - f, err := fs.Open(tmpfile.Name()) + _, err = p.tmpfile.Seek(0, 0) if err != nil { - return errors.Wrap(err, "Open") + return errors.Wrap(err, "Seek") } - data := make([]byte, n) - m, err := io.ReadFull(f, data) - if err != nil { - return errors.Wrap(err, "ReadFul") - } - - if uint(m) != n { - return errors.Errorf("read wrong number of bytes from %v: want %v, got %v", tmpfile.Name(), n, m) - } - - if err = f.Close(); err != nil { - return errors.Wrap(err, "Close") - } - - id := restic.Hash(data) + id := restic.IDFromHash(p.hw.Sum(nil)) h := restic.Handle{Type: restic.DataFile, Name: id.String()} - err = r.be.Save(h, data) + err = r.be.Save(h, p.tmpfile) if err != nil { debug.Log("Save(%v) error: %v", h, err) return err @@ -125,13 +141,18 @@ func (r *Repository) savePacker(p *pack.Packer) error { debug.Log("saved as %v", h) - err = fs.Remove(tmpfile.Name()) + err = p.tmpfile.Close() + if err != nil { + return errors.Wrap(err, "close tempfile") + } + + err = fs.Remove(p.tmpfile.Name()) if err != nil { return errors.Wrap(err, "Remove") } // update blobs in the index - for _, b := range p.Blobs() { + for _, b := range p.Packer.Blobs() { debug.Log(" updating blob %v to pack %v", b.ID.Str(), id.Str()) r.idx.Store(restic.PackedBlob{ Blob: restic.Blob{ @@ -152,5 +173,5 @@ func (r *packerManager) countPacker() int { r.pm.Lock() defer r.pm.Unlock() - return len(r.packs) + return len(r.packers) } diff --git a/src/restic/repository/packer_manager_test.go b/src/restic/repository/packer_manager_test.go index bf6258428..465fbadcb 100644 --- a/src/restic/repository/packer_manager_test.go +++ b/src/restic/repository/packer_manager_test.go @@ -7,6 +7,7 @@ import ( "restic" "restic/backend/mem" "restic/crypto" + "restic/mock" "testing" ) @@ -46,27 +47,20 @@ func randomID(rd io.Reader) restic.ID { const maxBlobSize = 1 << 20 -func saveFile(t testing.TB, be Saver, filename string, n int) { +func saveFile(t testing.TB, be Saver, filename string, id restic.ID) { f, err := os.Open(filename) if err != nil { t.Fatal(err) } - data := make([]byte, n) - m, err := io.ReadFull(f, data) + h := restic.Handle{Type: restic.DataFile, Name: id.String()} + t.Logf("save file %v", h) - if m != n { - t.Fatalf("read wrong number of bytes from %v: want %v, got %v", filename, m, n) - } - - if err = f.Close(); err != nil { + if err = be.Save(h, f); err != nil { t.Fatal(err) } - h := restic.Handle{Type: restic.DataFile, Name: restic.Hash(data).String()} - - err = be.Save(h, data) - if err != nil { + if err = f.Close(); err != nil { t.Fatal(err) } @@ -105,13 +99,13 @@ func fillPacks(t testing.TB, rnd *randReader, be Saver, pm *packerManager, buf [ continue } - bytesWritten, err := packer.Finalize() + _, err = packer.Finalize() if err != nil { t.Fatal(err) } - tmpfile := packer.Writer().(*os.File) - saveFile(t, be, tmpfile.Name(), int(bytesWritten)) + packID := restic.IDFromHash(packer.hw.Sum(nil)) + saveFile(t, be, packer.tmpfile.Name(), packID) } return bytes @@ -119,27 +113,21 @@ func fillPacks(t testing.TB, rnd *randReader, be Saver, pm *packerManager, buf [ func flushRemainingPacks(t testing.TB, rnd *randReader, be Saver, pm *packerManager) (bytes int) { if pm.countPacker() > 0 { - for _, packer := range pm.packs { + for _, packer := range pm.packers { n, err := packer.Finalize() if err != nil { t.Fatal(err) } bytes += int(n) - tmpfile := packer.Writer().(*os.File) - saveFile(t, be, tmpfile.Name(), bytes) + packID := restic.IDFromHash(packer.hw.Sum(nil)) + saveFile(t, be, packer.tmpfile.Name(), packID) } } return bytes } -type fakeBackend struct{} - -func (f *fakeBackend) Save(h restic.Handle, data []byte) error { - return nil -} - func TestPackerManager(t *testing.T) { rnd := newRandReader(rand.NewSource(23)) @@ -157,17 +145,18 @@ func TestPackerManager(t *testing.T) { func BenchmarkPackerManager(t *testing.B) { rnd := newRandReader(rand.NewSource(23)) - be := &fakeBackend{} - pm := newPackerManager(be, crypto.NewRandomKey()) + be := &mock.Backend{ + SaveFn: func(restic.Handle, io.Reader) error { return nil }, + } blobBuf := make([]byte, maxBlobSize) t.ResetTimer() - bytes := 0 for i := 0; i < t.N; i++ { + bytes := 0 + pm := newPackerManager(be, crypto.NewRandomKey()) bytes += fillPacks(t, rnd, be, pm, blobBuf) + bytes += flushRemainingPacks(t, rnd, be, pm) + t.Logf("saved %d bytes", bytes) } - - bytes += flushRemainingPacks(t, rnd, be, pm) - t.Logf("saved %d bytes", bytes) } diff --git a/src/restic/repository/repack.go b/src/restic/repository/repack.go index 0a82a23f2..f58932ff6 100644 --- a/src/restic/repository/repack.go +++ b/src/restic/repository/repack.go @@ -1,11 +1,14 @@ package repository import ( - "bytes" + "crypto/sha256" "io" + "io/ioutil" + "os" "restic" "restic/crypto" "restic/debug" + "restic/hashing" "restic/pack" "restic/errors" @@ -18,30 +21,47 @@ import ( func Repack(repo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet) (err error) { debug.Log("repacking %d packs while keeping %d blobs", len(packs), len(keepBlobs)) - buf := make([]byte, 0, maxPackSize) for packID := range packs { - // load the complete pack + // load the complete pack into a temp file h := restic.Handle{Type: restic.DataFile, Name: packID.String()} - l, err := repo.Backend().Load(h, buf[:cap(buf)], 0) - if errors.Cause(err) == io.ErrUnexpectedEOF { - err = nil - buf = buf[:l] + tempfile, err := ioutil.TempFile("", "restic-temp-repack-") + if err != nil { + return errors.Wrap(err, "TempFile") } + beRd, err := repo.Backend().Load(h, 0, 0) if err != nil { return err } - debug.Log("pack %v loaded (%d bytes)", packID.Str(), len(buf)) + defer beRd.Close() - blobs, err := pack.List(repo.Key(), bytes.NewReader(buf), int64(len(buf))) + hrd := hashing.NewReader(beRd, sha256.New()) + packLength, err := io.Copy(tempfile, hrd) + if err != nil { + return errors.Wrap(err, "Copy") + } + + hash := restic.IDFromHash(hrd.Sum(nil)) + debug.Log("pack %v loaded (%d bytes), hash %v", packID.Str(), packLength, hash.Str()) + + if !packID.Equal(hash) { + return errors.Errorf("hash does not match id: want %v, got %v", packID, hash) + } + + _, err = tempfile.Seek(0, 0) + if err != nil { + return errors.Wrap(err, "Seek") + } + + blobs, err := pack.List(repo.Key(), tempfile, packLength) if err != nil { return err } debug.Log("processing pack %v, blobs: %v", packID.Str(), len(blobs)) - var plaintext []byte + var buf []byte for _, entry := range blobs { h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} if !keepBlobs.Has(h) { @@ -50,21 +70,36 @@ func Repack(repo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet debug.Log(" process blob %v", h) - ciphertext := buf[entry.Offset : entry.Offset+entry.Length] - plaintext = plaintext[:len(plaintext)] - if len(plaintext) < len(ciphertext) { - plaintext = make([]byte, len(ciphertext)) + buf = buf[:len(buf)] + if uint(len(buf)) < entry.Length { + buf = make([]byte, entry.Length) + } + buf = buf[:entry.Length] + + n, err := tempfile.ReadAt(buf, int64(entry.Offset)) + if err != nil { + return errors.Wrap(err, "ReadAt") } - debug.Log(" ciphertext %d, plaintext %d", len(plaintext), len(ciphertext)) + if n != len(buf) { + return errors.Errorf("read blob %v from %v: not enough bytes read, want %v, got %v", + h, tempfile.Name(), len(buf), n) + } - n, err := crypto.Decrypt(repo.Key(), plaintext, ciphertext) + n, err = crypto.Decrypt(repo.Key(), buf, buf) if err != nil { return err } - plaintext = plaintext[:n] - _, err = repo.SaveBlob(entry.Type, plaintext, entry.ID) + buf = buf[:n] + + id := restic.Hash(buf) + if !id.Equal(entry.ID) { + return errors.Errorf("read blob %v from %v: wrong data returned, hash is %v", + h, tempfile.Name(), id) + } + + _, err = repo.SaveBlob(entry.Type, buf, entry.ID) if err != nil { return err } @@ -73,6 +108,14 @@ func Repack(repo restic.Repository, packs restic.IDSet, keepBlobs restic.BlobSet keepBlobs.Delete(h) } + + if err = tempfile.Close(); err != nil { + return errors.Wrap(err, "Close") + } + + if err = os.Remove(tempfile.Name()); err != nil { + return errors.Wrap(err, "Remove") + } } if err := repo.Flush(); err != nil { diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index 948301cc5..3545f42de 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -54,7 +54,7 @@ func (r *Repository) LoadAndDecrypt(t restic.FileType, id restic.ID) ([]byte, er debug.Log("load %v with id %v", t, id.Str()) h := restic.Handle{Type: t, Name: id.String()} - buf, err := backend.LoadAll(r.be, h, nil) + buf, err := backend.LoadAll(r.be, h) if err != nil { debug.Log("error loading %v: %v", id.Str(), err) return nil, err @@ -97,7 +97,8 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by // load blob from pack h := restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()} plaintextBuf = plaintextBuf[:cap(plaintextBuf)] - n, err := r.be.Load(h, plaintextBuf, int64(blob.Offset)) + + n, err := restic.ReadAt(r.be, h, int64(blob.Offset), plaintextBuf) if err != nil { debug.Log("error loading blob %v: %v", blob, err) lastError = err @@ -220,7 +221,7 @@ func (r *Repository) SaveUnpacked(t restic.FileType, p []byte) (id restic.ID, er id = restic.Hash(ciphertext) h := restic.Handle{Type: t, Name: id.String()} - err = r.be.Save(h, ciphertext) + err = r.be.Save(h, bytes.NewReader(ciphertext)) if err != nil { debug.Log("error saving blob %v: %v", h, err) return restic.ID{}, err @@ -235,15 +236,15 @@ func (r *Repository) Flush() error { r.pm.Lock() defer r.pm.Unlock() - debug.Log("manually flushing %d packs", len(r.packs)) + debug.Log("manually flushing %d packs", len(r.packerManager.packers)) - for _, p := range r.packs { + for _, p := range r.packerManager.packers { err := r.savePacker(p) if err != nil { return err } } - r.packs = r.packs[:0] + r.packerManager.packers = r.packerManager.packers[:0] return nil }