From be0e53c07bc61a1817a934db6436241c8ac4c406 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 May 2017 00:15:17 +0200 Subject: [PATCH 1/8] tests: Add test for backend Save() from file --- src/restic/backend/test/tests.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/restic/backend/test/tests.go b/src/restic/backend/test/tests.go index 7c1031099..594acd412 100644 --- a/src/restic/backend/test/tests.go +++ b/src/restic/backend/test/tests.go @@ -293,7 +293,7 @@ func (s *Suite) TestSave(t *testing.T) { t.Fatal(err) } - if _, err = tmpfile.Seek(0, 0); err != nil { + if _, err = tmpfile.Seek(0, io.SeekStart); err != nil { t.Fatal(err) } @@ -306,11 +306,22 @@ func (s *Suite) TestSave(t *testing.T) { t.Fatal(err) } - if err = tmpfile.Close(); err != nil { + err = b.Remove(h) + if err != nil { + t.Fatalf("error removing item: %+v", err) + } + + // try again directly with the temp file + if _, err = tmpfile.Seek(588, io.SeekStart); err != nil { t.Fatal(err) } - if err = os.Remove(tmpfile.Name()); err != nil { + err = b.Save(h, tmpfile) + if err != nil { + t.Fatal(err) + } + + if err = tmpfile.Close(); err != nil { t.Fatal(err) } @@ -318,6 +329,10 @@ func (s *Suite) TestSave(t *testing.T) { if err != nil { t.Fatalf("error removing item: %+v", err) } + + if err = os.Remove(tmpfile.Name()); err != nil { + t.Fatal(err) + } } var filenameTests = []struct { From 1e0e6ee57386b8346bef1e212aac46e575645d75 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 13 May 2017 21:18:14 +0200 Subject: [PATCH 2/8] s3: Use low-level API with a Range header for Load benchmark old ns/op new ns/op delta BenchmarkBackendMinio/LoadFile-4 9213315 11001787 +19.41% BenchmarkBackendMinio/LoadPartialFile-4 4176619 3479707 -16.69% BenchmarkBackendMinio/LoadPartialFileOffset-4 4391521 3139214 -28.52% BenchmarkBackendS3/LoadFile-4 2886070905 2505907501 -13.17% BenchmarkBackendS3/LoadPartialFile-4 762702722 735694398 -3.54% BenchmarkBackendS3/LoadPartialFileOffset-4 789724328 1108989142 +40.43% benchmark old MB/s new MB/s speedup BenchmarkBackendMinio/LoadFile-4 1821.21 1525.15 0.84x BenchmarkBackendMinio/LoadPartialFile-4 1004.49 1205.67 1.20x BenchmarkBackendMinio/LoadPartialFileOffset-4 955.34 1336.45 1.40x BenchmarkBackendS3/LoadFile-4 5.81 6.70 1.15x BenchmarkBackendS3/LoadPartialFile-4 5.50 5.70 1.04x BenchmarkBackendS3/LoadPartialFileOffset-4 5.31 3.78 0.71x benchmark old allocs new allocs delta BenchmarkBackendMinio/LoadFile-4 406 204 -49.75% BenchmarkBackendMinio/LoadPartialFile-4 225 206 -8.44% BenchmarkBackendMinio/LoadPartialFileOffset-4 227 207 -8.81% BenchmarkBackendS3/LoadFile-4 600 388 -35.33% BenchmarkBackendS3/LoadPartialFile-4 416 302 -27.40% BenchmarkBackendS3/LoadPartialFileOffset-4 417 303 -27.34% benchmark old bytes new bytes delta BenchmarkBackendMinio/LoadFile-4 29475 13904 -52.83% BenchmarkBackendMinio/LoadPartialFile-4 4218838 13958 -99.67% BenchmarkBackendMinio/LoadPartialFileOffset-4 4219175 14332 -99.66% BenchmarkBackendS3/LoadFile-4 114152 97424 -14.65% BenchmarkBackendS3/LoadPartialFile-4 4265416 56212 -98.68% BenchmarkBackendS3/LoadPartialFileOffset-4 4266520 56308 -98.68% --- src/restic/backend/s3/s3.go | 94 +++++-------------------------------- 1 file changed, 12 insertions(+), 82 deletions(-) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index f16b3c594..301598d04 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -1,7 +1,7 @@ package s3 import ( - "bytes" + "fmt" "io" "path" "restic" @@ -139,96 +139,26 @@ func (be *s3) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, er return nil, errors.Errorf("invalid length %d", length) } - var obj *minio.Object - var size int64 - objName := be.Filename(h) // get token for connection <-be.connChan - obj, err := be.client.GetObject(be.bucketname, objName) - if err != nil { - debug.Log(" err %v", err) - - // return token - be.connChan <- struct{}{} - - return nil, errors.Wrap(err, "client.GetObject") + byteRange := fmt.Sprintf("bytes=%d-", offset) + if length > 0 { + byteRange = fmt.Sprintf("bytes=%d-%d", offset, offset+int64(length)-1) } + headers := minio.NewGetReqHeaders() + headers.Add("Range", byteRange) + debug.Log("Load(%v) send range %v", h, byteRange) - // if we're going to read the whole object, just pass it on. - if length == 0 { - debug.Log("Load %v: pass on object", h) + coreClient := minio.Core{be.client} + rd, _, err := coreClient.GetObject(be.bucketname, objName, headers) - _, err = obj.Seek(offset, 0) - if err != nil { - _ = obj.Close() + // return token + be.connChan <- struct{}{} - // return token - be.connChan <- struct{}{} - - return nil, errors.Wrap(err, "obj.Seek") - } - - rd := wrapReader{ - ReadCloser: obj, - f: func() { - debug.Log("Close()") - // return token - be.connChan <- struct{}{} - }, - } - return rd, nil - } - - defer func() { - // return token - be.connChan <- struct{}{} - }() - - // otherwise use a buffer with ReadAt - be.cacheMutex.RLock() - size, cacheHit := be.cacheObjSize[objName] - be.cacheMutex.RUnlock() - - if !cacheHit { - info, err := obj.Stat() - if err != nil { - _ = obj.Close() - return nil, errors.Wrap(err, "obj.Stat") - } - size = info.Size - be.cacheMutex.Lock() - be.cacheObjSize[objName] = size - be.cacheMutex.Unlock() - } - - if offset > size { - _ = obj.Close() - return nil, errors.New("offset larger than file size") - } - - l := int64(length) - if offset+l > size { - l = size - offset - } - - buf := make([]byte, l) - n, err := obj.ReadAt(buf, offset) - debug.Log("Load %v: use buffer with ReadAt: %v, %v", h, n, err) - if err == io.EOF { - debug.Log("Load %v: shorten buffer %v -> %v", h, len(buf), n) - buf = buf[:n] - err = nil - } - - if err != nil { - _ = obj.Close() - return nil, errors.Wrap(err, "obj.ReadAt") - } - - return backend.Closer{Reader: bytes.NewReader(buf)}, nil + return rd, err } // Stat returns information about a blob. From 9452f416bfb748f2ecb464c38c495a2c3f1bdf14 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 13 May 2017 23:17:10 +0200 Subject: [PATCH 3/8] s3: Use low level API for saving files benchmark old ns/op new ns/op delta BenchmarkBackendMinio/Save-4 184482294 40663344 -77.96% BenchmarkBackendS3/Save-4 35030825568 54475455819 +55.51% benchmark old MB/s new MB/s speedup BenchmarkBackendMinio/Save-4 90.95 412.64 4.54x BenchmarkBackendS3/Save-4 0.48 0.31 0.65x benchmark old allocs new allocs delta BenchmarkBackendMinio/Save-4 631 560 -11.25% BenchmarkBackendS3/Save-4 646 584 -9.60% benchmark old bytes new bytes delta BenchmarkBackendMinio/Save-4 66818060 50735 -99.92% BenchmarkBackendS3/Save-4 66834000 73024 -99.89% --- src/restic/backend/s3/s3.go | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 301598d04..bf340e163 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -80,6 +80,10 @@ func (be *s3) Location() string { return be.bucketname } +type Sizer interface { + Size() int64 +} + // Save stores data in the backend at the handle. func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { if err := h.Valid(); err != nil { @@ -88,6 +92,13 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { objName := be.Filename(h) + var size int64 + if r, ok := rd.(Sizer); ok { + size = r.Size() + } else { + panic("Save() got passed a reader without a method to determine the data size") + } + debug.Log("Save %v at %v", h, objName) // Check key does not already exist @@ -98,14 +109,14 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { } <-be.connChan - defer func() { - be.connChan <- struct{}{} - }() - debug.Log("PutObject(%v, %v)", - be.bucketname, objName) - n, err := be.client.PutObject(be.bucketname, objName, rd, "binary/octet-stream") - debug.Log("%v -> %v bytes, err %#v", objName, n, err) + debug.Log("PutObject(%v, %v)", be.bucketname, objName) + coreClient := minio.Core{be.client} + info, err := coreClient.PutObject(be.bucketname, objName, size, rd, nil, nil, nil) + + // return token + be.connChan <- struct{}{} + debug.Log("%v -> %v bytes, err %#v", objName, info.Size, err) return errors.Wrap(err, "client.PutObject") } From 246ccf09b998e6af6006c5e40b05acccbf18342d Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 13 May 2017 23:55:22 +0200 Subject: [PATCH 4/8] s3: add getRemainingSize --- src/restic/backend/s3/s3.go | 39 +++++++++++--- src/restic/backend/s3/s3_internal_test.go | 66 +++++++++++++++++++++++ 2 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 src/restic/backend/s3/s3_internal_test.go diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index bf340e163..0a3fcccc1 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -3,6 +3,7 @@ package s3 import ( "fmt" "io" + "os" "path" "restic" "strings" @@ -84,6 +85,35 @@ type Sizer interface { Size() int64 } +type Lenner interface { + Len() int +} + +// getRemainingSize returns number of bytes remaining. If it is not possible to +// determine the size, panic() is called. +func getRemainingSize(rd io.Reader) (size int64, err error) { + if r, ok := rd.(Lenner); ok { + size = int64(r.Len()) + } else if r, ok := rd.(Sizer); ok { + size = r.Size() + } else if f, ok := rd.(*os.File); ok { + fi, err := f.Stat() + if err != nil { + return 0, err + } + + pos, err := f.Seek(0, io.SeekCurrent) + if err != nil { + return 0, err + } + + size = fi.Size() - pos + } else { + panic(fmt.Sprintf("Save() got passed a reader without a method to determine the data size, type is %T", rd)) + } + return size, nil +} + // Save stores data in the backend at the handle. func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { if err := h.Valid(); err != nil { @@ -91,12 +121,9 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { } objName := be.Filename(h) - - var size int64 - if r, ok := rd.(Sizer); ok { - size = r.Size() - } else { - panic("Save() got passed a reader without a method to determine the data size") + size, err := getRemainingSize(rd) + if err != nil { + return err } debug.Log("Save %v at %v", h, objName) diff --git a/src/restic/backend/s3/s3_internal_test.go b/src/restic/backend/s3/s3_internal_test.go new file mode 100644 index 000000000..3b0a7eb2c --- /dev/null +++ b/src/restic/backend/s3/s3_internal_test.go @@ -0,0 +1,66 @@ +package s3 + +import ( + "bytes" + "io" + "io/ioutil" + "os" + "restic/test" + "testing" +) + +func writeFile(t testing.TB, data []byte, offset int64) *os.File { + tempfile, err := ioutil.TempFile("", "restic-test-") + if err != nil { + t.Fatal(err) + } + + if err = os.Remove(tempfile.Name()); err != nil { + t.Fatal(err) + } + + if _, err = tempfile.Write(data); err != nil { + t.Fatal(err) + } + + if _, err = tempfile.Seek(offset, io.SeekStart); err != nil { + t.Fatal(err) + } + + return tempfile +} + +func TestGetRemainingSize(t *testing.T) { + length := 18 * 1123 + partialRead := 1005 + + data := test.Random(23, length) + + partReader := bytes.NewReader(data) + buf := make([]byte, partialRead) + _, _ = io.ReadFull(partReader, buf) + + partFileReader := writeFile(t, data, int64(partialRead)) + + var tests = []struct { + io.Reader + size int64 + }{ + {bytes.NewReader([]byte("foobar test")), 11}, + {partReader, int64(length - partialRead)}, + {partFileReader, int64(length - partialRead)}, + } + + for _, test := range tests { + t.Run("", func(t *testing.T) { + size, err := getRemainingSize(test.Reader) + if err != nil { + t.Fatal(err) + } + + if size != test.size { + t.Fatalf("invalid size returned, want %v, got %v", test.size, size) + } + }) + } +} From 0bd40bae6e7e65a1a8aa368f66239e0bda344bab Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 May 2017 00:09:49 +0200 Subject: [PATCH 5/8] s3: Prevent net/http client from closing the reader --- src/restic/backend/s3/s3.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 0a3fcccc1..174146040 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -114,6 +114,17 @@ func getRemainingSize(rd io.Reader) (size int64, err error) { return size, nil } +// preventCloser wraps an io.Reader to run a function instead of the original Close() function. +type preventCloser struct { + io.Reader + f func() +} + +func (wr preventCloser) Close() error { + wr.f() + return nil +} + // Save stores data in the backend at the handle. func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { if err := h.Valid(); err != nil { @@ -137,6 +148,15 @@ func (be *s3) Save(h restic.Handle, rd io.Reader) (err error) { <-be.connChan + // wrap the reader so that net/http client cannot close the reader, return + // the token instead. + rd = preventCloser{ + Reader: rd, + f: func() { + debug.Log("Close()") + }, + } + debug.Log("PutObject(%v, %v)", be.bucketname, objName) coreClient := minio.Core{be.client} info, err := coreClient.PutObject(be.bucketname, objName, size, rd, nil, nil, nil) From 26c16b9fd39ac6512d926670a36320c88571aad8 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 May 2017 00:09:59 +0200 Subject: [PATCH 6/8] s3: Correctly return token in Load --- src/restic/backend/s3/s3.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 174146040..56cf71249 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -212,11 +212,22 @@ func (be *s3) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, er coreClient := minio.Core{be.client} rd, _, err := coreClient.GetObject(be.bucketname, objName, headers) + if err != nil { + // return token + be.connChan <- struct{}{} + return nil, err + } - // return token - be.connChan <- struct{}{} + closeRd := wrapReader{ + ReadCloser: rd, + f: func() { + debug.Log("Close()") + // return token + be.connChan <- struct{}{} + }, + } - return rd, err + return closeRd, err } // Stat returns information about a blob. From 3b44b8713753bc5041d987828043a79f4b820aad Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 May 2017 20:34:22 +0200 Subject: [PATCH 7/8] s3: Remove file after usage in test --- src/restic/backend/s3/s3_internal_test.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/restic/backend/s3/s3_internal_test.go b/src/restic/backend/s3/s3_internal_test.go index 3b0a7eb2c..ce6c96036 100644 --- a/src/restic/backend/s3/s3_internal_test.go +++ b/src/restic/backend/s3/s3_internal_test.go @@ -15,10 +15,6 @@ func writeFile(t testing.TB, data []byte, offset int64) *os.File { t.Fatal(err) } - if err = os.Remove(tempfile.Name()); err != nil { - t.Fatal(err) - } - if _, err = tempfile.Write(data); err != nil { t.Fatal(err) } @@ -41,6 +37,15 @@ func TestGetRemainingSize(t *testing.T) { _, _ = io.ReadFull(partReader, buf) partFileReader := writeFile(t, data, int64(partialRead)) + defer func() { + if err := partFileReader.Close(); err != nil { + t.Fatal(err) + } + + if err := os.Remove(partFileReader.Name()); err != nil { + t.Fatal(err) + } + }() var tests = []struct { io.Reader From 250a45ab15587909133a0336b8ad55fa5e9bc68c Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 14 May 2017 20:36:26 +0200 Subject: [PATCH 8/8] s3: Move interfaces to function --- src/restic/backend/s3/s3.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/restic/backend/s3/s3.go b/src/restic/backend/s3/s3.go index 56cf71249..9afd13df5 100644 --- a/src/restic/backend/s3/s3.go +++ b/src/restic/backend/s3/s3.go @@ -81,17 +81,17 @@ func (be *s3) Location() string { return be.bucketname } -type Sizer interface { - Size() int64 -} - -type Lenner interface { - Len() int -} - // getRemainingSize returns number of bytes remaining. If it is not possible to // determine the size, panic() is called. func getRemainingSize(rd io.Reader) (size int64, err error) { + type Sizer interface { + Size() int64 + } + + type Lenner interface { + Len() int + } + if r, ok := rd.(Lenner); ok { size = int64(r.Len()) } else if r, ok := rd.(Sizer); ok {