diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go
index 075edc769..6f3b7db12 100644
--- a/cmd/restic/cmd_prune.go
+++ b/cmd/restic/cmd_prune.go
@@ -8,7 +8,6 @@ import (
 
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
-	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 
@@ -233,7 +232,7 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 	// iterate over all blobs in index to find out which blobs are duplicates
 	for blob := range repo.Index().Each(ctx) {
 		bh := blob.Handle()
-		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		size := uint64(blob.Length)
 		switch {
 		case usedBlobs.Has(bh): // used blob, move to keepBlobs
 			usedBlobs.Delete(bh)
@@ -261,19 +260,28 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 
 	indexPack := make(map[restic.ID]packInfo)
 
+	// save computed pack header size
+	for pid, hdrSize := range repo.Index().PackSize(ctx, true) {
+		// initialize tpe with NumBlobTypes to indicate it's not set
+		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
+	}
+
 	// iterate over all blobs in index to generate packInfo
 	for blob := range repo.Index().Each(ctx) {
-		ip, ok := indexPack[blob.PackID]
-		if !ok {
-			ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
+		ip := indexPack[blob.PackID]
+
+		// Set blob type if not yet set
+		if ip.tpe == restic.NumBlobTypes {
+			ip.tpe = blob.Type
 		}
+
+		// mark mixed packs with "Invalid blob type"
 		if ip.tpe != blob.Type {
 			ip.tpe = restic.InvalidBlob
 		}
 
 		bh := blob.Handle()
-		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		size := uint64(blob.Length)
 		switch {
 		case duplicateBlobs.Has(bh): // duplicate blob
 			ip.usedSize += size
diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go
index 850c03dd2..e1fdcd7e4 100644
--- a/cmd/restic/cmd_rebuild_index.go
+++ b/cmd/restic/cmd_rebuild_index.go
@@ -1,7 +1,6 @@
 package main
 
 import (
-	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 
@@ -91,17 +90,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
 	}
 
 	Verbosef("getting pack files to read...\n")
-
-	// Compute size of each pack from index entries
-	packSizeFromIndex := make(map[restic.ID]int64)
-	for blob := range repo.Index().Each(ctx) {
-		size, ok := packSizeFromIndex[blob.PackID]
-		if !ok {
-			size = pack.HeaderSize
-		}
-		// update packSizeFromIndex
-		packSizeFromIndex[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
-	}
+	packSizeFromIndex := repo.Index().PackSize(ctx, false)
 
 	err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
 		size, ok := packSizeFromIndex[id]
diff --git a/internal/checker/checker.go b/internal/checker/checker.go
index 1e31ba986..133485497 100644
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@@ -178,13 +178,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
 	c.masterIndex.MergeFinalIndexes()
 
 	// compute pack size using index entries
-	for blob := range c.masterIndex.Each(ctx) {
-		size, ok := c.packs[blob.PackID]
-		if !ok {
-			size = pack.HeaderSize
-		}
-		c.packs[blob.PackID] = size + int64(pack.PackedSizeOfBlob(blob.Length))
-	}
+	c.packs = c.masterIndex.PackSize(ctx, false)
 
 	debug.Log("checking for duplicate packs")
 	for packID := range c.packs {
@@ -749,17 +743,17 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int6
 		return errors.Errorf("Pack size does not match, want %v, got %v", size, realSize)
 	}
 
-	blobs, err := pack.List(r.Key(), packfile, size)
+	blobs, hdrSize, err := pack.List(r.Key(), packfile, size)
 	if err != nil {
 		return err
 	}
 
 	var errs []error
 	var buf []byte
-	sizeFromBlobs := int64(pack.HeaderSize) // pack size computed only from blob information
+	sizeFromBlobs := uint(hdrSize)
 	idx := r.Index()
 	for i, blob := range blobs {
-		sizeFromBlobs += int64(pack.PackedSizeOfBlob(blob.Length))
+		sizeFromBlobs += blob.Length
 		debug.Log("  check blob %d: %v", i, blob)
 
 		buf = buf[:cap(buf)]
@@ -809,7 +803,7 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID, size int6
 		}
 	}
 
-	if sizeFromBlobs != size {
+	if int64(sizeFromBlobs) != size {
 		debug.Log("Pack size does not match, want %v, got %v", size, sizeFromBlobs)
 		errs = append(errs, errors.Errorf("Pack size does not match, want %v, got %v", size, sizeFromBlobs))
 	}
diff --git a/internal/pack/pack.go b/internal/pack/pack.go
index 2c39009b1..0c26e6a75 100644
--- a/internal/pack/pack.go
+++ b/internal/pack/pack.go
@@ -46,7 +46,7 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
 	return n, errors.Wrap(err, "Write")
 }
 
-var entrySize = uint(binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
+var EntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
 
 // headerEntry describes the format of header entries. It serves only as
 // documentation.
@@ -88,7 +88,7 @@ func (p *Packer) Finalize() (uint, error) {
 	bytesWritten += uint(hdrBytes)
 
 	// write length
-	err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(entrySize))))
+	err = binary.Write(p.wr, binary.LittleEndian, uint32(restic.CiphertextLength(len(p.blobs)*int(EntrySize))))
 	if err != nil {
 		return 0, errors.Wrap(err, "binary.Write")
 	}
@@ -100,7 +100,7 @@ func (p *Packer) Finalize() (uint, error) {
 
 // makeHeader constructs the header for p.
 func (p *Packer) makeHeader() ([]byte, error) {
-	buf := make([]byte, 0, len(p.blobs)*int(entrySize))
+	buf := make([]byte, 0, len(p.blobs)*int(EntrySize))
 
 	for _, b := range p.blobs {
 		switch b.Type {
@@ -151,7 +151,7 @@ var (
 	// we require at least one entry in the header, and one blob for a pack file
-	minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
+	minFileSize = EntrySize + crypto.Extension + uint(headerLengthSize)
 )
 
 const (
@@ -171,7 +171,7 @@
 // the appropriate size.
 func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 	var bufsize int
-	bufsize += max * int(entrySize)
+	bufsize += max * int(EntrySize)
 	bufsize += crypto.Extension
 	bufsize += headerLengthSize
@@ -195,7 +195,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		err = InvalidFileError{Message: "header length is zero"}
 	case hlen < crypto.Extension:
 		err = InvalidFileError{Message: "header length is too small"}
-	case (hlen-crypto.Extension)%uint32(entrySize) != 0:
+	case (hlen-crypto.Extension)%uint32(EntrySize) != 0:
 		err = InvalidFileError{Message: "header length is invalid"}
 	case int64(hlen) > size-int64(headerLengthSize):
 		err = InvalidFileError{Message: "header is larger than file"}
@@ -206,7 +206,7 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		return nil, 0, errors.Wrap(err, "readHeader")
 	}
 
-	total := (int(hlen) - crypto.Extension) / int(entrySize)
+	total := (int(hlen) - crypto.Extension) / int(EntrySize)
 	if total < max {
 		// truncate to the beginning of the pack header
 		b = b[len(b)-int(hlen):]
@@ -252,52 +252,55 @@ func (e InvalidFileError) Error() string {
 	return e.Message
 }
 
-// List returns the list of entries found in a pack file.
-func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err error) {
+// List returns the list of entries found in a pack file and the length of the
+// header (including header size and crypto overhead)
+func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdrSize uint32, err error) {
 	buf, err := readHeader(rd, size)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 
 	if len(buf) < k.NonceSize()+k.Overhead() {
-		return nil, errors.New("invalid header, too small")
+		return nil, 0, errors.New("invalid header, too small")
 	}
 
+	hdrSize = headerLengthSize + uint32(len(buf))
+
 	nonce, buf := buf[:k.NonceSize()], buf[k.NonceSize():]
 	buf, err = k.Open(buf[:0], nonce, buf, nil)
 	if err != nil {
-		return nil, err
+		return nil, 0, err
 	}
 
-	entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
+	entries = make([]restic.Blob, 0, uint(len(buf))/EntrySize)
 
 	pos := uint(0)
 	for len(buf) > 0 {
 		entry, err := parseHeaderEntry(buf)
 		if err != nil {
-			return nil, err
+			return nil, 0, err
 		}
 		entry.Offset = pos
 		entries = append(entries, entry)
 		pos += entry.Length
-		buf = buf[entrySize:]
+		buf = buf[EntrySize:]
 	}
 
-	return entries, nil
+	return entries, hdrSize, nil
 }
 
 // PackedSizeOfBlob returns the size a blob actually uses when saved in a pack
 func PackedSizeOfBlob(blobLength uint) uint {
-	return blobLength + entrySize
+	return blobLength + EntrySize
 }
 
 func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
-	if uint(len(p)) < entrySize {
+	if uint(len(p)) < EntrySize {
 		err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
 		return b, err
 	}
-	p = p[:entrySize]
+	p = p[:EntrySize]
 
 	switch p[0] {
 	case 0:
diff --git a/internal/pack/pack_internal_test.go b/internal/pack/pack_internal_test.go
index 84b03adc2..6502e4a35 100644
--- a/internal/pack/pack_internal_test.go
+++ b/internal/pack/pack_internal_test.go
@@ -41,7 +41,7 @@ func TestParseHeaderEntry(t *testing.T) {
 	buf.Reset()
 	_ = binary.Write(buf, binary.LittleEndian, &h)
 
-	b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
+	b, err = parseHeaderEntry(buf.Bytes()[:EntrySize-1])
 	rtest.Assert(t, err != nil, "no error for short input")
 }
 
@@ -58,7 +58,7 @@ func (rd *countingReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
 
 func TestReadHeaderEagerLoad(t *testing.T) {
 
 	testReadHeader := func(dataSize, entryCount, expectedReadInvocationCount int) {
-		expectedHeader := rtest.Random(0, entryCount*int(entrySize)+crypto.Extension)
+		expectedHeader := rtest.Random(0, entryCount*int(EntrySize)+crypto.Extension)
 
 		buf := &bytes.Buffer{}
 		buf.Write(rtest.Random(0, dataSize)) // pack blobs data
@@ -83,8 +83,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
 	testReadHeader(100, eagerEntries+1, 2)
 
 	// file size == eager header load size
-	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
-	headerSize := int(1*entrySize) + crypto.Extension
+	eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
+	headerSize := int(1*EntrySize) + crypto.Extension
 	dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
 	testReadHeader(dataSize-1, 1, 1)
 	testReadHeader(dataSize, 1, 1)
@@ -96,8 +96,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
 
 func TestReadRecords(t *testing.T) {
 	testReadRecords := func(dataSize, entryCount, totalRecords int) {
-		totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
-		off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
+		totalHeader := rtest.Random(0, totalRecords*int(EntrySize)+crypto.Extension)
+		off := len(totalHeader) - (entryCount*int(EntrySize) + crypto.Extension)
 		if off < 0 {
 			off = 0
 		}
@@ -127,8 +127,8 @@ func TestReadRecords(t *testing.T) {
 	testReadRecords(100, eagerEntries, eagerEntries+1)
 
 	// file size == eager header load size
-	eagerLoadSize := int((eagerEntries * entrySize) + crypto.Extension)
-	headerSize := int(1*entrySize) + crypto.Extension
+	eagerLoadSize := int((eagerEntries * EntrySize) + crypto.Extension)
+	headerSize := int(1*EntrySize) + crypto.Extension
 	dataSize := eagerLoadSize - headerSize - binary.Size(uint32(0))
 	testReadRecords(dataSize-1, 1, 1)
 	testReadRecords(dataSize, 1, 1)
diff --git a/internal/pack/pack_test.go b/internal/pack/pack_test.go
index 99755c36f..471e901c3 100644
--- a/internal/pack/pack_test.go
+++ b/internal/pack/pack_test.go
@@ -53,19 +53,18 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSi
 	for _, buf := range bufs {
 		written += len(buf.data)
 	}
-	// header length
-	written += binary.Size(uint32(0))
-	// header + header crypto
-	headerSize := len(bufs) * (binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{}))
-	written += restic.CiphertextLength(headerSize)
+	// header length + header + header crypto
+	headerSize := binary.Size(uint32(0)) + restic.CiphertextLength(len(bufs)*int(pack.EntrySize))
+	written += headerSize
 
 	// check length
 	rtest.Equals(t, uint(written), packSize)
 
 	// read and parse it again
-	entries, err := pack.List(k, rd, int64(packSize))
+	entries, hdrSize, err := pack.List(k, rd, int64(packSize))
 	rtest.OK(t, err)
 	rtest.Equals(t, len(entries), len(bufs))
+	rtest.Equals(t, headerSize, int(hdrSize))
 
 	var buf []byte
 	for i, b := range bufs {
diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go
index 041bbc669..a23296a7e 100644
--- a/internal/repository/master_index.go
+++ b/internal/repository/master_index.go
@@ -5,6 +5,7 @@ import (
 	"sync"
 
 	"github.com/restic/restic/internal/debug"
+	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/restic"
 	"github.com/restic/restic/internal/ui/progress"
 	"golang.org/x/sync/errgroup"
@@ -111,6 +112,27 @@ func (mi *MasterIndex) Packs() restic.IDSet {
 	return packs
 }
 
+// PackSize returns the size of all packs computed by index information.
+// If onlyHdr is set to true, only the size of the header is returned.
+// Note that this function only gives correct sizes if there are no
+// duplicates in the index.
+func (mi *MasterIndex) PackSize(ctx context.Context, onlyHdr bool) map[restic.ID]int64 {
+	packSize := make(map[restic.ID]int64)
+
+	for blob := range mi.Each(ctx) {
+		size, ok := packSize[blob.PackID]
+		if !ok {
+			size = pack.HeaderSize
+		}
+		if !onlyHdr {
+			size += int64(blob.Length)
+		}
+		packSize[blob.PackID] = size + int64(pack.EntrySize)
+	}
+
+	return packSize
+}
+
 // Count returns the number of blobs of type t in the index.
 func (mi *MasterIndex) Count(t restic.BlobType) (n uint) {
 	mi.idxMutex.RLock()
diff --git a/internal/repository/repack.go b/internal/repository/repack.go
index 9304204e1..423f3c831 100644
--- a/internal/repository/repack.go
+++ b/internal/repository/repack.go
@@ -92,7 +92,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
 		for job := range processQueue {
 			tempfile, packID, packLength := job.tempfile, job.hash, job.packLength
 
-			blobs, err := pack.List(repo.Key(), tempfile, packLength)
+			blobs, _, err := pack.List(repo.Key(), tempfile, packLength)
 			if err != nil {
 				return err
 			}
diff --git a/internal/repository/repository.go b/internal/repository/repository.go
index f40ce9097..ea79829c6 100644
--- a/internal/repository/repository.go
+++ b/internal/repository/repository.go
@@ -740,16 +740,11 @@ func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic
 }
 
 // ListPack returns the list of blobs saved in the pack id and the length of
-// the file as stored in the backend.
-func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, int64, error) {
+// the pack header.
+func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, uint32, error) {
 	h := restic.Handle{Type: restic.PackFile, Name: id.String()}
 
-	blobs, err := pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
-	if err != nil {
-		return nil, 0, err
-	}
-
-	return blobs, size, nil
+	return pack.List(r.Key(), restic.ReaderAt(ctx, r.Backend(), h), size)
 }
 
 // Delete calls backend.Delete() if implemented, and returns an error
diff --git a/internal/restic/repository.go b/internal/restic/repository.go
index 5efdfbc03..b9ba77171 100644
--- a/internal/restic/repository.go
+++ b/internal/restic/repository.go
@@ -32,7 +32,10 @@ type Repository interface {
 	//
 	// The function fn is called in the same Goroutine List() was called from.
 	List(ctx context.Context, t FileType, fn func(ID, int64) error) error
-	ListPack(context.Context, ID, int64) ([]Blob, int64, error)
+
+	// ListPack returns the list of blobs saved in the pack id and the length of
+	// the pack header.
+	ListPack(context.Context, ID, int64) ([]Blob, uint32, error)
 
 	Flush(context.Context) error
 
@@ -63,6 +66,7 @@ type MasterIndex interface {
 	Lookup(ID, BlobType) []PackedBlob
 	Count(BlobType) uint
 	Packs() IDSet
+	PackSize(ctx context.Context, onlyHdr bool) map[ID]int64
 
 	// Each returns a channel that yields all blobs known to the index. When
 	// the context is cancelled, the background goroutine terminates. This
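For reference, the per-pack size bookkeeping that PackSize, prune and rebuild-index now share can be sketched as follows. This is an illustration only: the constants are local stand-ins for pack.EntrySize (1-byte blob type + 4-byte length + 32-byte ID), headerLengthSize and crypto.Extension (the assumed nonce-plus-MAC overhead of the encrypted header), not imports of the real packages.

// Standalone sketch of the size estimate behind MasterIndex.PackSize.
package main

import "fmt"

const (
	entrySize        = 1 + 4 + 32 // header entry: blob type + uint32 length + SHA-256 ID
	headerLengthSize = 4          // trailing uint32 that stores the header length
	cryptoExtension  = 32         // assumed encryption overhead (nonce + MAC) of the header
)

// estimatedPackSize mirrors the arithmetic of MasterIndex.PackSize for one
// pack: seed with the constant header overhead (pack.HeaderSize), then add
// the blob length plus one header entry per blob recorded in the index.
func estimatedPackSize(blobLengths []uint) int64 {
	size := int64(headerLengthSize + cryptoExtension)
	for _, l := range blobLengths {
		size += int64(l) + entrySize
	}
	return size
}

func main() {
	// Two blobs of 100 and 2048 bytes: 2148 + 2*37 + 36 = 2258 bytes on disk.
	fmt.Println(estimatedPackSize([]uint{100, 2048}))
}

Calling PackSize with onlyHdr set to true, as prune does, corresponds to stopping after the seed value plus one EntrySize per blob, i.e. the space the pack spends on everything except the blob data itself.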
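Similarly, a caller of the new three-result pack.List can recompute a pack's expected on-disk size from the returned header size plus the blob lengths, which is what checkPack in internal/checker now does. The helper below is a hypothetical sketch of that pattern, not code from the diff.

package sketch // hypothetical package; the real check lives in internal/checker

import (
	"io"

	"github.com/restic/restic/internal/crypto"
	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/pack"
)

// verifyPackSize lists a pack file and checks that the header size returned by
// pack.List plus the per-blob lengths recorded in the header add up to the
// size of the file in the backend.
func verifyPackSize(k *crypto.Key, rd io.ReaderAt, fileSize int64) error {
	blobs, hdrSize, err := pack.List(k, rd, fileSize)
	if err != nil {
		return err
	}

	// hdrSize already includes the trailing length field and the crypto overhead.
	sizeFromBlobs := uint(hdrSize)
	for _, blob := range blobs {
		sizeFromBlobs += blob.Length
	}

	if int64(sizeFromBlobs) != fileSize {
		return errors.Errorf("pack size does not match, want %v, got %v", fileSize, sizeFromBlobs)
	}
	return nil
}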