diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 76801dea4..e0273122e 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -242,11 +242,26 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB Verbosef("searching used packs...\n") + indexPack := make(map[restic.ID]packInfo) keepBlobs := restic.NewBlobSet() - duplicateBlobs := restic.NewBlobSet() - // iterate over all blobs in index to find out which blobs are duplicates + // iterate over all blobs in index to generate packInfo and find duplicates for blob := range repo.Index().Each(ctx) { + ip, seen := indexPack[blob.PackID] + + if seen { + // mark mixed packs with "Invalid blob type" + if ip.tpe != blob.Type { + ip.tpe = restic.InvalidBlob + } + } else { + ip = packInfo{ + tpe: blob.Type, + usedSize: pack.HeaderSize, + } + } + ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob)) + bh := blob.BlobHandle size := uint64(blob.Length) switch { @@ -255,14 +270,27 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB keepBlobs.Insert(bh) stats.size.used += size stats.blobs.used++ - case keepBlobs.Has(bh): // duplicate blob - duplicateBlobs.Insert(bh) + ip.usedSize += size + ip.usedBlobs++ + + case keepBlobs.Has(bh): // duplicate of a blob that we want to keep stats.size.duplicate += size stats.blobs.duplicate++ - default: + ip.usedSize += size + ip.duplicateBlobs++ + + default: // unused, don't care if it's a duplicate stats.size.unused += size stats.blobs.unused++ + ip.unusedSize += size + ip.unusedBlobs++ } + + if !blob.IsCompressed() { + ip.uncompressed = true + } + // update indexPack + indexPack[blob.PackID] = ip } // Check if all used blobs have been found in index @@ -275,48 +303,6 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB return errorIndexIncomplete } - indexPack := make(map[restic.ID]packInfo) - - // save computed pack header size - for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) { - // initialize tpe with NumBlobTypes to indicate it's not set - indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)} - } - - // iterate over all blobs in index to generate packInfo - for blob := range repo.Index().Each(ctx) { - ip := indexPack[blob.PackID] - - // Set blob type if not yet set - if ip.tpe == restic.NumBlobTypes { - ip.tpe = blob.Type - } - - // mark mixed packs with "Invalid blob type" - if ip.tpe != blob.Type { - ip.tpe = restic.InvalidBlob - } - - bh := blob.BlobHandle - size := uint64(blob.Length) - switch { - case duplicateBlobs.Has(bh): // duplicate blob - ip.usedSize += size - ip.duplicateBlobs++ - case keepBlobs.Has(bh): // used blob, not duplicate - ip.usedSize += size - ip.usedBlobs++ - default: // unused blob - ip.unusedSize += size - ip.unusedBlobs++ - } - if !blob.IsCompressed() { - ip.uncompressed = true - } - // update indexPack - indexPack[blob.PackID] = ip - } - Verbosef("collecting packs for deletion and repacking\n") removePacksFirst := restic.NewIDSet() removePacks := restic.NewIDSet() diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go index 0b3274ec4..5611fa939 100644 --- a/cmd/restic/cmd_rebuild_index.go +++ b/cmd/restic/cmd_rebuild_index.go @@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor if err != nil { return err } - packSizeFromIndex = pack.Size(ctx, repo.Index(), false) + packSizeFromIndex = pack.Size(ctx, repo.Index()) } Verbosef("getting pack files to read...\n") diff --git a/internal/checker/checker.go b/internal/checker/checker.go index a31235fae..e8b24b8c7 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { } // compute pack size using index entries - c.packs = pack.Size(ctx, c.masterIndex, false) + c.packs = pack.Size(ctx, c.masterIndex) debug.Log("checking for duplicate packs") for packID := range c.packs { diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 2d7a5c3fb..196d882cd 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -177,8 +177,8 @@ var ( const ( // size of the header-length field at the end of the file; it is a uint32 headerLengthSize = 4 - // headerSize is the header's constant overhead (independent of #entries) - headerSize = headerLengthSize + crypto.Extension + // HeaderSize is the header's constant overhead (independent of #entries) + HeaderSize = headerLengthSize + crypto.Extension // MaxHeaderSize is the max size of header including header-length field MaxHeaderSize = 16*1024*1024 + headerLengthSize @@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { // eagerly download eagerEntries header entries as part of header-length request. // only make second request if actual number of entries is greater than eagerEntries - eagerSize := eagerEntries*int(entrySize) + headerSize + eagerSize := eagerEntries*int(entrySize) + HeaderSize b, c, err := readRecords(rd, size, eagerSize) if err != nil { return nil, err @@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int { } func CalculateHeaderSize(blobs []restic.Blob) int { - size := headerSize + size := HeaderSize for _, blob := range blobs { size += CalculateEntrySize(blob) } @@ -357,20 +357,17 @@ func CalculateHeaderSize(blobs []restic.Blob) int { } // Size returns the size of all packs computed by index information. -// If onlyHdr is set to true, only the size of the header is returned // Note that this function only gives correct sizes, if there are no // duplicates in the index. -func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 { +func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 { packSize := make(map[restic.ID]int64) for blob := range mi.Each(ctx) { size, ok := packSize[blob.PackID] if !ok { - size = headerSize - } - if !onlyHdr { - size += int64(blob.Length) + size = HeaderSize } + size += int64(blob.Length) packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob)) }