Index: Use slices instead of maps, reduce data

This commit is contained in:
Alexander Neumann 2017-01-15 15:45:52 +01:00
parent cd9b526203
commit d40f566e41
3 changed files with 33 additions and 74 deletions

View File

@ -103,11 +103,13 @@ func runPrune(gopts GlobalOptions) error {
return err
}
blobs := 0
for _, pack := range idx.Packs {
stats.bytes += pack.Size
blobs += len(pack.Entries)
}
Verbosef("repository contains %v packs (%v blobs) with %v bytes\n",
len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes)))
len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
blobCount := make(map[restic.BlobHandle]int)
duplicateBlobs := 0
@ -164,14 +166,17 @@ func runPrune(gopts GlobalOptions) error {
// find packs that need a rewrite
rewritePacks := restic.NewIDSet()
for h, blob := range idx.Blobs {
if !usedBlobs.Has(h) {
rewritePacks.Merge(blob.Packs)
continue
}
for _, pack := range idx.Packs {
for _, blob := range pack.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !usedBlobs.Has(h) {
rewritePacks.Insert(pack.ID)
continue
}
if blobCount[h] > 1 {
rewritePacks.Merge(blob.Packs)
if blobCount[h] > 1 {
rewritePacks.Insert(pack.ID)
}
}
}

View File

@ -14,27 +14,20 @@ import (
// Pack contains information about the contents of a pack.
type Pack struct {
// ID identifies the pack. New stores the pack under this same ID in
// Index.Packs.
// NOTE(review): the AddPack fragment visible in this listing builds a Pack
// without setting ID, while runPrune reads pack.ID — confirm AddPack fills it.
ID restic.ID
// Size is the size of the pack; runPrune adds it up and reports the total
// as a number of bytes.
Size int64
// Entries lists the blobs stored in the pack.
Entries []restic.Blob
}
// Blob contains information about a blob.
//
// NOTE(review): this listing looks like a diff whose +/- markers were
// stripped, and this type appears to be on the removed side — confirm before
// relying on it.
type Blob struct {
// Size is the length of the blob; AddPack fills it from the Length of the
// pack entry.
Size int64
// Packs is the set of IDs of the packs that contain the blob.
Packs restic.IDSet
}
// Index contains information about blobs and packs stored in a repo.
type Index struct {
// Packs maps the ID of a pack to the information recorded about it.
Packs map[restic.ID]Pack
// Blobs maps a blob handle (ID and type) to the blob's size and the set of
// packs containing it.
// NOTE(review): this field appears to be on the removed side of the diff
// this listing was taken from — confirm it still exists.
Blobs map[restic.BlobHandle]Blob
// IndexIDs is a set of IDs; presumably the IDs of the index files this
// Index was built from — TODO confirm against the loader.
IndexIDs restic.IDSet
}
// newIndex returns a pointer to an empty Index. Every map and the ID set are
// allocated up front so that callers can insert into them immediately
// (writing to a nil map would panic).
func newIndex() *Index {
return &Index{
Packs: make(map[restic.ID]Pack),
Blobs: make(map[restic.BlobHandle]Blob),
IndexIDs: restic.NewIDSet(),
}
}
@ -70,7 +63,7 @@ func New(repo restic.Repository, p *restic.Progress) (*Index, error) {
return nil, err
}
p := Pack{Entries: j.Entries(), Size: j.Size()}
p := Pack{ID: packID, Entries: j.Entries(), Size: j.Size()}
idx.Packs[packID] = p
}
@ -181,18 +174,6 @@ func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error
idx.Packs[id] = Pack{Size: size, Entries: entries}
for _, entry := range entries {
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
if _, ok := idx.Blobs[h]; !ok {
idx.Blobs[h] = Blob{
Size: int64(entry.Length),
Packs: restic.NewIDSet(),
}
}
idx.Blobs[h].Packs.Insert(id)
}
return nil
}
@ -202,15 +183,6 @@ func (idx *Index) RemovePack(id restic.ID) error {
return errors.Errorf("pack %v not found in the index", id.Str())
}
for _, blob := range idx.Packs[id].Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
idx.Blobs[h].Packs.Delete(id)
if len(idx.Blobs[h].Packs) == 0 {
delete(idx.Blobs, h)
}
}
delete(idx.Packs, id)
return nil
@ -239,14 +211,11 @@ func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) {
func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) {
packs = restic.NewIDSet()
for h := range blobs {
blob, ok := idx.Blobs[h]
if !ok {
continue
}
for id := range blob.Packs {
packs.Insert(id)
for id, p := range idx.Packs {
for _, entry := range p.Entries {
if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) {
packs.Insert(id)
}
}
}
@ -264,31 +233,20 @@ type Location struct {
var ErrBlobNotFound = errors.New("blob not found in index")
// FindBlob returns a list of packs and positions the blob can be found in.
func (idx *Index) FindBlob(h restic.BlobHandle) ([]Location, error) {
blob, ok := idx.Blobs[h]
if !ok {
return nil, ErrBlobNotFound
func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) {
for id, p := range idx.Packs {
for _, entry := range p.Entries {
if entry.ID.Equal(h.ID) && entry.Type == h.Type {
result = append(result, Location{
PackID: id,
Blob: entry,
})
}
}
}
result := make([]Location, 0, len(blob.Packs))
for packID := range blob.Packs {
pack, ok := idx.Packs[packID]
if !ok {
return nil, errors.Errorf("pack %v not found in index", packID.Str())
}
for _, entry := range pack.Entries {
if entry.Type != h.Type {
continue
}
if !entry.ID.Equal(h.ID) {
continue
}
loc := Location{PackID: packID, Blob: entry}
result = append(result, loc)
}
if len(result) == 0 {
return nil, ErrBlobNotFound
}
return result, nil

View File

@ -151,7 +151,7 @@ func TestIndexDuplicateBlobs(t *testing.T) {
if len(dups) == 0 {
t.Errorf("no duplicate blobs found")
}
t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs))
t.Logf("%d packs, %d duplicate blobs", len(idx.Packs), len(dups))
packs := idx.PacksForBlobs(dups)
if len(packs) == 0 {
@ -249,10 +249,6 @@ func TestIndexAddRemovePack(t *testing.T) {
if err == nil {
t.Errorf("removed blob %v found in index", h)
}
if _, ok := idx.Blobs[h]; ok {
t.Errorf("removed blob %v found in index.Blobs", h)
}
}
}