From 7419844885b0468df53e97cebfadf218277c60d3 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Fri, 12 Jun 2020 12:57:23 +0200 Subject: [PATCH] add changelog, benchmark, memory calculation --- changelog/unreleased/pull-2781 | 6 ++++++ internal/repository/index.go | 22 ++++++++++++++++++++++ internal/repository/index_test.go | 22 ++++++++++++++-------- 3 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 changelog/unreleased/pull-2781 diff --git a/changelog/unreleased/pull-2781 b/changelog/unreleased/pull-2781 new file mode 100644 index 000000000..9535fc101 --- /dev/null +++ b/changelog/unreleased/pull-2781 @@ -0,0 +1,6 @@ +Enhancement: Reduce memory consumption of in-memory index + +We've improved how the index is stored in memory. +This change reduces memory usage for large repositories by about 30-40%. + +https://github.com/restic/restic/pull/2781 diff --git a/internal/repository/index.go b/internal/repository/index.go index 70e7b7bf7..d6038e2e1 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -13,6 +13,28 @@ import ( "github.com/restic/restic/internal/debug" ) +// In large repositories, millions of blobs are stored in the repository +// and restic needs to store an index entry for each blob in memory for +// most operations. +// Hence the index data structure defined here is one of the main contributors +// to the total memory requirements of restic. +// +// We use a map to store each index entry. +// The key of the map is a BlobHandle. +// The values of the map are slices which contain the actual index entries. +// +// To compute the needed amount of memory, we need some assumptions. +// Maps carry an overhead of allocated but unused elements. 
+// For computations, we assume an overhead of 50% and use OF=1.5 (overhead factor). +// +// We have the following sizes: +// key: 32 + 1 = 33 bytes +// slice: 24 bytes (pointer, len and cap) +// indexEntry: 32 + 8 + 8 = 48 bytes +// +// To save N index entries, we therefore need: +// N * OF * (33 + 24) bytes + N * 48 bytes = N * 133.5 bytes, i.e. about N * 134 bytes + // Index holds a lookup table for id -> pack. type Index struct { m sync.Mutex diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index e1f2829bd..c96101906 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -398,18 +398,16 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I // create index with 200k pack files for i := 0; i < 200000; i++ { packID := NewRandomTestID(rng) + var blobs []restic.Blob offset := 0 for offset < maxPackSize { size := 2000 + rand.Intn(4*1024*1024) id := NewRandomTestID(rng) - idx.Store(restic.PackedBlob{ - PackID: packID, - Blob: restic.Blob{ - Type: restic.DataBlob, - ID: id, - Length: uint(size), - Offset: uint(offset), - }, + blobs = append(blobs, restic.Blob{ + Type: restic.DataBlob, + ID: id, + Length: uint(size), + Offset: uint(offset), }) offset += size @@ -418,6 +416,7 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I lookupID = id } } + idx.StorePack(packID, blobs) } return idx, lookupID @@ -444,6 +443,13 @@ func BenchmarkIndexHasKnown(b *testing.B) { } } +func BenchmarkIndexAlloc(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + createRandomIndex(rand.New(rand.NewSource(0))) + } +} + func TestIndexHas(t *testing.T) { type testEntry struct { id restic.ID