repository: parallelize lock tests

Michael Eischer 2024-02-24 17:07:14 +01:00
parent e8df50fa3c
commit 044e8bf821
2 changed files with 72 additions and 57 deletions
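The diff below replaces the package-level lock timing variables with a locker struct, so each test can construct its own instance with shortened intervals and call t.Parallel() instead of patching shared globals. A minimal sketch of that pattern, using purely illustrative names (timing, refreshLoop, TestFastRefresh are placeholders, not restic identifiers):

// example_test.go — everything kept in one test file so the sketch is self-contained.
package example

import (
	"testing"
	"time"
)

// timing gathers the intervals that would otherwise be package-level variables.
type timing struct {
	refreshInterval time.Duration
}

// Production code keeps using one shared instance with the real defaults.
var defaultTiming = &timing{refreshInterval: 5 * time.Minute}

// refreshLoop stands in for the periodic work driven by the interval.
func (c *timing) refreshLoop(stop <-chan struct{}) {
	ticker := time.NewTicker(c.refreshInterval)
	defer ticker.Stop()
	for {
		select {
		case <-stop:
			return
		case <-ticker.C:
			// refresh work would happen here
		}
	}
}

// Each test builds its own timing instance instead of overwriting globals,
// so tests no longer interfere with each other and can run in parallel.
func TestFastRefresh(t *testing.T) {
	t.Parallel()
	c := &timing{refreshInterval: 10 * time.Millisecond}
	stop := make(chan struct{})
	go c.refreshLoop(stop)
	time.Sleep(50 * time.Millisecond)
	close(stop)
}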

View File

@@ -18,21 +18,31 @@ type lockContext struct {
refreshWG sync.WaitGroup
}
var (
retrySleepStart = 5 * time.Second
retrySleepMax = 60 * time.Second
)
type locker struct {
retrySleepStart time.Duration
retrySleepMax time.Duration
refreshInterval time.Duration
refreshabilityTimeout time.Duration
}
func minDuration(a, b time.Duration) time.Duration {
if a <= b {
return a
}
return b
}
const defaultRefreshInterval = 5 * time.Minute
var lockerInst = &locker{
retrySleepStart: 5 * time.Second,
retrySleepMax: 60 * time.Second,
refreshInterval: defaultRefreshInterval,
// consider a lock refresh failed a bit before the lock actually becomes stale
// the difference allows to compensate for a small time drift between clients.
refreshabilityTimeout: restic.StaleLockTimeout - defaultRefreshInterval*3/2,
}
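// Editorial note, not part of this commit: with the defaults above, and assuming
// restic.StaleLockTimeout is 30 minutes (its value in restic's lock package), this
// works out to refreshabilityTimeout = 30m - 5m*3/2 = 22m30s, i.e. a refresh is
// treated as failed 7.5 minutes before the lock would actually become stale.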
func Lock(ctx context.Context, repo restic.Repository, exclusive bool, retryLock time.Duration, printRetry func(msg string), logger func(format string, args ...interface{})) (*Unlocker, context.Context, error) {
return lockerInst.Lock(ctx, repo, exclusive, retryLock, printRetry, logger)
}
// Lock wraps the ctx such that it is cancelled when the repository is unlocked
// cancelling the original context also stops the lock refresh
func Lock(ctx context.Context, repo restic.Repository, exclusive bool, retryLock time.Duration, printRetry func(msg string), logger func(format string, args ...interface{})) (*Unlocker, context.Context, error) {
func (l *locker) Lock(ctx context.Context, repo restic.Repository, exclusive bool, retryLock time.Duration, printRetry func(msg string), logger func(format string, args ...interface{})) (*Unlocker, context.Context, error) {
lockFn := restic.NewLock
if exclusive {
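// Editorial sketch, not part of this commit: a hypothetical caller of the Lock wrapper
// documented in the hunk above. All further repository access must use the wrapped
// context, which is cancelled once the lock can no longer be refreshed or Unlock is
// called. Assumes the surrounding package plus a fmt import; useRepo is a placeholder.
func exampleUseLock(ctx context.Context, repo restic.Repository, useRepo func(context.Context) error) error {
	unlock, lockedCtx, err := Lock(ctx, repo, false, 0,
		func(msg string) { fmt.Println(msg) },
		func(format string, args ...interface{}) { fmt.Printf(format+"\n", args...) })
	if err != nil {
		return err
	}
	defer unlock.Unlock()
	return useRepo(lockedCtx)
}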
@@ -42,7 +52,7 @@ func Lock(ctx context.Context, repo restic.Repository, exclusive bool, retryLock
var lock *restic.Lock
var err error
retrySleep := minDuration(retrySleepStart, retryLock)
retrySleep := minDuration(l.retrySleepStart, retryLock)
retryMessagePrinted := false
retryTimeout := time.After(retryLock)
@@ -68,7 +78,7 @@ retryLoop:
lock, err = lockFn(ctx, repo)
break retryLoop
case <-retrySleepCh:
retrySleep = minDuration(retrySleep*2, retrySleepMax)
retrySleep = minDuration(retrySleep*2, l.retrySleepMax)
}
} else {
// anything else, either a successful lock or another error
@@ -92,26 +102,27 @@ retryLoop:
refreshChan := make(chan struct{})
forceRefreshChan := make(chan refreshLockRequest)
go refreshLocks(ctx, repo.Backend(), lockInfo, refreshChan, forceRefreshChan, logger)
go monitorLockRefresh(ctx, lockInfo, refreshChan, forceRefreshChan, logger)
go l.refreshLocks(ctx, repo.Backend(), lockInfo, refreshChan, forceRefreshChan, logger)
go l.monitorLockRefresh(ctx, lockInfo, refreshChan, forceRefreshChan, logger)
return &Unlocker{lockInfo}, ctx, nil
}
var refreshInterval = 5 * time.Minute
// consider a lock refresh failed a bit before the lock actually becomes stale
// the difference allows to compensate for a small time drift between clients.
var refreshabilityTimeout = restic.StaleLockTimeout - refreshInterval*3/2
func minDuration(a, b time.Duration) time.Duration {
if a <= b {
return a
}
return b
}
type refreshLockRequest struct {
result chan bool
}
func refreshLocks(ctx context.Context, backend backend.Backend, lockInfo *lockContext, refreshed chan<- struct{}, forceRefresh <-chan refreshLockRequest, logger func(format string, args ...interface{})) {
func (l *locker) refreshLocks(ctx context.Context, backend backend.Backend, lockInfo *lockContext, refreshed chan<- struct{}, forceRefresh <-chan refreshLockRequest, logger func(format string, args ...interface{})) {
debug.Log("start")
lock := lockInfo.lock
ticker := time.NewTicker(refreshInterval)
ticker := time.NewTicker(l.refreshInterval)
lastRefresh := lock.Time
defer func() {
@@ -151,7 +162,7 @@ func refreshLocks(ctx context.Context, backend backend.Backend, lockInfo *lockCo
}
case <-ticker.C:
if time.Since(lastRefresh) > refreshabilityTimeout {
if time.Since(lastRefresh) > l.refreshabilityTimeout {
// the lock is too old, wait until the expiry monitor cancels the context
continue
}
@@ -172,14 +183,14 @@ func refreshLocks(ctx context.Context, backend backend.Backend, lockInfo *lockCo
}
}
func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-chan struct{}, forceRefresh chan<- refreshLockRequest, logger func(format string, args ...interface{})) {
func (l *locker) monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-chan struct{}, forceRefresh chan<- refreshLockRequest, logger func(format string, args ...interface{})) {
// time.Now() might use a monotonic timer which is paused during standby
// convert to unix time to ensure we compare real time values
lastRefresh := time.Now().UnixNano()
pollDuration := 1 * time.Second
if refreshInterval < pollDuration {
if l.refreshInterval < pollDuration {
// required for TestLockFailedRefresh
pollDuration = refreshInterval / 5
pollDuration = l.refreshInterval / 5
}
// timers are paused during standby, which is a problem as the refresh timeout
// _must_ expire if the host was too long in standby. Thus fall back to periodic checks
@@ -205,7 +216,7 @@ func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-
}
lastRefresh = time.Now().UnixNano()
case <-ticker.C:
if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() || refreshStaleLockResult != nil {
if time.Now().UnixNano()-lastRefresh < l.refreshabilityTimeout.Nanoseconds() || refreshStaleLockResult != nil {
continue
}

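monitorLockRefresh in the file above deliberately avoids relying on a single long timer, because monotonic timers can be paused while the host is in standby; it polls on a short ticker and compares wall-clock unix timestamps instead. A self-contained sketch of that polling approach, with illustrative names (watchRefresh, onStale) that are not restic code:

package main

import (
	"context"
	"fmt"
	"time"
)

// watchRefresh reports via onStale when no refresh arrived within timeout.
// It polls on a ticker and compares wall-clock (unix) timestamps, because a
// single timer's monotonic clock may be paused while the host is in standby.
func watchRefresh(ctx context.Context, refreshed <-chan struct{}, timeout, poll time.Duration, onStale func()) {
	lastRefresh := time.Now().UnixNano() // unix time, not the monotonic reading
	ticker := time.NewTicker(poll)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-refreshed:
			lastRefresh = time.Now().UnixNano()
		case <-ticker.C:
			if time.Now().UnixNano()-lastRefresh > timeout.Nanoseconds() {
				onStale()
				return
			}
		}
	}
}

func main() {
	refreshed := make(chan struct{})
	go watchRefresh(context.Background(), refreshed, 100*time.Millisecond, 20*time.Millisecond, func() {
		fmt.Println("no refresh within timeout, treating the lock as stale")
	})

	refreshed <- struct{}{}            // one successful refresh
	time.Sleep(300 * time.Millisecond) // then none: onStale fires after ~100ms
}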
View File

@@ -37,8 +37,8 @@ func openLockTestRepo(t *testing.T, wrapper backendWrapper) restic.Repository {
return repo
}
func checkedLockRepo(ctx context.Context, t *testing.T, repo restic.Repository, retryLock time.Duration) (*Unlocker, context.Context) {
lock, wrappedCtx, err := Lock(ctx, repo, false, retryLock, func(msg string) {}, func(format string, args ...interface{}) {})
func checkedLockRepo(ctx context.Context, t *testing.T, repo restic.Repository, lockerInst *locker, retryLock time.Duration) (*Unlocker, context.Context) {
lock, wrappedCtx, err := lockerInst.Lock(ctx, repo, false, retryLock, func(msg string) {}, func(format string, args ...interface{}) {})
test.OK(t, err)
test.OK(t, wrappedCtx.Err())
if lock.info.lock.Stale() {
@@ -48,9 +48,10 @@ func checkedLockRepo(ctx context.Context, t *testing.T, repo restic.Repository,
}
func TestLock(t *testing.T) {
t.Parallel()
repo := openLockTestRepo(t, nil)
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, 0)
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, lockerInst, 0)
lock.Unlock()
if wrappedCtx.Err() == nil {
t.Fatal("unlock did not cancel context")
@@ -58,11 +59,12 @@ func TestLock(t *testing.T) {
}
func TestLockCancel(t *testing.T) {
t.Parallel()
repo := openLockTestRepo(t, nil)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
lock, wrappedCtx := checkedLockRepo(ctx, t, repo, 0)
lock, wrappedCtx := checkedLockRepo(ctx, t, repo, lockerInst, 0)
cancel()
if wrappedCtx.Err() == nil {
t.Fatal("canceled parent context did not cancel context")
@@ -73,6 +75,7 @@ func TestLockCancel(t *testing.T) {
}
func TestLockConflict(t *testing.T) {
t.Parallel()
repo := openLockTestRepo(t, nil)
repo2, err := New(repo.Backend(), Options{})
test.OK(t, err)
@@ -102,19 +105,19 @@ func (b *writeOnceBackend) Save(ctx context.Context, h backend.Handle, rd backen
}
func TestLockFailedRefresh(t *testing.T) {
t.Parallel()
repo := openLockTestRepo(t, func(r backend.Backend) (backend.Backend, error) {
return &writeOnceBackend{Backend: r}, nil
})
// reduce locking intervals to be suitable for testing
ri, rt := refreshInterval, refreshabilityTimeout
refreshInterval = 20 * time.Millisecond
refreshabilityTimeout = 100 * time.Millisecond
defer func() {
refreshInterval, refreshabilityTimeout = ri, rt
}()
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, 0)
li := &locker{
retrySleepStart: lockerInst.retrySleepStart,
retrySleepMax: lockerInst.retrySleepMax,
refreshInterval: 20 * time.Millisecond,
refreshabilityTimeout: 100 * time.Millisecond,
}
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, li, 0)
select {
case <-wrappedCtx.Done():
@@ -139,6 +142,7 @@ func (b *loggingBackend) Save(ctx context.Context, h backend.Handle, rd backend.
}
func TestLockSuccessfulRefresh(t *testing.T) {
t.Parallel()
repo := openLockTestRepo(t, func(r backend.Backend) (backend.Backend, error) {
return &loggingBackend{
Backend: r,
@@ -148,14 +152,13 @@ func TestLockSuccessfulRefresh(t *testing.T) {
t.Logf("test for successful lock refresh %v", time.Now())
// reduce locking intervals to be suitable for testing
ri, rt := refreshInterval, refreshabilityTimeout
refreshInterval = 60 * time.Millisecond
refreshabilityTimeout = 500 * time.Millisecond
defer func() {
refreshInterval, refreshabilityTimeout = ri, rt
}()
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, 0)
li := &locker{
retrySleepStart: lockerInst.retrySleepStart,
retrySleepMax: lockerInst.retrySleepMax,
refreshInterval: 60 * time.Millisecond,
refreshabilityTimeout: 500 * time.Millisecond,
}
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, li, 0)
select {
case <-wrappedCtx.Done():
@@ -168,7 +171,7 @@ func TestLockSuccessfulRefresh(t *testing.T) {
buf = buf[:n]
t.Log(string(buf))
case <-time.After(2 * refreshabilityTimeout):
case <-time.After(2 * li.refreshabilityTimeout):
// expected lock refresh to work
}
// Unlock should not crash
@@ -190,6 +193,7 @@ func (b *slowBackend) Save(ctx context.Context, h backend.Handle, rd backend.Rew
}
func TestLockSuccessfulStaleRefresh(t *testing.T) {
t.Parallel()
var sb *slowBackend
repo := openLockTestRepo(t, func(r backend.Backend) (backend.Backend, error) {
sb = &slowBackend{Backend: r}
@@ -198,17 +202,17 @@ func TestLockSuccessfulStaleRefresh(t *testing.T) {
t.Logf("test for successful lock refresh %v", time.Now())
// reduce locking intervals to be suitable for testing
ri, rt := refreshInterval, refreshabilityTimeout
refreshInterval = 10 * time.Millisecond
refreshabilityTimeout = 50 * time.Millisecond
defer func() {
refreshInterval, refreshabilityTimeout = ri, rt
}()
li := &locker{
retrySleepStart: lockerInst.retrySleepStart,
retrySleepMax: lockerInst.retrySleepMax,
refreshInterval: 10 * time.Millisecond,
refreshabilityTimeout: 50 * time.Millisecond,
}
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, 0)
lock, wrappedCtx := checkedLockRepo(context.Background(), t, repo, li, 0)
// delay lock refreshing long enough that the lock would expire
sb.m.Lock()
sb.sleep = refreshabilityTimeout + refreshInterval
sb.sleep = li.refreshabilityTimeout + li.refreshInterval
sb.m.Unlock()
select {
@@ -216,7 +220,7 @@ func TestLockSuccessfulStaleRefresh(t *testing.T) {
// don't call t.Fatal to allow the lock to be properly cleaned up
t.Error("lock refresh failed", time.Now())
case <-time.After(refreshabilityTimeout):
case <-time.After(li.refreshabilityTimeout):
}
// reset slow backend
sb.m.Lock()
@@ -229,7 +233,7 @@ func TestLockSuccessfulStaleRefresh(t *testing.T) {
// don't call t.Fatal to allow the lock to be properly cleaned up
t.Error("lock refresh failed", time.Now())
case <-time.After(3 * refreshabilityTimeout):
case <-time.After(3 * li.refreshabilityTimeout):
// expected lock refresh to work
}