TBS: drop and recreate badger db after exceeding storage limit for TTL time
carsonip committed Jan 3, 2025
1 parent 5a3dd8d commit 9605745
Showing 3 changed files with 260 additions and 0 deletions.
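
For orientation before the diff: the heart of the change is a polling loop that remembers when the storage limit was first exceeded, and drops and recreates the DB once it has stayed exceeded for at least TTL. Below is a condensed, standalone sketch of that logic, not the committed code; runDropLoop, sizeFn and dropFn are illustrative stand-ins for the real StorageManager methods (Go 1.21+ for the built-in min, which the commit also relies on).

package main

import (
	"fmt"
	"time"
)

// runDropLoop mirrors the shape of the new StorageManager.RunDropLoop in
// simplified form: poll the DB size on a ticker and trigger a drop once the
// limit has been exceeded continuously for at least ttl.
func runDropLoop(stopping <-chan struct{}, ttl time.Duration, limit uint64, sizeFn func() uint64, dropFn func()) {
	ticker := time.NewTicker(min(time.Minute, ttl)) // min keeps tests fast, as in the commit
	defer ticker.Stop()
	var firstExceeded time.Time
	for {
		select {
		case <-stopping:
			return
		case <-ticker.C:
			if sizeFn() >= limit {
				now := time.Now()
				if firstExceeded.IsZero() {
					firstExceeded = now // remember when the limit was first exceeded
				}
				if now.Sub(firstExceeded) >= ttl {
					dropFn() // stuck over the limit for at least ttl: drop and recreate
				}
			} else {
				firstExceeded = time.Time{} // back under the limit: reset the clock
			}
		}
	}
}

func main() {
	stopping := make(chan struct{})
	var size uint64
	go runDropLoop(stopping, 2*time.Second, 100,
		func() uint64 { size += 60; return size },                   // simulated growing DB size
		func() { fmt.Println("dropping and recreating"); size = 0 }, // simulated DropAndRecreate
	)
	time.Sleep(7 * time.Second)
	close(stopping)
}
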
134 changes: 134 additions & 0 deletions x-pack/apm-server/sampling/eventstorage/storage_manager.go
@@ -5,6 +5,7 @@
package eventstorage

import (
"errors"
"os"
"path/filepath"
"sync"
@@ -34,9 +35,14 @@ type StorageManager struct {
storage *Storage
rw *ShardedReadWriter

// mu guards db, storage, and rw swaps.
mu sync.RWMutex
// subscriberPosMu protects the subscriber file from concurrent RW.
subscriberPosMu sync.Mutex

// dropLoopCh acts as a mutex to ensure that there is only 1 active RunDropLoop per StorageManager,
// as it is possible that 2 separate RunDropLoop are created by 2 TBS processors during a hot reload.
dropLoopCh chan struct{}
// gcLoopCh acts as a mutex to ensure only 1 gc loop is running per StorageManager,
// as it is possible that 2 separate RunGCLoop are created by 2 TBS processors during a hot reload.
gcLoopCh chan struct{}
@@ -46,6 +52,7 @@ type StorageManager struct {
func NewStorageManager(storageDir string) (*StorageManager, error) {
sm := &StorageManager{
storageDir: storageDir,
dropLoopCh: make(chan struct{}, 1),
gcLoopCh: make(chan struct{}, 1),
logger: logp.NewLogger(logs.Sampling),
}
@@ -107,16 +114,131 @@ func (s *StorageManager) runValueLogGC(discardRatio float64) error {
return s.db.RunValueLogGC(discardRatio)
}

// RunDropLoop runs a loop that detects if storage limit has been exceeded for at least ttl.
// If so, it drops and recreates the underlying badger DB.
// The loop stops when it receives from stopping.
func (s *StorageManager) RunDropLoop(stopping <-chan struct{}, ttl time.Duration, storageLimitInBytes uint64) error {
select {
case <-stopping:
return nil
case s.dropLoopCh <- struct{}{}:
}
defer func() {
<-s.dropLoopCh
}()

if storageLimitInBytes == 0 {
<-stopping
return nil
}

timer := time.NewTicker(min(time.Minute, ttl)) // Eval db size every minute as badger reports it with a 1m lag; use min to facilitate testing
defer timer.Stop()
var firstExceeded time.Time
for {
select {
case <-stopping:
return nil
case <-timer.C:
lsm, vlog := s.Size()
if uint64(lsm+vlog) >= storageLimitInBytes { //FIXME: Add a bit of buffer? Is s.storage.pendingSize reliable enough?
now := time.Now()
if firstExceeded.IsZero() {
firstExceeded = now
}
if now.Sub(firstExceeded) >= ttl {
s.logger.Warnf("badger db size has exceeded storage limit for over TTL, please consider increasing sampling.tail.storage_size; dropping and recreating badger db to recover")
s.DropAndRecreate()
s.logger.Info("badger db dropped and recreated")
}
} else {
firstExceeded = time.Time{}
}
}
}
}

func (s *StorageManager) Close() error {
s.mu.RLock()
defer s.mu.RUnlock()
s.rw.Close()
return s.db.Close()
}

// Reset initializes db, storage, and rw.
func (s *StorageManager) Reset() error {
db, err := OpenBadger(s.storageDir, -1)
if err != nil {
return err
}
s.db = db
s.storage = New(db, ProtobufCodec{})
s.rw = s.storage.NewShardedReadWriter()
return nil
}

// Size returns the db size.
//
// The caller should either be the main Run loop or already hold RLock.
func (s *StorageManager) Size() (lsm, vlog int64) {
return s.db.Size()
}

func (s *StorageManager) runValueLogGC(discardRatio float64) error {
s.mu.RLock()
defer s.mu.RUnlock()
return s.db.RunValueLogGC(discardRatio)
}

// DropAndRecreate deletes the underlying badger DB at a file system level, and replaces it with a new badger DB.
func (s *StorageManager) DropAndRecreate() {
s.mu.Lock()
s.rw.Close()
err := s.db.Close()
if err != nil {
s.logger.With(logp.Error(err)).Error("error closing badger db during drop and recreate")
}

s.subscriberPosMu.Lock()
backupPath := filepath.Join(filepath.Dir(s.storageDir), filepath.Base(s.storageDir)+".old")
// FIXME: what if backupPath already exists?
err = os.Rename(s.storageDir, backupPath)
if err != nil {
s.logger.With(logp.Error(err)).Error("error renaming old badger db during drop and recreate")
}

// Since the subscriber position file lives in the same tail sampling directory as the badger DB,
// recreate the tail sampling dir and move the subscriber position file back, as it is not part of the DB.
var mode os.FileMode
stat, err := os.Stat(backupPath)
if err != nil {
mode = 0700
s.logger.With(logp.Error(err)).Error("error stat backup path during drop and recreate")
} else {
mode = stat.Mode()
}
err = os.Mkdir(s.storageDir, mode)
if err != nil {
s.logger.With(logp.Error(err)).Error("error mkdir storage dir during drop and recreate")
}
err = os.Rename(filepath.Join(backupPath, subscriberPositionFile), filepath.Join(s.storageDir, subscriberPositionFile))
if err != nil && !errors.Is(err, os.ErrNotExist) {
s.logger.With(logp.Error(err)).Error("error moving subscriber position file during drop and recreate")
}

err = s.Reset() //FIXME: this is likely fatal. Return error to crash the processor
if err != nil {
s.logger.With(logp.Error(err)).Error("error creating new badger db during drop and recreate")
}
s.subscriberPosMu.Unlock()
s.mu.Unlock()

err = os.RemoveAll(backupPath)
if err != nil {
s.logger.With(logp.Error(err)).Error("error removing old badger db during drop and recreate")
}
}

func (s *StorageManager) ReadSubscriberPosition() ([]byte, error) {
s.subscriberPosMu.Lock()
defer s.subscriberPosMu.Unlock()
@@ -142,26 +264,38 @@ type ManagedReadWriter struct {
}

func (s *ManagedReadWriter) ReadTraceEvents(traceID string, out *modelpb.Batch) error {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.ReadTraceEvents(traceID, out)
}

func (s *ManagedReadWriter) WriteTraceEvent(traceID, id string, event *modelpb.APMEvent, opts WriterOpts) error {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.WriteTraceEvent(traceID, id, event, opts)
}

func (s *ManagedReadWriter) WriteTraceSampled(traceID string, sampled bool, opts WriterOpts) error {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.WriteTraceSampled(traceID, sampled, opts)
}

func (s *ManagedReadWriter) IsTraceSampled(traceID string) (bool, error) {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.IsTraceSampled(traceID)
}

func (s *ManagedReadWriter) DeleteTraceEvent(traceID, id string) error {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.DeleteTraceEvent(traceID, id)
}

func (s *ManagedReadWriter) Flush() error {
s.sm.mu.RLock()
defer s.sm.mu.RUnlock()
return s.sm.rw.Flush()
}

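The dropLoopCh and gcLoopCh fields in the diff above use a buffered channel of capacity 1 as a mutex, so that at most one drop loop and one GC loop run per StorageManager even when a hot reload briefly creates two TBS processors. A minimal standalone sketch of that pattern follows; runExclusive and slot are illustrative names, not part of the committed code.

package main

import (
	"fmt"
	"time"
)

// runExclusive demonstrates the buffered-channel-as-mutex pattern used by
// dropLoopCh and gcLoopCh: only one goroutine at a time can place a token in
// the 1-slot channel, so a second loop either waits its turn or gives up as
// soon as stopping is closed.
func runExclusive(slot chan struct{}, stopping <-chan struct{}, name string) {
	select {
	case <-stopping:
		return
	case slot <- struct{}{}: // acquired the slot; no other loop of this kind is running
	}
	defer func() { <-slot }() // release the slot on exit

	fmt.Println(name, "running")
	<-stopping
	fmt.Println(name, "stopping")
}

func main() {
	slot := make(chan struct{}, 1)
	stopping := make(chan struct{})
	go runExclusive(slot, stopping, "loop A") // acquires the slot
	go runExclusive(slot, stopping, "loop B") // waits until A releases or stopping closes
	time.Sleep(100 * time.Millisecond)
	close(stopping)
	time.Sleep(100 * time.Millisecond) // give both goroutines a moment to print
}
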
3 changes: 3 additions & 0 deletions x-pack/apm-server/sampling/processor.go
@@ -382,6 +382,9 @@ func (p *Processor) Run() error {
g.Go(func() error {
return p.config.DB.RunGCLoop(p.stopping, p.config.StorageGCInterval)
})
g.Go(func() error {
return p.config.DB.RunDropLoop(p.stopping, p.config.TTL, p.config.StorageLimit)
})
g.Go(func() error {
// Subscribe to remotely sampled trace IDs. This is cancelled immediately when
// Stop is called. But it is possible that both old and new subscriber goroutines
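For context on the hunk above: the processor ties these background loops to its lifecycle with an errgroup and a shared stopping channel, and closing the channel is what makes RunGCLoop and RunDropLoop return nil. A simplified sketch of that wiring, with placeholder loop bodies rather than the real methods:

package main

import (
	"fmt"
	"time"

	"golang.org/x/sync/errgroup"
)

func main() {
	stopping := make(chan struct{})
	var g errgroup.Group

	// Each background loop blocks until stopping is closed and then returns
	// nil, mirroring how the loops are started from Processor.Run.
	g.Go(func() error {
		<-stopping
		fmt.Println("gc loop stopped")
		return nil
	})
	g.Go(func() error {
		<-stopping
		fmt.Println("drop loop stopped")
		return nil
	})

	time.Sleep(50 * time.Millisecond)
	close(stopping)
	if err := g.Wait(); err != nil {
		fmt.Println("error:", err)
	}
}
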
123 changes: 123 additions & 0 deletions x-pack/apm-server/sampling/processor_test.go
@@ -776,6 +776,129 @@ func TestProcessRemoteTailSamplingPersistence(t *testing.T) {
assert.Equal(t, `{"index_name":1}`, string(data))
}

func TestDropAndRecreate(t *testing.T) {
// This test ensures that if badger is stuck at the storage limit for at least TTL,
// the DB is dropped and recreated.
if testing.Short() {
t.Skip("skipping slow test")
}

writeBatch := func(t *testing.T, n int, c sampling.Config, assertBatch func(b modelpb.Batch)) *sampling.Processor {
processor, err := sampling.NewProcessor(c)
require.NoError(t, err)
go processor.Run()
defer processor.Stop(context.Background())
batch := make(modelpb.Batch, 0, n)
for i := 0; i < n; i++ {
traceID := uuid.Must(uuid.NewV4()).String()
batch = append(batch, &modelpb.APMEvent{
Trace: &modelpb.Trace{Id: traceID},
Event: &modelpb.Event{Duration: uint64(123 * time.Millisecond)},
Span: &modelpb.Span{
Type: "type",
Id: traceID,
},
})
}
err = processor.ProcessBatch(context.Background(), &batch)
require.NoError(t, err)
assertBatch(batch)
return processor
}

for _, tc := range []struct {
name string
subscriberPosExists bool
}{
{
name: "subscriber_position_not_exist",
subscriberPosExists: false,
},
{
name: "subscriber_position_exists",
subscriberPosExists: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
config := newTempdirConfig(t)
config.StorageGCInterval = time.Hour // effectively disable GC

config.FlushInterval = 10 * time.Millisecond
subscriberChan := make(chan string)
subscriber := pubsubtest.SubscriberChan(subscriberChan)
config.Elasticsearch = pubsubtest.Client(nil, subscriber)
subscriberPositionFile := filepath.Join(config.StorageDir, "subscriber_position.json")

// Write 5K span events and close the DB to persist the storage size to disk,
// and assert that none of the events are reported immediately.
writeBatch(t, 5000, config, func(b modelpb.Batch) {
assert.Empty(t, b, fmt.Sprintf("expected empty but size is %d", len(b)))

subscriberChan <- "0102030405060708090a0b0c0d0e0f10"
assert.Eventually(t, func() bool {
b, err := os.ReadFile(subscriberPositionFile)
return err == nil && string(b) == `{"index_name":1}`
}, time.Second, 100*time.Millisecond)
})
assert.NoError(t, config.Storage.Flush())
config.Storage.Close()
assert.NoError(t, config.DB.Close())

if !tc.subscriberPosExists {
err := os.Remove(subscriberPositionFile)
assert.NoError(t, err)
}

// Open a new instance of the badgerDB and check the size.
var err error
config.DB, err = eventstorage.NewStorageManager(config.StorageDir)
require.NoError(t, err)
t.Cleanup(func() { config.DB.Close() })

lsm, vlog := config.DB.Size()
assert.GreaterOrEqual(t, lsm+vlog, int64(1024))

sstFilenames := func() []string {
entries, _ := os.ReadDir(config.StorageDir)

var ssts []string
for _, entry := range entries {
name := entry.Name()
if strings.HasSuffix(name, ".sst") {
ssts = append(ssts, name)
}
}
sort.Strings(ssts)
return ssts
}
assert.NotEqual(t, "000000.sst", sstFilenames()[0])

config.Elasticsearch = pubsubtest.Client(nil, nil)

config.StorageLimit = uint64(lsm) - 100
config.TTL = time.Second
processor, err := sampling.NewProcessor(config)
require.NoError(t, err)
go processor.Run()
defer processor.Stop(context.Background())

var filenames []string
assert.Eventually(t, func() bool {
filenames = sstFilenames()
return len(filenames) == 0
}, 10*time.Second, 500*time.Millisecond, filenames)

b, err := os.ReadFile(subscriberPositionFile)
assert.NoError(t, err)
if tc.subscriberPosExists {
assert.Equal(t, `{"index_name":1}`, string(b))
} else {
assert.Equal(t, "{}", string(b))
}
})
}
}

func TestGracefulShutdown(t *testing.T) {
config := newTempdirConfig(t)
sampleRate := 0.5
