| package model_test |
|
|
| import ( |
| "sync" |
| "time" |
|
|
| "github.com/mudler/LocalAI/pkg/model" |
| . "github.com/onsi/ginkgo/v2" |
| . "github.com/onsi/gomega" |
| ) |
|
|
| |
// mockProcessManager is a test double that records every ShutdownModel call
// and can be primed with per-model errors to hand back.
type mockProcessManager struct {
	// mu serializes access to the two fields below.
	mu sync.Mutex
	// shutdownCalls holds model names in the order ShutdownModel received them.
	shutdownCalls []string
	// shutdownErrors maps a model name to the error ShutdownModel returns for it.
	shutdownErrors map[string]error
}

// newMockProcessManager returns a mock with an empty call log and no primed
// errors.
func newMockProcessManager() *mockProcessManager {
	pm := new(mockProcessManager)
	pm.shutdownCalls = make([]string, 0)
	pm.shutdownErrors = map[string]error{}
	return pm
}

// ShutdownModel appends modelName to the call log and returns the error
// primed for that name in shutdownErrors, or nil when none is registered.
func (m *mockProcessManager) ShutdownModel(modelName string) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.shutdownCalls = append(m.shutdownCalls, modelName)

	// Indexing yields the primed error, or the nil zero value for a missing key.
	return m.shutdownErrors[modelName]
}

// getShutdownCalls returns a copy of the call log, so callers can inspect or
// modify the result without touching the mock's own slice.
func (m *mockProcessManager) getShutdownCalls() []string {
	m.mu.Lock()
	defer m.mu.Unlock()

	snapshot := make([]string, 0, len(m.shutdownCalls))
	return append(snapshot, m.shutdownCalls...)
}
|
|
| var _ = Describe("WatchDog", func() { |
| var ( |
| wd *model.WatchDog |
| pm *mockProcessManager |
| ) |
|
|
| BeforeEach(func() { |
| pm = newMockProcessManager() |
| }) |
|
|
| Context("LRU Limit", func() { |
| It("should create watchdog with LRU limit", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithBusyTimeout(5*time.Minute), |
| model.WithIdleTimeout(15*time.Minute), |
| model.WithLRULimit(2), |
| ) |
| Expect(wd.GetLRULimit()).To(Equal(2)) |
| }) |
|
|
| It("should allow updating LRU limit dynamically", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithLRULimit(2), |
| ) |
| wd.SetLRULimit(5) |
| Expect(wd.GetLRULimit()).To(Equal(5)) |
| }) |
|
|
| It("should return 0 for disabled LRU", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithLRULimit(0), |
| ) |
| Expect(wd.GetLRULimit()).To(Equal(0)) |
| }) |
| }) |
|
|
| Context("Memory Reclaimer Options", func() { |
| It("should create watchdog with memory reclaimer settings", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithMemoryReclaimer(true, 0.85), |
| ) |
| enabled, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(enabled).To(BeTrue()) |
| Expect(threshold).To(Equal(0.85)) |
| }) |
|
|
| It("should allow setting memory reclaimer via separate options", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithMemoryReclaimerEnabled(true), |
| model.WithMemoryReclaimerThreshold(0.90), |
| ) |
| enabled, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(enabled).To(BeTrue()) |
| Expect(threshold).To(Equal(0.90)) |
| }) |
|
|
| It("should use default threshold when not specified", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| ) |
| _, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(threshold).To(Equal(model.DefaultMemoryReclaimerThreshold)) |
| }) |
|
|
| It("should allow updating memory reclaimer settings dynamically", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| ) |
| wd.SetMemoryReclaimer(true, 0.80) |
| enabled, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(enabled).To(BeTrue()) |
| Expect(threshold).To(Equal(0.80)) |
| }) |
| }) |
|
|
| Context("Model Tracking", func() { |
| BeforeEach(func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithBusyTimeout(5*time.Minute), |
| model.WithIdleTimeout(15*time.Minute), |
| model.WithLRULimit(3), |
| ) |
| }) |
|
|
| It("should track loaded models count", func() { |
| Expect(wd.GetLoadedModelCount()).To(Equal(0)) |
|
|
| wd.AddAddressModelMap("addr1", "model1") |
| Expect(wd.GetLoadedModelCount()).To(Equal(1)) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| Expect(wd.GetLoadedModelCount()).To(Equal(2)) |
| }) |
|
|
| It("should update lastUsed time on Mark", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| |
| |
| }) |
|
|
| It("should update lastUsed time on UnMark", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
| wd.UnMark("addr1") |
| |
| }) |
|
|
| It("should update lastUsed time via UpdateLastUsed", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.UpdateLastUsed("addr1") |
| |
| }) |
| }) |
|
|
| Context("EnforceLRULimit", func() { |
| BeforeEach(func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithBusyTimeout(5*time.Minute), |
| model.WithIdleTimeout(15*time.Minute), |
| model.WithLRULimit(2), |
| model.WithForceEvictionWhenBusy(true), |
| ) |
| }) |
|
|
| It("should not evict when under limit", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
|
|
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(0)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(BeEmpty()) |
| }) |
|
|
| It("should evict oldest model when at limit", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model1")) |
| }) |
|
|
| It("should evict multiple models when needed", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr3", "model3") |
| wd.Mark("addr3") |
| wd.UnMark("addr3") |
|
|
| |
| wd.SetLRULimit(1) |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(3)) |
| Expect(result.NeedMore).To(BeFalse()) |
| shutdowns := pm.getShutdownCalls() |
| Expect(shutdowns).To(ContainElement("model1")) |
| Expect(shutdowns).To(ContainElement("model2")) |
| Expect(shutdowns).To(ContainElement("model3")) |
| }) |
|
|
| It("should account for pending loads", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
|
|
| |
| |
| result := wd.EnforceLRULimit(1) |
| Expect(result.EvictedCount).To(Equal(2)) |
| Expect(result.NeedMore).To(BeFalse()) |
| }) |
|
|
| It("should not evict when LRU is disabled", func() { |
| wd.SetLRULimit(0) |
|
|
| wd.AddAddressModelMap("addr1", "model1") |
| wd.AddAddressModelMap("addr2", "model2") |
| wd.AddAddressModelMap("addr3", "model3") |
|
|
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(0)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(BeEmpty()) |
| }) |
|
|
| It("should evict least recently used first", func() { |
| wd.SetLRULimit(2) |
|
|
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
| time.Sleep(20 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| time.Sleep(20 * time.Millisecond) |
|
|
| |
| wd.UpdateLastUsed("addr1") |
| time.Sleep(20 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr3", "model3") |
| wd.Mark("addr3") |
| wd.UnMark("addr3") |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(BeNumerically(">=", 1)) |
| Expect(result.NeedMore).To(BeFalse()) |
|
|
| shutdowns := pm.getShutdownCalls() |
| |
| if len(shutdowns) >= 1 { |
| Expect(shutdowns[0]).To(Equal("model2")) |
| } |
| }) |
| }) |
|
|
| Context("Single Backend Mode (LRU=1)", func() { |
| BeforeEach(func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithBusyTimeout(5*time.Minute), |
| model.WithIdleTimeout(15*time.Minute), |
| model.WithLRULimit(1), |
| model.WithForceEvictionWhenBusy(true), |
| ) |
| }) |
|
|
| It("should evict existing model when loading new one", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model1")) |
| }) |
|
|
| It("should handle rapid model switches", func() { |
| for i := 0; i < 5; i++ { |
| wd.AddAddressModelMap("addr", "model") |
| wd.Mark("addr") |
| wd.UnMark("addr") |
| wd.EnforceLRULimit(0) |
| } |
| |
| Expect(len(pm.getShutdownCalls())).To(Equal(5)) |
| }) |
| }) |
|
|
| Context("Force Eviction When Busy", func() { |
| BeforeEach(func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithLRULimit(2), |
| model.WithForceEvictionWhenBusy(false), |
| ) |
| }) |
|
|
| It("should skip eviction for busy models when forceEvictionWhenBusy is false", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
|
|
| |
| |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| |
| |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model2")) |
| Expect(pm.getShutdownCalls()).ToNot(ContainElement("model1")) |
| }) |
|
|
| It("should evict busy models when forceEvictionWhenBusy is true", func() { |
| wd.SetForceEvictionWhenBusy(true) |
|
|
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
|
|
| |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model1")) |
| }) |
|
|
| It("should set NeedMore when all models are busy and forceEvictionWhenBusy is false", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
|
|
| |
| wd.Mark("addr1") |
| wd.Mark("addr2") |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(0)) |
| Expect(result.NeedMore).To(BeTrue()) |
| Expect(pm.getShutdownCalls()).To(BeEmpty()) |
| }) |
|
|
| It("should allow updating forceEvictionWhenBusy dynamically", func() { |
| |
| Expect(wd).ToNot(BeNil()) |
|
|
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(result.EvictedCount).To(Equal(1)) |
|
|
| |
| wd.SetForceEvictionWhenBusy(true) |
|
|
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| |
|
|
| |
| result = wd.EnforceLRULimit(0) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(result.EvictedCount).To(Equal(1)) |
| }) |
|
|
| It("should continue to next LRU model when busy model is skipped", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr3", "model3") |
| wd.Mark("addr3") |
| wd.UnMark("addr3") |
|
|
| |
|
|
| |
| |
| result := wd.EnforceLRULimit(0) |
| |
| Expect(result.EvictedCount).To(Equal(2)) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model2")) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model3")) |
| }) |
| }) |
|
|
| Context("EnforceLRULimitResult", func() { |
| BeforeEach(func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithLRULimit(2), |
| model.WithForceEvictionWhenBusy(false), |
| ) |
| }) |
|
|
| It("should return NeedMore=false when eviction is successful", func() { |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| wd.UnMark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
|
|
| result := wd.EnforceLRULimit(0) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(result.EvictedCount).To(Equal(1)) |
| }) |
|
|
| It("should return NeedMore=true when not enough models can be evicted", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
|
|
| |
| |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.NeedMore).To(BeTrue()) |
| Expect(result.EvictedCount).To(Equal(0)) |
| }) |
|
|
| It("should return NeedMore=true when need to evict multiple but some are busy", func() { |
| |
| wd.SetLRULimit(1) |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr3", "model3") |
| wd.Mark("addr3") |
| |
|
|
| |
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(result.NeedMore).To(BeTrue()) |
| }) |
|
|
| It("should return correct EvictedCount when some models are evicted", func() { |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.UnMark("addr2") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr3", "model3") |
| wd.Mark("addr3") |
| wd.UnMark("addr3") |
|
|
| |
|
|
| |
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.EvictedCount).To(Equal(2)) |
| Expect(result.NeedMore).To(BeFalse()) |
| }) |
| }) |
|
|
| Context("Functional Options", func() { |
| It("should use default options when none provided", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| ) |
| Expect(wd.GetLRULimit()).To(Equal(0)) |
|
|
| enabled, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(enabled).To(BeFalse()) |
| Expect(threshold).To(Equal(model.DefaultMemoryReclaimerThreshold)) |
| }) |
|
|
| It("should allow combining multiple options", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithBusyTimeout(10*time.Minute), |
| model.WithIdleTimeout(30*time.Minute), |
| model.WithBusyCheck(true), |
| model.WithIdleCheck(true), |
| model.WithLRULimit(5), |
| model.WithMemoryReclaimerEnabled(true), |
| model.WithMemoryReclaimerThreshold(0.80), |
| model.WithForceEvictionWhenBusy(true), |
| ) |
|
|
| Expect(wd.GetLRULimit()).To(Equal(5)) |
|
|
| enabled, threshold := wd.GetMemoryReclaimerSettings() |
| Expect(enabled).To(BeTrue()) |
| Expect(threshold).To(Equal(0.80)) |
| }) |
|
|
| It("should use default forceEvictionWhenBusy (false) when not specified", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| ) |
| |
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| wd.Mark("addr1") |
|
|
| wd.SetLRULimit(1) |
| result := wd.EnforceLRULimit(0) |
| |
| Expect(result.NeedMore).To(BeTrue()) |
| }) |
|
|
| It("should allow setting forceEvictionWhenBusy via option", func() { |
| wd = model.NewWatchDog( |
| model.WithProcessManager(pm), |
| model.WithLRULimit(2), |
| model.WithForceEvictionWhenBusy(true), |
| ) |
|
|
| |
| wd.AddAddressModelMap("addr1", "model1") |
| wd.Mark("addr1") |
| time.Sleep(10 * time.Millisecond) |
|
|
| wd.AddAddressModelMap("addr2", "model2") |
| wd.Mark("addr2") |
| |
|
|
| |
| result := wd.EnforceLRULimit(0) |
| Expect(result.NeedMore).To(BeFalse()) |
| Expect(result.EvictedCount).To(Equal(1)) |
| Expect(pm.getShutdownCalls()).To(ContainElement("model1")) |
| }) |
| }) |
| }) |
|
|