@@ -276,15 +276,20 @@ func (l *loader) evict(idleOnly bool) int {
276276// It returns the number of remaining runners.
277277func (l * loader ) evictRunner (backend , model string , mode inference.BackendMode ) int {
278278 allBackends := backend == ""
279+ found := false
279280 for r , runnerInfo := range l .runners {
280281 unused := l .references [runnerInfo .slot ] == 0
281282 if unused && (allBackends || r .backend == backend ) && r .modelID == model && r .mode == mode {
282283 l .log .Infof ("Evicting %s backend runner with model %s (%s) in %s mode" ,
283284 r .backend , r .modelID , runnerInfo .modelRef , r .mode ,
284285 )
285286 l .freeRunnerSlot (runnerInfo .slot , r )
287+ found = true
286288 }
287289 }
290+ if ! found {
291+ l .log .Warnf ("No unused runner found for backend=%s, model=%s, mode=%s" , backend , model , mode )
292+ }
288293 return len (l .runners )
289294}
290295
@@ -308,10 +313,11 @@ func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
308313 delete (l .runnerConfigs , key )
309314 }
310315 }
311- // Evict both, completion and embedding models . We should consider
316+ // Evict all mode types . We should consider
312317 // accepting a mode parameter in unload requests.
313318 l .evictRunner (unload .Backend , modelID , inference .BackendModeCompletion )
314319 l .evictRunner (unload .Backend , modelID , inference .BackendModeEmbedding )
320+ l .evictRunner (unload .Backend , modelID , inference .BackendModeReranking )
315321 }
316322 return len (l .runners )
317323 }
0 commit comments