Skip to content

Commit d9f56cd

Browse files
committed
fix(scheduling): unload reranking models
Signed-off-by: Dorin Geman <[email protected]>
1 parent 0699420 commit d9f56cd

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

pkg/inference/scheduling/loader.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,15 +276,20 @@ func (l *loader) evict(idleOnly bool) int {
276276
// It returns the number of remaining runners.
277277
func (l *loader) evictRunner(backend, model string, mode inference.BackendMode) int {
// Frees the slot of every unused runner whose model ID and mode match the
// arguments, logs a warning if nothing qualified, and reports how many
// runners are left in l.runners afterwards.
//
// An empty backend acts as a wildcard: runners of any backend are eligible.
278278
allBackends := backend == ""
// found records whether this call evicted at least one runner.
279+
found := false
279280
for r, runnerInfo := range l.runners {
// A runner counts as unused when the reference count stored for its slot is zero.
280281
unused := l.references[runnerInfo.slot] == 0
// Model ID and mode must match exactly; the backend must match unless the
// wildcard is active. Runners that are still referenced are never evicted.
281282
if unused && (allBackends || r.backend == backend) && r.modelID == model && r.mode == mode {
282283
l.log.Infof("Evicting %s backend runner with model %s (%s) in %s mode",
283284
r.backend, r.modelID, runnerInfo.modelRef, r.mode,
284285
)
// NOTE(review): presumably freeRunnerSlot removes r from l.runners (the
// returned count below depends on the map shrinking) — confirm against
// its definition.
285286
l.freeRunnerSlot(runnerInfo.slot, r)
287+
found = true
286288
}
287289
}
// The warning also fires when a matching runner exists but is still
// referenced, because `unused` is part of the match condition above.
// NOTE(review): Unload calls this once per mode for the same model, so a
// warning is emitted for every mode that has no evictable runner — consider
// whether Warn is the intended level.
290+
if !found {
291+
l.log.Warnf("No unused runner found for backend=%s, model=%s, mode=%s", backend, model, mode)
292+
}
288293
return len(l.runners)
289294
}
290295

@@ -308,10 +313,11 @@ func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
308313
delete(l.runnerConfigs, key)
309314
}
310315
}
311-
// Evict both, completion and embedding models. We should consider
316+
// Evict all mode types. We should consider
312317
// accepting a mode parameter in unload requests.
313318
l.evictRunner(unload.Backend, modelID, inference.BackendModeCompletion)
314319
l.evictRunner(unload.Backend, modelID, inference.BackendModeEmbedding)
320+
l.evictRunner(unload.Backend, modelID, inference.BackendModeReranking)
315321
}
316322
return len(l.runners)
317323
}

0 commit comments

Comments
 (0)