Skip to content

Commit 6b08862

Browse files
authored
Split model http handler vs model manager (#459)
* refactor: introduce service layer for model management and separate business logic from HTTP handling
* refactor: replace model manager with service layer for improved model handling
* Continue migrating services
* Renaming
* swap names to reduce the changeset
* sanitize log output for model packaging error
* refactor: update comments to improve clarity in handler and manager
1 parent 9f84852 commit 6b08862

File tree

12 files changed

+925
-881
lines changed

12 files changed

+925
-881
lines changed

main.go

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -84,17 +84,18 @@ func main() {
8484
}
8585
baseTransport.Proxy = http.ProxyFromEnvironment
8686

87-
modelManager := models.NewManager(
87+
clientConfig := models.ClientConfig{
88+
StoreRootPath: modelPath,
89+
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
90+
Transport: baseTransport,
91+
}
92+
modelHandler := models.NewHandler(
8893
log,
89-
models.ClientConfig{
90-
StoreRootPath: modelPath,
91-
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
92-
Transport: baseTransport,
93-
},
94+
clientConfig,
9495
nil,
9596
memEstimator,
9697
)
97-
98+
modelManager := models.NewManager(log.WithFields(logrus.Fields{"component": "model-manager"}), clientConfig)
9899
log.Infof("LLAMA_SERVER_PATH: %s", llamaServerPath)
99100

100101
// Create llama.cpp configuration from environment variables
@@ -151,6 +152,7 @@ func main() {
151152
mlx.Name: mlxBackend,
152153
},
153154
llamaCppBackend,
155+
modelHandler,
154156
modelManager,
155157
http.DefaultClient,
156158
nil,
@@ -168,8 +170,8 @@ func main() {
168170
// Register path prefixes to forward all HTTP methods (including OPTIONS) to components
169171
// Components handle method routing internally
170172
// Register both with and without trailing slash to avoid redirects
171-
router.Handle(inference.ModelsPrefix, modelManager)
172-
router.Handle(inference.ModelsPrefix+"/", modelManager)
173+
router.Handle(inference.ModelsPrefix, modelHandler)
174+
router.Handle(inference.ModelsPrefix+"/", modelHandler)
173175
router.Handle(inference.InferencePrefix+"/", scheduler)
174176
// Add path aliases: /v1 -> /engines/v1, /rerank -> /engines/rerank, /score -> /engines/score.
175177
aliasHandler := &middleware.AliasHandler{Handler: scheduler}
@@ -178,7 +180,7 @@ func main() {
178180
router.Handle("/score", aliasHandler)
179181

180182
// Add Ollama API compatibility layer (only register with trailing slash to catch sub-paths)
181-
ollamaHandler := ollama.NewHandler(log, modelManager, scheduler, nil)
183+
ollamaHandler := ollama.NewHandler(log, scheduler, nil, modelManager)
182184
router.Handle(ollama.APIPrefix+"/", ollamaHandler)
183185

184186
// Register root handler LAST - it will only catch exact "/" requests that don't match other patterns

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string,
229229

230230
// parseModel parses a model (local or remote) and returns the GGUF file and config.
231231
func (l *llamaCpp) parseModel(ctx context.Context, model string) (*parser.GGUFFile, types.Config, error) {
232-
inStore, err := l.modelManager.IsModelInStore(model)
232+
inStore, err := l.modelManager.InStore(model)
233233
if err != nil {
234234
return nil, types.Config{}, fmt.Errorf("checking if model is in local store: %w", err)
235235
}
@@ -271,7 +271,7 @@ func (l *llamaCpp) parseLocalModel(model string) (*parser.GGUFFile, types.Config
271271
}
272272

273273
func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser.GGUFFile, types.Config, error) {
274-
mdl, err := l.modelManager.GetRemoteModel(ctx, model)
274+
mdl, err := l.modelManager.GetRemote(ctx, model)
275275
if err != nil {
276276
return nil, types.Config{}, fmt.Errorf("getting remote model(%s): %w", model, err)
277277
}
@@ -292,7 +292,7 @@ func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser.
292292
if ggufDigest.String() == "" {
293293
return nil, types.Config{}, fmt.Errorf("model(%s) has no GGUF layer", model)
294294
}
295-
blobURL, err := l.modelManager.GetRemoteModelBlobURL(model, ggufDigest)
295+
blobURL, err := l.modelManager.GetRemoteBlobURL(model, ggufDigest)
296296
if err != nil {
297297
return nil, types.Config{}, fmt.Errorf("getting GGUF blob URL for model(%s): %w", model, err)
298298
}

pkg/inference/models/adapter.go

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
package models
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/docker/model-runner/pkg/distribution/types"
7+
)
8+
9+
func ToModel(m types.Model) (*Model, error) {
10+
desc, err := m.Descriptor()
11+
if err != nil {
12+
return nil, fmt.Errorf("get descriptor: %w", err)
13+
}
14+
15+
id, err := m.ID()
16+
if err != nil {
17+
return nil, fmt.Errorf("get id: %w", err)
18+
}
19+
20+
cfg, err := m.Config()
21+
if err != nil {
22+
return nil, fmt.Errorf("get config: %w", err)
23+
}
24+
25+
created := int64(0)
26+
if desc.Created != nil {
27+
created = desc.Created.Unix()
28+
}
29+
30+
return &Model{
31+
ID: id,
32+
Tags: m.Tags(),
33+
Created: created,
34+
Config: cfg,
35+
}, nil
36+
}
37+
38+
// ToModelFromArtifact converts a types.ModelArtifact (typically from remote registry)
39+
// to the API Model representation. Remote models don't have tags.
40+
func ToModelFromArtifact(artifact types.ModelArtifact) (*Model, error) {
41+
desc, err := artifact.Descriptor()
42+
if err != nil {
43+
return nil, fmt.Errorf("get descriptor: %w", err)
44+
}
45+
46+
id, err := artifact.ID()
47+
if err != nil {
48+
return nil, fmt.Errorf("get id: %w", err)
49+
}
50+
51+
cfg, err := artifact.Config()
52+
if err != nil {
53+
return nil, fmt.Errorf("get config: %w", err)
54+
}
55+
56+
created := int64(0)
57+
if desc.Created != nil {
58+
created = desc.Created.Unix()
59+
}
60+
61+
return &Model{
62+
ID: id,
63+
Tags: nil, // Remote models don't have local tags
64+
Created: created,
65+
Config: cfg,
66+
}, nil
67+
}

pkg/inference/models/api.go

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -125,32 +125,3 @@ type Model struct {
125125
// Config describes the model.
126126
Config types.Config `json:"config"`
127127
}
128-
129-
func ToModel(m types.Model) (*Model, error) {
130-
desc, err := m.Descriptor()
131-
if err != nil {
132-
return nil, fmt.Errorf("get descriptor: %w", err)
133-
}
134-
135-
id, err := m.ID()
136-
if err != nil {
137-
return nil, fmt.Errorf("get id: %w", err)
138-
}
139-
140-
cfg, err := m.Config()
141-
if err != nil {
142-
return nil, fmt.Errorf("get config: %w", err)
143-
}
144-
145-
created := int64(0)
146-
if desc.Created != nil {
147-
created = desc.Created.Unix()
148-
}
149-
150-
return &Model{
151-
ID: id,
152-
Tags: m.Tags(),
153-
Created: created,
154-
Config: cfg,
155-
}, nil
156-
}

0 commit comments

Comments
 (0)