
Commit acf63aa

Merge pull request #425 from docker/enhance-mlx
Take MLX support one step further
2 parents f523abe + a4ef4cc commit acf63aa

File tree

5 files changed: +426 −10 lines


main.go

Lines changed: 16 additions & 1 deletion
@@ -13,6 +13,7 @@ import (
 	"github.com/docker/model-runner/pkg/gpuinfo"
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+	"github.com/docker/model-runner/pkg/inference/backends/mlx"
 	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/memory"
@@ -131,9 +132,23 @@ func main() {
 		log.Fatalf("unable to initialize %s backend: %v", vllm.Name, err)
 	}
 
+	mlxBackend, err := mlx.New(
+		log,
+		modelManager,
+		log.WithFields(logrus.Fields{"component": mlx.Name}),
+		nil,
+	)
+	if err != nil {
+		log.Fatalf("unable to initialize %s backend: %v", mlx.Name, err)
+	}
+
 	scheduler := scheduling.NewScheduler(
 		log,
-		map[string]inference.Backend{llamacpp.Name: llamaCppBackend, vllm.Name: vllmBackend},
+		map[string]inference.Backend{
+			llamacpp.Name: llamaCppBackend,
+			vllm.Name:     vllmBackend,
+			mlx.Name:      mlxBackend,
+		},
 		llamaCppBackend,
 		modelManager,
 		http.DefaultClient,
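
Note: main.go passes nil for the config, so the backend falls back to NewDefaultMLXConfig(). A minimal sketch of wiring a custom config instead — since GetArgs places Config.Args before "-m mlx_lm.server", they act as Python interpreter flags (here Python's standard -u flag for unbuffered output; illustrative only, not part of this commit):

	// Sketch: construct the MLX backend with a custom base configuration.
	// Config.Args precede "-m mlx_lm.server", so they are interpreter flags.
	mlxBackend, err := mlx.New(
		log,
		modelManager,
		log.WithFields(logrus.Fields{"component": mlx.Name}),
		&mlx.Config{Args: []string{"-u"}}, // unbuffered stdout/stderr
	)
	if err != nil {
		log.Fatalf("unable to initialize %s backend: %v", mlx.Name, err)
	}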

pkg/inference/backends/mlx/mlx.go

Lines changed: 90 additions & 9 deletions
@@ -3,10 +3,15 @@ package mlx
 import (
 	"context"
 	"errors"
+	"fmt"
 	"net/http"
+	"os/exec"
+	"strings"
 
 	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/backends"
 	"github.com/docker/model-runner/pkg/inference/models"
+	"github.com/docker/model-runner/pkg/inference/platform"
 	"github.com/docker/model-runner/pkg/logging"
 )

@@ -15,19 +20,37 @@ const (
 	Name = "mlx"
 )
 
+var ErrStatusNotFound = errors.New("Python or mlx-lm not found")
+
 // mlx is the MLX-based backend implementation.
 type mlx struct {
 	// log is the associated logger.
 	log logging.Logger
 	// modelManager is the shared model manager.
 	modelManager *models.Manager
+	// serverLog is the logger to use for the MLX server process.
+	serverLog logging.Logger
+	// config is the configuration for the MLX backend.
+	config *Config
+	// status is the state the MLX backend is in.
+	status string
+	// pythonPath is the path to the python3 binary.
+	pythonPath string
 }
 
 // New creates a new MLX-based backend.
-func New(log logging.Logger, modelManager *models.Manager) (inference.Backend, error) {
+func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, conf *Config) (inference.Backend, error) {
+	// If no config is provided, use the default configuration.
+	if conf == nil {
+		conf = NewDefaultMLXConfig()
+	}
+
 	return &mlx{
 		log:          log,
 		modelManager: modelManager,
+		serverLog:    serverLog,
+		config:       conf,
+		status:       "not installed",
 	}, nil
 }

@@ -38,31 +61,89 @@ func (m *mlx) Name() string {
 
 // UsesExternalModelManagement implements
 // inference.Backend.UsesExternalModelManagement.
-func (l *mlx) UsesExternalModelManagement() bool {
+func (m *mlx) UsesExternalModelManagement() bool {
 	return false
 }
 
 // Install implements inference.Backend.Install.
 func (m *mlx) Install(ctx context.Context, httpClient *http.Client) error {
-	// TODO: Implement.
-	return errors.New("not implemented")
+	if !platform.SupportsMLX() {
+		return errors.New("MLX is only available on macOS ARM64")
+	}
+
+	// Check if Python 3 is available.
+	pythonPath, err := exec.LookPath("python3")
+	if err != nil {
+		m.status = ErrStatusNotFound.Error()
+		return ErrStatusNotFound
+	}
+
+	// Store the Python path for later use.
+	m.pythonPath = pythonPath
+
+	// Check if the mlx-lm package is installed by attempting to import it.
+	cmd := exec.CommandContext(ctx, pythonPath, "-c", "import mlx_lm")
+	if err := cmd.Run(); err != nil {
+		m.status = "mlx-lm package not installed"
+		m.log.Warnf("mlx-lm package not found. Install with: uv pip install mlx-lm")
+		return fmt.Errorf("mlx-lm package not installed: %w", err)
+	}
+
+	// Get the MLX version.
+	cmd = exec.CommandContext(ctx, pythonPath, "-c", "import mlx; print(mlx.__version__)")
+	output, err := cmd.Output()
+	if err != nil {
+		m.log.Warnf("could not get MLX version: %v", err)
+		m.status = "running MLX version: unknown"
+	} else {
+		m.status = fmt.Sprintf("running MLX version: %s", strings.TrimSpace(string(output)))
+	}
+
+	return nil
 }
 
 // Run implements inference.Backend.Run.
-func (m *mlx) Run(ctx context.Context, socket, model string, modelRef string, mode inference.BackendMode, config *inference.BackendConfiguration) error {
-	// TODO: Implement.
-	m.log.Warn("MLX backend is not yet supported")
-	return errors.New("not implemented")
+func (m *mlx) Run(ctx context.Context, socket, model string, modelRef string, mode inference.BackendMode, backendConfig *inference.BackendConfiguration) error {
+	bundle, err := m.modelManager.GetBundle(model)
+	if err != nil {
+		return fmt.Errorf("failed to get model: %w", err)
+	}
+
+	args, err := m.config.GetArgs(bundle, socket, mode, backendConfig)
+	if err != nil {
+		return fmt.Errorf("failed to get MLX arguments: %w", err)
+	}
+
+	// Add the served model name.
+	args = append(args, "--served-model-name", model, modelRef)
+
+	return backends.RunBackend(ctx, backends.RunnerConfig{
+		BackendName:     "MLX",
+		Socket:          socket,
+		BinaryPath:      m.pythonPath,
+		SandboxPath:     "",
+		SandboxConfig:   "",
+		Args:            args,
+		Logger:          m.log,
+		ServerLogWriter: m.serverLog.Writer(),
+	})
 }
 
 func (m *mlx) Status() string {
-	return "not running"
+	return m.status
 }
 
 func (m *mlx) GetDiskUsage() (int64, error) {
+	// MLX doesn't have a dedicated installation directory;
+	// it's installed via pip in the system Python environment.
 	return 0, nil
 }
 
 func (m *mlx) GetRequiredMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (inference.RequiredMemory, error) {
+	// TODO: Implement accurate memory estimation based on model size.
+	// MLX runs on a unified memory architecture (Apple Silicon), so memory estimation
+	// will need to account for the unified nature of RAM and VRAM on Apple Silicon.
+	// Returning an error prevents the scheduler from making incorrect decisions based
+	// on placeholder values.
 	return inference.RequiredMemory{}, errors.New("not implemented")
 }
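
Note: putting Install, Run, and GetArgs together, the process launched through backends.RunBackend looks roughly like the following. This is a sketch with hypothetical paths and model names; the exact argv depends on the model bundle and any RuntimeFlags.

	// Sketch of the argv handed to backends.RunBackend for a completion run.
	// Values are illustrative, not taken from a real bundle.
	args := []string{
		"-m", "mlx_lm.server",
		"--model", "/models/blobs/my-model", // filepath.Dir of the bundle's safetensors file
		"--host", "/var/run/mlx.sock",       // the scheduler-provided socket
		"--served-model-name", "ai/my-model", "ai/my-model:latest",
	}
	// Executed via m.pythonPath, i.e. roughly:
	//   python3 -m mlx_lm.server --model … --host … --served-model-name …
	_ = args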
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
+package mlx
+
+import (
+	"fmt"
+	"path/filepath"
+	"strconv"
+
+	"github.com/docker/model-runner/pkg/distribution/types"
+	"github.com/docker/model-runner/pkg/inference"
+)
+
+// Config is the configuration for the MLX backend.
+type Config struct {
+	// Args are the base arguments that are always included.
+	Args []string
+}
+
+// NewDefaultMLXConfig creates a new Config with default values.
+func NewDefaultMLXConfig() *Config {
+	return &Config{
+		Args: []string{},
+	}
+}
+
+// GetArgs implements BackendConfig.GetArgs.
+func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) {
+	// Start with the base arguments from the Config.
+	args := append([]string{}, c.Args...)
+
+	// MLX runs as a Python module: python -m mlx_lm.server.
+	args = append(args, "-m", "mlx_lm.server")
+
+	// Add the model path (MLX works with the safetensors format).
+	safetensorsPath := bundle.SafetensorsPath()
+	if safetensorsPath == "" {
+		return nil, fmt.Errorf("safetensors path required by MLX backend")
+	}
+	modelPath := filepath.Dir(safetensorsPath)
+
+	// Add model and socket arguments.
+	args = append(args, "--model", modelPath, "--host", socket)
+
+	// Add mode-specific arguments.
+	switch mode {
+	case inference.BackendModeCompletion:
+		// Default mode for MLX.
+	case inference.BackendModeEmbedding:
+		// MLX doesn't have a specific embedding flag; embedding models are detected automatically.
+	case inference.BackendModeReranking:
+		// MLX may not support reranking mode.
+		return nil, fmt.Errorf("reranking mode not supported by MLX backend")
+	default:
+		return nil, fmt.Errorf("unsupported backend mode %q", mode)
+	}
+
+	// Add max-tokens if specified in the model config or backend config.
+	if maxLen := GetMaxTokens(bundle.RuntimeConfig(), config); maxLen != nil {
+		args = append(args, "--max-tokens", strconv.FormatUint(*maxLen, 10))
+	}
+
+	// Add arguments from the backend config.
+	if config != nil {
+		args = append(args, config.RuntimeFlags...)
+	}
+
+	return args, nil
+}
+
+// GetMaxTokens returns the max tokens (context size) from the model config or backend config.
+// The model config takes precedence over the backend config.
+// Returns nil if neither is specified (MLX will use the model's defaults).
+func GetMaxTokens(modelCfg types.Config, backendCfg *inference.BackendConfiguration) *uint64 {
+	// The model config takes precedence.
+	if modelCfg.ContextSize != nil {
+		return modelCfg.ContextSize
+	}
+	// Otherwise, fall back to the backend config.
+	if backendCfg != nil && backendCfg.ContextSize > 0 {
+		val := uint64(backendCfg.ContextSize)
+		return &val
+	}
+	// Return nil to let MLX use the model's defaults.
+	return nil
+}
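
Note: to make the GetMaxTokens precedence concrete, a small sketch (field types inferred from the diff: types.Config.ContextSize is a *uint64, inference.BackendConfiguration.ContextSize a plain integer; all values hypothetical):

	modelCtx := uint64(8192)

	// The model config wins even when the backend config is also set.
	fmt.Println(*GetMaxTokens(types.Config{ContextSize: &modelCtx},
		&inference.BackendConfiguration{ContextSize: 4096})) // 8192

	// The backend config is the fallback when the model config is empty.
	fmt.Println(*GetMaxTokens(types.Config{},
		&inference.BackendConfiguration{ContextSize: 4096})) // 4096

	// Neither set: nil, so mlx_lm.server keeps the model's own default.
	fmt.Println(GetMaxTokens(types.Config{}, nil)) // <nil>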

0 commit comments
