Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions pkg/distribution/internal/store/bundles.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ func (s *LocalStore) BundleForModel(ref string) (types.ModelBundle, error) {
return nil, fmt.Errorf("get model ID: %w", err)
}
path := s.bundlePath(dgst)
if bdl, err := bundle.Parse(path); err != nil {
bdl, err := bundle.Parse(path)
if err != nil {
// create for first time or replace bad/corrupted bundle
return s.createBundle(path, mdl)
} else {
return bdl, nil
}

return bdl, nil
}

// createBundle unpacks the bundle to path, replacing the existing bundle if one is found.
Expand Down
16 changes: 16 additions & 0 deletions pkg/inference/scheduling/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,22 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
}
}

// If no explicit config exists, create a default one with the model's context size
// so that the OpenAI recorder can report the actual configuration being used.
if runnerConfig == nil {
defaultConfig := inference.BackendConfiguration{}
if l.modelManager != nil {
if bundle, err := l.modelManager.GetBundle(modelID); err != nil {
l.log.Warnf("Failed to get bundle for model %s to determine default context size: %v", modelID, err)
} else if runtimeConfig := bundle.RuntimeConfig(); runtimeConfig != nil {
if ctxSize := runtimeConfig.GetContextSize(); ctxSize != nil {
defaultConfig.ContextSize = ctxSize
}
}
}
runnerConfig = &defaultConfig
}

l.log.Infof("Loading %s backend runner with model %s in %s mode", backendName, modelID, mode)

// Acquire the loader lock and defer its release.
Expand Down