@@ -29,6 +29,7 @@ import (
2929
3030 modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
3131 "github.com/aibrix/aibrix/pkg/cache"
32+ "github.com/aibrix/aibrix/pkg/config"
3233 "github.com/aibrix/aibrix/pkg/controller/modeladapter/scheduling"
3334 "github.com/aibrix/aibrix/pkg/utils"
3435 corev1 "k8s.io/api/core/v1"
@@ -83,6 +84,18 @@ const (
8384 ModelAdapterAvailable = "ModelAdapterAvailable"
8485 // ModelAdapterUnavailable is added in a ModelAdapter when it doesn't have any pod hosting it.
8586 ModelAdapterUnavailable = "ModelAdapterUnavailable"
87+
88+ // Inference service ports and API endpoint paths
89+ DefaultInferenceEnginePort = "8000"
90+ DefaultDebugInferenceEnginePort = "30081"
91+ DefaultRuntimeAPIPort = "8080"
92+
93+ ModelListPath = "/v1/models"
94+ ModelListRuntimeAPIPath = "/v1/models"
95+ LoadLoraAdapterPath = "/v1/load_lora_adapter"
96+ LoadLoraRuntimeAPIPath = "/v1/lora_adapter/load"
97+ UnloadLoraAdapterPath = "/v1/unload_lora_adapter"
98+ UnloadLoraRuntimeAPIPath = "/v1/lora_adapter/unload"
8699)
87100
88101var (
@@ -92,18 +105,25 @@ var (
92105 defaultRequeueDuration = 3 * time .Second
93106)
94107
108+ type URLConfig struct { // URLConfig groups the endpoint URLs the reconciler uses for one target pod (returned by BuildURLs).
109+ BaseURL string // not read in the visible hunks; presumably the scheme://host:port prefix of the other URLs — TODO confirm against BuildURLs
110+ ListModelsURL string // passed to modelAdapterExists, which issues a GET request against it
111+ LoadAdapterURL string // passed to loadModelAdapter, which issues a POST request against it
112+ UnloadAdapterURL string // target of the POST request built in unloadModelAdapter
113+ }
114+
95115// Add creates a new ModelAdapter Controller and adds it to the Manager with default RBAC.
96116// The Manager will set fields on the Controller and Start it when the Manager is Started.
97- func Add (mgr manager.Manager ) error {
98- r , err := newReconciler (mgr )
117+ func Add (mgr manager.Manager , runtimeConfig config. RuntimeConfig ) error {
118+ r , err := newReconciler (mgr , runtimeConfig )
99119 if err != nil {
100120 return err
101121 }
102122 return add (mgr , r )
103123}
104124
105125// newReconciler returns a new reconcile.Reconciler
106- func newReconciler (mgr manager.Manager ) (reconcile.Reconciler , error ) {
126+ func newReconciler (mgr manager.Manager , runtimeConfig config. RuntimeConfig ) (reconcile.Reconciler , error ) {
107127 cacher := mgr .GetCache ()
108128
109129 podInformer , err := cacher .GetInformer (context .TODO (), & corev1.Pod {})
@@ -146,6 +166,7 @@ func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
146166 EndpointSliceLister : endpointSliceLister ,
147167 Recorder : mgr .GetEventRecorderFor (controllerName ),
148168 scheduler : scheduler ,
169+ RuntimeConfig : runtimeConfig ,
149170 }
150171 return reconciler , nil
151172}
@@ -227,6 +248,7 @@ type ModelAdapterReconciler struct {
227248 ServiceLister corelisters.ServiceLister
228249 // EndpointSliceLister is able to list/get services from a shared informer's cache store
229250 EndpointSliceLister discoverylisters.EndpointSliceLister
251+ RuntimeConfig config.RuntimeConfig
230252}
231253
232254//+kubebuilder:rbac:groups=discovery.k8s.io,resources=endpointslices,verbs=get;list;watch;create;update;patch;delete
@@ -517,17 +539,10 @@ func (r *ModelAdapterReconciler) reconcileLoading(ctx context.Context, instance
517539 return nil
518540 }
519541
520- // Define the key you want to check
521- key := "DEBUG_MODE"
522- value , exists := getEnvKey (key )
523- host := fmt .Sprintf ("http://%s:8000" , targetPod .Status .PodIP )
524- if exists && value == "on" {
525- // 30080 is the nodePort of the base model service.
526- host = fmt .Sprintf ("http://%s:30081" , "localhost" )
527- }
542+ urls := BuildURLs (targetPod .Status .PodIP , r .RuntimeConfig )
528543
529544 // Check if the model is already loaded
530- exists , err = r .modelAdapterExists (host , instance )
545+ exists , err : = r .modelAdapterExists (urls . ListModelsURL , instance )
531546 if err != nil {
532547 return err
533548 }
@@ -537,7 +552,7 @@ func (r *ModelAdapterReconciler) reconcileLoading(ctx context.Context, instance
537552 }
538553
539554 // Load the Model adapter
540- err = r .loadModelAdapter (host , instance )
555+ err = r .loadModelAdapter (urls . LoadAdapterURL , instance )
541556 if err != nil {
542557 return err
543558 }
@@ -546,10 +561,7 @@ func (r *ModelAdapterReconciler) reconcileLoading(ctx context.Context, instance
546561}
547562
548563// Separate method to check if the model already exists
549- func (r * ModelAdapterReconciler ) modelAdapterExists (host string , instance * modelv1alpha1.ModelAdapter ) (bool , error ) {
550- // TODO: /v1/models is the vllm entrypoints, let's support multiple engine in future
551- url := fmt .Sprintf ("%s/v1/models" , host )
552-
564+ func (r * ModelAdapterReconciler ) modelAdapterExists (url string , instance * modelv1alpha1.ModelAdapter ) (bool , error ) {
553565 req , err := http .NewRequest ("GET" , url , nil )
554566 if err != nil {
555567 return false , err
@@ -599,10 +611,11 @@ func (r *ModelAdapterReconciler) modelAdapterExists(host string, instance *model
599611}
600612
601613// Separate method to load the LoRA adapter
602- func (r * ModelAdapterReconciler ) loadModelAdapter (host string , instance * modelv1alpha1.ModelAdapter ) error {
614+ func (r * ModelAdapterReconciler ) loadModelAdapter (url string , instance * modelv1alpha1.ModelAdapter ) error {
603615 artifactURL := instance .Spec .ArtifactURL
604616 if strings .HasPrefix (instance .Spec .ArtifactURL , "huggingface://" ) {
605- artifactURL , err := extractHuggingFacePath (instance .Spec .ArtifactURL )
617+ var err error
618+ artifactURL , err = extractHuggingFacePath (instance .Spec .ArtifactURL )
606619 if err != nil {
607620 // Handle error, e.g., log it and return
608621 klog .ErrorS (err , "Invalid artifact URL" , "artifactURL" , artifactURL )
@@ -620,7 +633,6 @@ func (r *ModelAdapterReconciler) loadModelAdapter(host string, instance *modelv1
620633 return err
621634 }
622635
623- url := fmt .Sprintf ("%s/v1/load_lora_adapter" , host )
624636 req , err := http .NewRequest ("POST" , url , bytes .NewBuffer (payloadBytes ))
625637 if err != nil {
626638 return err
@@ -682,15 +694,8 @@ func (r *ModelAdapterReconciler) unloadModelAdapter(instance *modelv1alpha1.Mode
682694 return err
683695 }
684696
685- url := fmt .Sprintf ("http://%s:%d/v1/unload_lora_adapter" , targetPod .Status .PodIP , 8000 )
686- key := "DEBUG_MODE"
687- value , exists := getEnvKey (key )
688- if exists && value == "on" {
689- // 30080 is the nodePort of the base model service.
690- url = "http://localhost:30081/v1/unload_lora_adapter"
691- }
692-
693- req , err := http .NewRequest ("POST" , url , bytes .NewBuffer (payloadBytes ))
697+ urls := BuildURLs (targetPod .Status .PodIP , r .RuntimeConfig )
698+ req , err := http .NewRequest ("POST" , urls .UnloadAdapterURL , bytes .NewBuffer (payloadBytes ))
694699 if err != nil {
695700 return err
696701 }
0 commit comments