
Commit 4a125c2: Adjust autoscaler interface

Parent: ee412be

5 files changed: +117 additions, -118 deletions


pkg/controller/podautoscaler/podautoscaler_controller.go

Lines changed: 22 additions & 15 deletions
@@ -21,6 +21,8 @@ import (
     "fmt"
     "time"
 
+    "github.com/aibrix/aibrix/pkg/controller/podautoscaler/scaler"
+
     autoscalingv2 "k8s.io/api/autoscaling/v2"
     apiequality "k8s.io/apimachinery/pkg/api/equality"
     apimeta "k8s.io/apimachinery/pkg/api/meta"
@@ -90,6 +92,7 @@ type PodAutoscalerReconciler struct {
     Scheme        *runtime.Scheme
     EventRecorder record.EventRecorder
     Mapper        apimeta.RESTMapper
+    Autoscaler    scaler.Scaler
 }
 
 //+kubebuilder:rbac:groups=autoscaling.aibrix.ai,resources=podautoscalers,verbs=get;list;watch;create;update;patch;delete
@@ -189,7 +192,6 @@ func (r *PodAutoscalerReconciler) reconcileKPA(ctx context.Context, pa autoscali
         Group: targetGV.Group,
         Kind:  pa.Spec.ScaleTargetRef.Kind,
     }
-
     mappings, err := r.Mapper.RESTMappings(targetGK)
     if err != nil {
         r.EventRecorder.Event(&pa, corev1.EventTypeWarning, "FailedGetScale", err.Error())
@@ -201,7 +203,7 @@ func (r *PodAutoscalerReconciler) reconcileKPA(ctx context.Context, pa autoscali
     }
 
     // TODO: retrieval targetGR for future scale update
-    scale, _, err := r.scaleForResourceMappings(ctx, pa.Namespace, pa.Spec.ScaleTargetRef.Name, mappings)
+    scale, targetGR, err := r.scaleForResourceMappings(ctx, pa.Namespace, pa.Spec.ScaleTargetRef.Name, mappings)
     if err != nil {
         r.EventRecorder.Event(&pa, corev1.EventTypeWarning, "FailedGetScale", err.Error())
         setCondition(&pa, "AbleToScale", metav1.ConditionFalse, "FailedGetScale", "the HPA controller was unable to get the target's current scale: %v", err)
@@ -271,24 +273,27 @@ func (r *PodAutoscalerReconciler) reconcileKPA(ctx context.Context, pa autoscali
 
     if rescale {
         scale.Spec.Replicas = desiredReplicas
-        // TODO: invoke scale interface to scale the scaleTarget
-        // no need to use targetGR?
         r.EventRecorder.Eventf(&pa, corev1.EventTypeWarning, "FailedRescale", "New size: %d; reason: %s; error: %v", desiredReplicas, rescaleReason, err.Error())
         setCondition(&pa, "AbleToScale", metav1.ConditionFalse, "FailedUpdateScale", "the HPA controller was unable to update the target scale: %v", err)
         r.setCurrentReplicasAndMetricsInStatus(&pa, currentReplicas)
         if err := r.updateStatusIfNeeded(ctx, paStatusOriginal, &pa); err != nil {
             utilruntime.HandleError(err)
         }
-        if err := r.Client.SubResource("scale").Update(ctx, scale); err != nil {
+
+        if err := r.updateScale(ctx, pa.Namespace, targetGR, scale); err != nil {
             return ctrl.Result{}, fmt.Errorf("failed to rescale %s: %v", scaleReference, err)
         }
 
+        // which way to go?. not sure the best practice in controller-runtime
+        //if err := r.Client.SubResource("scale").Update(ctx, scale); err != nil {
+        //    return ctrl.Result{}, fmt.Errorf("failed to rescale %s: %v", scaleReference, err)
+        //}
+
         logger.Info("Successfully rescaled",
             //"PodAutoscaler", klog.KObj(pa),
             "currentReplicas", currentReplicas,
             "desiredReplicas", desiredReplicas,
             "reason", rescaleReason)
-
     }
 
     if err := r.updateStatusIfNeeded(ctx, paStatusOriginal, &pa); err != nil {
@@ -339,9 +344,9 @@ func (r *PodAutoscalerReconciler) scaleForResourceMappings(ctx context.Context,
     return nil, schema.GroupResource{}, firstErr
 }
 
-func updateScale(ctx context.Context, c client.Client, namespace string, targetGR schema.GroupResource, scale *autoscalingv1.Scale) error {
+func (r *PodAutoscalerReconciler) updateScale(ctx context.Context, namespace string, targetGR schema.GroupResource, scale *autoscalingv1.Scale) error {
     // Get GVK
-    gvk, err := apiutil.GVKForObject(scale, c.Scheme())
+    gvk, err := apiutil.GVKForObject(scale, r.Client.Scheme())
     if err != nil {
         return err
     }
@@ -353,8 +358,8 @@ func updateScale(ctx context.Context, c client.Client, namespace string, targetG
     scaleObj.SetName(scale.Name)
 
     // Update scale object
-    // TODO: change to kind name later.
-    err = c.Patch(ctx, scale, client.Apply, client.FieldOwner("operator-name"))
+    //err = r.Client.Patch(ctx, scale, client.Apply, client.FieldOwner("operator-name"))
+    err = r.Client.Patch(ctx, scale, client.Apply)
     if err != nil {
         return err
     }
@@ -416,9 +421,11 @@ func (r *PodAutoscalerReconciler) updateStatus(ctx context.Context, pa *autoscal
 // when some metrics still work and HPA should perform scaling based on them.
 // If PodAutoscaler cannot do anything due to error, it returns -1 in metricDesiredReplicas as a failure signal.
 func (r *PodAutoscalerReconciler) computeReplicasForMetrics(ctx context.Context, pa autoscalingv1alpha1.PodAutoscaler, scale *autoscalingv1.Scale) (replicas int32, metrics string, timestamp time.Time, err error) {
-    panic("not implemented")
-}
+    currentTimestamp := time.Now()
+    scaleResult := r.Autoscaler.Scale(0, 0, currentTimestamp)
+    if scaleResult.ScaleValid {
+        return scaleResult.DesiredPodCount, "", currentTimestamp, nil
+    }
 
-// TODO: define the condition type to reconcile.
-// PodAutoscalerConditionType are the valid conditions of a PodAutoscaler.
-type PodAutoscalerConditionType string
+    return 0, "", currentTimestamp, fmt.Errorf("can not calculate metrics for scale %s", scale.Name)
+}
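
With this change the controller no longer patches the scale subresource inline: it keeps the targetGR returned by scaleForResourceMappings, routes the update through the new updateScale helper, and delegates the replica decision to the injected Autoscaler field. A minimal wiring sketch is shown below; the package name podautoscaler, the aggregation import path, and the DeciderSpec values are illustrative assumptions and not part of this commit.

```go
package main

import (
	"time"

	"github.com/aibrix/aibrix/pkg/controller/podautoscaler"
	"github.com/aibrix/aibrix/pkg/controller/podautoscaler/aggregation"
	"github.com/aibrix/aibrix/pkg/controller/podautoscaler/scaler"
	ctrl "sigs.k8s.io/controller-runtime"
)

// newReconciler is a hypothetical wiring sketch: build a KPA-backed Scaler and
// inject it into the reconciler so computeReplicasForMetrics can call
// r.Autoscaler.Scale. Spec values and import paths are assumptions.
func newReconciler(mgr ctrl.Manager) (*podautoscaler.PodAutoscalerReconciler, error) {
	kpa, err := scaler.NewKpaAutoscaler(
		0, // ready pods known at startup
		&scaler.DeciderSpec{
			MaxScaleUpRate:   1.5,
			MaxScaleDownRate: 0.75,
			TargetValue:      10, // assumed per-pod target
		},
		time.Time{}, // not in panic mode initially
		10,          // maxPanicPods
		aggregation.NewTimeWindow(30*time.Second, 1*time.Second),
	)
	if err != nil {
		return nil, err
	}

	return &podautoscaler.PodAutoscalerReconciler{
		Client:        mgr.GetClient(),
		Scheme:        mgr.GetScheme(),
		EventRecorder: mgr.GetEventRecorderFor("podautoscaler-controller"),
		Mapper:        mgr.GetRESTMapper(),
		Autoscaler:    kpa, // any scaler.Scaler implementation fits here
	}, nil
}
```

Because the field is typed as the Scaler interface rather than a concrete struct, a different scaling strategy can be swapped in without touching the reconcile loop.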

pkg/controller/podautoscaler/scaler/interfaces.go

Lines changed: 10 additions & 59 deletions
@@ -19,6 +19,9 @@ package scaler
 import (
     "sync"
     "time"
+
+    "github.com/aibrix/aibrix/pkg/controller/podautoscaler/metrics"
+    "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 /**
@@ -35,17 +38,17 @@ Our implementation specifically mimics and adapts the autoscaling functionality
 
 // Autoscaler represents an instance of the autoscaling engine.
 // It encapsulates all the necessary data and state needed for scaling decisions.
-// Refer to: KpaScaler
+// Refer to: KpaAutoscaler
 type Autoscaler struct {
     // specMux guards the current DeciderSpec.
-    specMux     sync.RWMutex
-    podCounter  int
-    deciderSpec *DeciderSpec
-    Status      DeciderStatus
+    specMux        sync.RWMutex
+    metricsClient  metrics.MetricsClient
+    resourceClient client.Client
+    scaler         Scaler
 }
 
 // Scaler is an interface that defines the scaling operations.
-// Any autoscaler implementation, such as KpaScaler (Kubernetes Pod Autoscaler),
+// Any autoscaler implementation, such as KpaAutoscaler (Kubernetes Pod Autoscaler),
 // needs to implement this interface to respond to scale events.
 type Scaler interface {
     // Scale calculates the necessary scaling action based on the observed metrics
@@ -59,62 +62,10 @@ type Scaler interface {
     // Returns:
     // ScaleResult which contains the recommended number of pods to scale up or down to.
     //
-    // Refer to: KpaScaler.Scale Implementation
+    // Refer to: KpaAutoscaler.Scale Implementation
     Scale(observedStableValue float64, observedPanicValue float64, now time.Time) ScaleResult
 }
 
-// DeciderSpec defines parameters for scaling decisions.
-type DeciderSpec struct {
-    // Maximum rate at which to scale up
-    MaxScaleUpRate float64
-    // Maximum rate at which to scale down
-    MaxScaleDownRate float64
-    // The metric used for scaling, i.e. CPU, Memory, QPS.
-    ScalingMetric string
-    // The value of scaling metric per pod that we target to maintain.
-    TargetValue float64
-    // The total value of scaling metric that a pod can maintain.
-    TotalValue float64
-    // The burst capacity that user wants to maintain without queuing at the POD level.
-    // Note, that queueing still might happen due to the non-ideal load balancing.
-    TargetBurstCapacity float64
-    // ActivationScale is the minimum, non-zero value that a service should scale to.
-    // For example, if ActivationScale = 2, when a service scaled from zero it would
-    // scale up two replicas in this case. In essence, this allows one to set both a
-    // min-scale value while also preserving the ability to scale to zero.
-    // ActivationScale must be >= 2.
-    ActivationScale int32
-
-    // TODO: Note that the following attributes are specific to Knative; but we retain them here temporarily.
-    // PanicThreshold is the threshold at which panic mode is entered. It represents
-    // a factor of the currently observed load over the panic window over the ready
-    // pods. I.e. if this is 2, panic mode will be entered if the observed metric
-    // is twice as high as the current population can handle.
-    PanicThreshold float64
-    // StableWindow is needed to determine when to exit panic mode.
-    StableWindow time.Duration
-    // ScaleDownDelay is the time that must pass at reduced concurrency before a
-    // scale-down decision is applied.
-    ScaleDownDelay time.Duration
-}
-
-// DeciderStatus is the current scale recommendation.
-type DeciderStatus struct {
-    // DesiredScale is the target number of instances that autoscaler
-    // this revision needs.
-    DesiredScale int32
-
-    // TODO: ExcessBurstCapacity might be a general attribute since it describes
-    // how much capacity users want to keep for preparing for burst traffic.
-
-    // ExcessBurstCapacity is the difference between spare capacity
-    // (how much more load the pods in the revision deployment can take before being
-    // overloaded) and the configured target burst capacity.
-    // If this number is negative: Activator will be threaded in
-    // the request path by the PodAutoscaler controller.
-    ExcessBurstCapacity int32
-}
-
 // ScaleResult contains the results of a scaling decision.
 type ScaleResult struct {
     // DesiredPodCount is the number of pods Autoscaler suggests for the revision.
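
After this commit, interfaces.go keeps only the shared pieces: Autoscaler carries the metrics and resource clients, and Scaler remains the behavioural contract, with the decider types moved into kpa.go. A minimal, hypothetical implementation of that contract is sketched below; it relies only on the Scale signature shown above and on the DesiredPodCount and ScaleValid fields of ScaleResult that the controller reads, and it is not part of this commit.

```go
package scaler

import (
	"math"
	"time"
)

// staticScaler is a hypothetical, minimal Scaler used only to illustrate the
// interface contract; it is not part of this commit.
type staticScaler struct {
	// targetValuePerPod is the metric value each pod is expected to absorb.
	targetValuePerPod float64
}

// Scale recommends ceil(observedStableValue / targetValuePerPod) pods and
// ignores the panic signal entirely.
func (s *staticScaler) Scale(observedStableValue, observedPanicValue float64, now time.Time) ScaleResult {
	if s.targetValuePerPod <= 0 {
		// Signal that no valid recommendation could be produced.
		return ScaleResult{ScaleValid: false}
	}
	desired := int32(math.Ceil(observedStableValue / s.targetValuePerPod))
	return ScaleResult{
		DesiredPodCount: desired,
		ScaleValid:      true,
	}
}

// Compile-time check that staticScaler satisfies the Scaler interface.
var _ Scaler = (*staticScaler)(nil)
```

An invalid result maps directly onto the controller path added in this commit: computeReplicasForMetrics returns an error when ScaleValid is false.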

pkg/controller/podautoscaler/scaler/kpa.go

Lines changed: 81 additions & 26 deletions
@@ -26,11 +26,11 @@ import (
 )
 
 /**
-This implementation of the algorithm is based on both the Knative KpaScaler code and its documentation.
+This implementation of the algorithm is based on both the Knative KpaAutoscaler code and its documentation.
 
-According to Knative documentation, the KpaScaler Scale policy includes both a stable mode and a panic mode.
-If the metric usage does not exceed the panic threshold, KpaScaler tries to align the per-pod metric usage with the stable target value.
-If metric usage exceeds the panic target during the panic window, KpaScaler enters panic mode and tries to maintain the per-pod metric usage at the panic target.
+According to Knative documentation, the KpaAutoscaler Scale policy includes both a stable mode and a panic mode.
+If the metric usage does not exceed the panic threshold, KpaAutoscaler tries to align the per-pod metric usage with the stable target value.
+If metric usage exceeds the panic target during the panic window, KpaAutoscaler enters panic mode and tries to maintain the per-pod metric usage at the panic target.
 If the metric no longer exceeds the panic threshold, exit the panic mode.
 
 |
@@ -47,45 +47,100 @@ If the metric no longer exceeds the panic threshold, exit the panic mode.
 
 */
 
-type KpaScaler struct {
-    scaler *Autoscaler
+// DeciderSpec defines parameters for scaling decisions.
+type DeciderSpec struct {
+    // Maximum rate at which to scale up
+    MaxScaleUpRate float64
+    // Maximum rate at which to scale down
+    MaxScaleDownRate float64
+    // The metric used for scaling, i.e. CPU, Memory, QPS.
+    ScalingMetric string
+    // The value of scaling metric per pod that we target to maintain.
+    TargetValue float64
+    // The total value of scaling metric that a pod can maintain.
+    TotalValue float64
+    // The burst capacity that user wants to maintain without queuing at the POD level.
+    // Note, that queueing still might happen due to the non-ideal load balancing.
+    TargetBurstCapacity float64
+    // ActivationScale is the minimum, non-zero value that a service should scale to.
+    // For example, if ActivationScale = 2, when a service scaled from zero it would
+    // scale up two replicas in this case. In essence, this allows one to set both a
+    // min-scale value while also preserving the ability to scale to zero.
+    // ActivationScale must be >= 2.
+    ActivationScale int32
+
+    // TODO: Note that the following attributes are specific to Knative; but we retain them here temporarily.
+    // PanicThreshold is the threshold at which panic mode is entered. It represents
+    // a factor of the currently observed load over the panic window over the ready
+    // pods. I.e. if this is 2, panic mode will be entered if the observed metric
+    // is twice as high as the current population can handle.
+    PanicThreshold float64
+    // StableWindow is needed to determine when to exit panic mode.
+    StableWindow time.Duration
+    // ScaleDownDelay is the time that must pass at reduced concurrency before a
+    // scale-down decision is applied.
+    ScaleDownDelay time.Duration
+}
+
+// DeciderStatus is the current scale recommendation.
+type DeciderStatus struct {
+    // DesiredScale is the target number of instances that autoscaler
+    // this revision needs.
+    DesiredScale int32
+
+    // TODO: ExcessBurstCapacity might be a general attribute since it describes
+    // how much capacity users want to keep for preparing for burst traffic.
+
+    // ExcessBurstCapacity is the difference between spare capacity
+    // (how much more load the pods in the revision deployment can take before being
+    // overloaded) and the configured target burst capacity.
+    // If this number is negative: Activator will be threaded in
+    // the request path by the PodAutoscaler controller.
+    ExcessBurstCapacity int32
+}
+
+type KpaAutoscaler struct {
+    *Autoscaler
     panicTime    time.Time
     maxPanicPods int32
     delayWindow  *aggregation.TimeWindow
+    podCounter   int
+    deciderSpec  *DeciderSpec
+    Status       DeciderStatus
 }
 
-func NewKpaScaler(readyPodsCount int, spec *DeciderSpec, panicTime time.Time,
-    maxPanicPods int32, delayWindow *aggregation.TimeWindow) (*KpaScaler, error) {
+var _ Scaler = (*KpaAutoscaler)(nil)
+
+func NewKpaAutoscaler(readyPodsCount int, spec *DeciderSpec, panicTime time.Time,
+    maxPanicPods int32, delayWindow *aggregation.TimeWindow) (*KpaAutoscaler, error) {
     if spec == nil {
         return nil, errors.New("spec cannot be nil")
     }
     if delayWindow == nil {
         return nil, errors.New("delayWindow cannot be nil")
     }
-    scaler := &Autoscaler{
-        podCounter:  readyPodsCount,
-        deciderSpec: spec,
-    }
-    return &KpaScaler{
-        scaler:       scaler,
+    autoscaler := &Autoscaler{}
+    return &KpaAutoscaler{
+        Autoscaler:   autoscaler,
+        podCounter:   readyPodsCount,
         panicTime:    panicTime,
         maxPanicPods: maxPanicPods,
         delayWindow:  delayWindow,
+        deciderSpec:  spec,
     }, nil
 }
 
-// Scale implements Scaler interface in KpaScaler.
-func (k *KpaScaler) Scale(observedStableValue float64, observedPanicValue float64, now time.Time) ScaleResult {
+// Scale implements Scaler interface in KpaAutoscaler.
+func (k *KpaAutoscaler) Scale(observedStableValue float64, observedPanicValue float64, now time.Time) ScaleResult {
     /**
     `observedStableValue` and `observedPanicValue` are calculated using different window sizes in the `MetricClient`.
     For reference, see the KNative implementation at `pkg/autoscaler/metrics/collector.go:185`.
     */
-    a := k.scaler
-    a.specMux.RLock()
-    spec := a.deciderSpec
-    a.specMux.RUnlock()
+    k.specMux.RLock()
+    spec := k.deciderSpec
+    k.specMux.RUnlock()
 
-    originalReadyPodsCount := a.podCounter
+    originalReadyPodsCount := k.podCounter
     // Use 1 if there are zero current pods.
     readyPodsCount := math.Max(1, float64(originalReadyPodsCount))
 
@@ -110,12 +165,12 @@ func (k *KpaScaler) Scale(observedStableValue float64, observedPanicValue float6
     desiredPanicPodCount := int32(math.Min(math.Max(dppc, maxScaleDown), maxScaleUp))
 
     // If ActivationScale > 1, then adjust the desired pod counts
-    if a.deciderSpec.ActivationScale > 1 {
-        if dspc > 0 && a.deciderSpec.ActivationScale > desiredStablePodCount {
-            desiredStablePodCount = a.deciderSpec.ActivationScale
+    if k.deciderSpec.ActivationScale > 1 {
+        if dspc > 0 && k.deciderSpec.ActivationScale > desiredStablePodCount {
+            desiredStablePodCount = k.deciderSpec.ActivationScale
         }
-        if dppc > 0 && a.deciderSpec.ActivationScale > desiredPanicPodCount {
-            desiredPanicPodCount = a.deciderSpec.ActivationScale
+        if dppc > 0 && k.deciderSpec.ActivationScale > desiredPanicPodCount {
+            desiredPanicPodCount = k.deciderSpec.ActivationScale
         }
     }
 
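
KpaAutoscaler.Scale (partially shown above) turns the two observed values into stable and panic pod counts and clamps each between maxScaleDown and maxScaleUp before applying ActivationScale. The arithmetic sketch below illustrates that clamping, assuming Knative-style bounds of ceil(MaxScaleUpRate x readyPods) and floor(readyPods / MaxScaleDownRate); the concrete numbers are illustrative and not taken from this commit.

```go
// Illustrative arithmetic only; the bounds follow the Knative convention and
// the concrete values are assumptions, not values from this commit.
package main

import (
	"fmt"
	"math"
)

func main() {
	readyPods := 5.0        // currently ready pods
	targetValue := 10.0     // DeciderSpec.TargetValue (assumed)
	observedStable := 120.0 // stable-window metric value
	maxScaleUpRate := 1.5   // DeciderSpec.MaxScaleUpRate
	maxScaleDownRate := 2.0 // DeciderSpec.MaxScaleDownRate (assumed)

	// Raw desired count from the stable window: ceil(120 / 10) = 12 pods.
	dspc := math.Ceil(observedStable / targetValue)

	// Rate limits relative to the ready pods: at most ceil(1.5*5) = 8 pods after
	// one step up, at least floor(5/2) = 2 pods after one step down.
	maxScaleUp := math.Ceil(maxScaleUpRate * readyPods)
	maxScaleDown := math.Floor(readyPods / maxScaleDownRate)

	// Same clamping expression as in the diff: min(max(dspc, maxScaleDown), maxScaleUp).
	desired := int32(math.Min(math.Max(dspc, maxScaleDown), maxScaleUp))
	fmt.Println(desired) // 8: the raw 12 is capped by the scale-up rate limit
}
```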

pkg/controller/podautoscaler/scaler/kpa_test.go

Lines changed: 2 additions & 2 deletions
@@ -25,7 +25,7 @@ import (
 )
 
 func TestScale(t *testing.T) {
-    kpaScaler, err := NewKpaScaler(5,
+    kpaScaler, err := NewKpaAutoscaler(5,
         &DeciderSpec{
             MaxScaleUpRate:   1.5,
             MaxScaleDownRate: 0.75,
@@ -39,7 +39,7 @@ func TestScale(t *testing.T) {
         time.Time{}, 10, aggregation.NewTimeWindow(30*time.Second, 1*time.Second),
     )
     if err != nil {
-        t.Errorf("Failed to create KpaScaler: %v", err)
+        t.Errorf("Failed to create KpaAutoscaler: %v", err)
     }
 
     observedStableValue := 120.0
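
The visible part of the test ends at the observed value; a plausible continuation of TestScale, assuming the ScaleResult fields used by the controller, would feed that value into the renamed scaler and sanity-check the result:

```go
// Hypothetical continuation of TestScale, not part of this commit; the panic
// value and the assertions are assumptions for illustration.
observedPanicValue := 150.0
result := kpaScaler.Scale(observedStableValue, observedPanicValue, time.Now())
if !result.ScaleValid {
	t.Fatalf("expected a valid scale recommendation")
}
if result.DesiredPodCount <= 0 {
	t.Errorf("expected a positive desired pod count, got %d", result.DesiredPodCount)
}
```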

pkg/controller/podautoscaler/scaler/scaler.go

Lines changed: 2 additions & 16 deletions
@@ -13,23 +13,9 @@ import (
     "sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-// ReplicasScaler bundles all needed information to calculate the target amount of replicas
-type ReplicasScaler struct {
-    metricsClient  metrics.MetricsClient
-    resourceClient client.Client
-    // put necessary configuration information there.
-}
-
-func newReplicasScaler(metricsClient metrics.MetricsClient, client client.Client) *ReplicasScaler {
-    return &ReplicasScaler{
-        metricsClient:  metricsClient,
-        resourceClient: client,
-    }
-}
-
-func (c *ReplicasScaler) getReadyPodsCount(namespace string, selector labels.Selector) (int64, error) {
+func (a *Autoscaler) getReadyPodsCount(namespace string, selector labels.Selector) (int64, error) {
     podList := &v1.PodList{}
-    if err := c.resourceClient.List(context.Background(), podList,
+    if err := a.resourceClient.List(context.Background(), podList,
         &client.ListOptions{Namespace: namespace, LabelSelector: selector}); err != nil {
         return 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
     }
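
With ReplicasScaler removed, getReadyPodsCount now hangs off Autoscaler and lists pods through its resourceClient. A hypothetical caller inside the scaler package might build the labels.Selector like this; the helper name and label key are assumptions, not part of this commit.

```go
// readyPodsFor is a hypothetical helper, not in this commit, showing how a
// labels.Selector is constructed for the relocated getReadyPodsCount.
// It assumes the existing k8s.io/apimachinery/pkg/labels import in this file.
func (a *Autoscaler) readyPodsFor(namespace, appLabel string) (int64, error) {
	selector := labels.SelectorFromSet(labels.Set{"app": appLabel})
	return a.getReadyPodsCount(namespace, selector)
}
```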
