Skip to content

feat: error handler improvements #1033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 15, 2022
10 changes: 8 additions & 2 deletions docs/documentation/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,9 @@ In order to facilitate error reporting Reconciler can implement the following
[interface](https://github.com/java-operator-sdk/java-operator-sdk/blob/main/operator-framework-core/src/main/java/io/javaoperatorsdk/operator/api/reconciler/ErrorStatusHandler.java):

```java
public interface ErrorStatusHandler<T extends HasMetadata> {
public interface ErrorStatusHandler<P extends HasMetadata> {

Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
ErrorStatusUpdateControl<P> updateErrorStatus(P resource, Context<P> context, Exception e);

}
```
Expand All @@ -294,6 +294,12 @@ will also produce an event, and will result in a reconciliation if the controlle
The scope of this feature is only the `reconcile` method of the reconciler, since there should not be updates on custom
resource after it is marked for deletion.

Retry can be skipped for the cases of unrecoverable errors:

```java
ErrorStatusUpdateControl.updateStatus(customResource).withNoRetry();
```

### Correctness and Automatic Retries

There is a possibility to turn off the automatic retries. This is not desirable, unless there is a very specific reason.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.util.Map;
import java.util.Optional;

import io.javaoperatorsdk.operator.OperatorException;
import io.javaoperatorsdk.operator.api.monitoring.Metrics;
import io.javaoperatorsdk.operator.api.reconciler.RetryInfo;
import io.javaoperatorsdk.operator.processing.event.Event;
Expand Down Expand Up @@ -33,7 +34,13 @@ public <T> T timeControllerExecution(ControllerExecution<T> execution) {
.publishPercentileHistogram()
.register(registry);
try {
final var result = timer.record(execution::execute);
final var result = timer.record(() -> {
try {
return execution.execute();
} catch (Exception e) {
throw new OperatorException(e);
}
});
final var successType = execution.successTypeName(result);
registry
.counter(execName + ".success", "controller", name, "type", successType)
Expand All @@ -58,6 +65,7 @@ public void cleanupDoneFor(ResourceID customResourceUid) {
incrementCounter(customResourceUid, "events.delete");
}

@Override
public void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfoNullable) {
Optional<RetryInfo> retryInfo = Optional.ofNullable(retryInfoNullable);
incrementCounter(resourceID, RECONCILIATIONS + "started",
Expand All @@ -72,7 +80,7 @@ public void finishedReconciliation(ResourceID resourceID) {
incrementCounter(resourceID, RECONCILIATIONS + "success");
}

public void failedReconciliation(ResourceID resourceID, RuntimeException exception) {
public void failedReconciliation(ResourceID resourceID, Exception exception) {
var cause = exception.getCause();
if (cause == null) {
cause = exception;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ public OperatorException(String message) {
super(message);
}

public OperatorException(Throwable cause) {
super(cause);
}

public OperatorException(String message, Throwable cause) {
super(message, cause);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ default void receivedEvent(Event event) {}

default void reconcileCustomResource(ResourceID resourceID, RetryInfo retryInfo) {}

default void failedReconciliation(ResourceID resourceID, RuntimeException exception) {}
default void failedReconciliation(ResourceID resourceID, Exception exception) {}

default void cleanupDoneFor(ResourceID customResourceUid) {}

Expand All @@ -27,10 +27,10 @@ interface ControllerExecution<T> {

String successTypeName(T result);

T execute();
T execute() throws Exception;
}

default <T> T timeControllerExecution(ControllerExecution<T> execution) {
default <T> T timeControllerExecution(ControllerExecution<T> execution) throws Exception {
return execution.execute();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

public class DefaultContext<P extends HasMetadata> implements Context<P> {

private final RetryInfo retryInfo;
private RetryInfo retryInfo;
private final Controller<P> controller;
private final P primaryResource;
private final ControllerConfiguration<P> controllerConfiguration;
Expand Down Expand Up @@ -44,4 +44,9 @@ public ControllerConfiguration<P> getControllerConfiguration() {
public ManagedDependentResourceContext managedDependentResourceContext() {
return managedDependentResourceContext;
}

public DefaultContext<P> setRetryInfo(RetryInfo retryInfo) {
this.retryInfo = retryInfo;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
package io.javaoperatorsdk.operator.api.reconciler;

import java.util.Optional;

import io.fabric8.kubernetes.api.model.HasMetadata;

public interface ErrorStatusHandler<T extends HasMetadata> {
public interface ErrorStatusHandler<P extends HasMetadata> {

/**
* <p>
* Reconciler can implement this interface in order to update the status sub-resource in the case
* an exception in thrown. In that case
* {@link #updateErrorStatus(HasMetadata, RetryInfo, RuntimeException)} is called automatically.
* {@link #updateErrorStatus(HasMetadata, Context, Exception)} is called automatically.
* <p>
* The result of the method call is used to make a status update on the custom resource. This is
* always a sub-resource update request, so no update on custom resource itself (like spec of
Expand All @@ -21,10 +19,10 @@ public interface ErrorStatusHandler<T extends HasMetadata> {
* should not be updates on custom resource after it is marked for deletion.
*
* @param resource to update the status on
* @param retryInfo the current retry status
* @param context the current context
* @param e exception thrown from the reconciler
* @return the updated resource
*/
Optional<T> updateErrorStatus(T resource, RetryInfo retryInfo, RuntimeException e);
ErrorStatusUpdateControl<P> updateErrorStatus(P resource, Context<P> context, Exception e);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package io.javaoperatorsdk.operator.api.reconciler;

import java.util.Optional;

import io.fabric8.kubernetes.api.model.HasMetadata;

public class ErrorStatusUpdateControl<P extends HasMetadata> {

private final P resource;
private boolean noRetry = false;

public static <T extends HasMetadata> ErrorStatusUpdateControl<T> updateStatus(T resource) {
return new ErrorStatusUpdateControl<>(resource);
}

public static <T extends HasMetadata> ErrorStatusUpdateControl<T> noStatusUpdate() {
return new ErrorStatusUpdateControl<>(null);
}

private ErrorStatusUpdateControl(P resource) {
this.resource = resource;
}

/**
* Instructs the controller to not retry the error. This is useful for non-recoverable errors.
*
* @return ErrorStatusUpdateControl
*/
public ErrorStatusUpdateControl<P> withNoRetry() {
this.noRetry = true;
return this;
}

public Optional<P> getResource() {
return Optional.ofNullable(resource);
}

public boolean isNoRetry() {
return noRetry;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public interface Reconciler<R extends HasMetadata> {
* @return UpdateControl to manage updates on the custom resource (usually the status) after
* reconciliation.
*/
UpdateControl<R> reconcile(R resource, Context<R> context);
UpdateControl<R> reconcile(R resource, Context<R> context) throws Exception;

/**
* Note that this method is used in combination with finalizers. If automatic finalizer handling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,36 @@ public Controller(Reconciler<P> reconciler,
public DeleteControl cleanup(P resource, Context<P> context) {
dependents.cleanup(resource, context);

return metrics().timeControllerExecution(
new ControllerExecution<>() {
@Override
public String name() {
return "cleanup";
}
try {
return metrics().timeControllerExecution(
new ControllerExecution<>() {
@Override
public String name() {
return "cleanup";
}

@Override
public String controllerName() {
return configuration.getName();
}
@Override
public String controllerName() {
return configuration.getName();
}

@Override
public String successTypeName(DeleteControl deleteControl) {
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
}
@Override
public String successTypeName(DeleteControl deleteControl) {
return deleteControl.isRemoveFinalizer() ? "delete" : "finalizerNotRemoved";
}

@Override
public DeleteControl execute() {
return reconciler.cleanup(resource, context);
}
});
@Override
public DeleteControl execute() {
return reconciler.cleanup(resource, context);
}
});
} catch (Exception e) {
throw new OperatorException(e);
}
}

@Override
public UpdateControl<P> reconcile(P resource, Context<P> context) {
public UpdateControl<P> reconcile(P resource, Context<P> context) throws Exception {
dependents.reconcile(resource, context);

return metrics().timeControllerExecution(
Expand Down Expand Up @@ -113,7 +117,7 @@ public String successTypeName(UpdateControl<P> result) {
}

@Override
public UpdateControl<P> execute() {
public UpdateControl<P> execute() throws Exception {
return reconciler.reconcile(resource, context);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ TimerEventSource<R> retryEventSource() {
* according to the retry timing if there was an exception.
*/
private void handleRetryOnException(
ExecutionScope<R> executionScope, RuntimeException exception) {
ExecutionScope<R> executionScope, Exception exception) {
RetryExecution execution = getOrInitRetryExecution(executionScope);
var customResourceID = executionScope.getCustomResourceID();
boolean eventPresent = eventMarker.eventPresent(customResourceID);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ final class PostExecutionControl<R extends HasMetadata> {

private final boolean onlyFinalizerHandled;
private final R updatedCustomResource;
private final RuntimeException runtimeException;
private final Exception runtimeException;

private Long reScheduleDelay = null;

private PostExecutionControl(
boolean onlyFinalizerHandled,
R updatedCustomResource,
RuntimeException runtimeException) {
Exception runtimeException) {
this.onlyFinalizerHandled = onlyFinalizerHandled;
this.updatedCustomResource = updatedCustomResource;
this.runtimeException = runtimeException;
Expand All @@ -35,7 +35,7 @@ public static <R extends HasMetadata> PostExecutionControl<R> customResourceUpda
}

public static <R extends HasMetadata> PostExecutionControl<R> exceptionDuringExecution(
RuntimeException exception) {
Exception exception) {
return new PostExecutionControl<>(false, null, exception);
}

Expand All @@ -60,7 +60,7 @@ public PostExecutionControl<R> withReSchedule(long delay) {
return this;
}

public Optional<RuntimeException> getRuntimeException() {
public Optional<Exception> getRuntimeException() {
return Optional.ofNullable(runtimeException);
}

Expand Down
Loading