
Commit c327f8e

Merge branch 'main' into add-cvo-test-extension
2 parents 0946ee4 + 3ded19e commit c327f8e

25 files changed: +304 -14170 lines changed

pkg/defaultmonitortests/types.go

Lines changed: 2 additions & 1 deletion
@@ -144,6 +144,7 @@ func newDefaultMonitorTests(info monitortestframework.MonitorTestInitializationI
 	monitorTestRegistry.AddMonitorTestOrDie(faultyloadbalancer.MonitorName, "kube-apiserver", faultyloadbalancer.NewMonitorTest())
 	monitorTestRegistry.AddMonitorTestOrDie(staticpodinstall.MonitorName, "kube-apiserver", staticpodinstall.NewStaticPodInstallMonitorTest())
 	monitorTestRegistry.AddMonitorTestOrDie(containerfailures.MonitorName, "Node / Kubelet", containerfailures.NewContainerFailuresTests())
+	monitorTestRegistry.AddMonitorTestOrDie(legacytestframeworkmonitortests.PathologicalMonitorName, "Test Framework", legacytestframeworkmonitortests.NewLegacyPathologicalMonitorTests(info))

 	return monitorTestRegistry
 }
@@ -193,7 +194,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
 	monitorTestRegistry.AddMonitorTestOrDie("legacy-storage-invariants", "Storage", legacystoragemonitortests.NewLegacyTests())

-	monitorTestRegistry.AddMonitorTestOrDie("legacy-test-framework-invariants", "Test Framework", legacytestframeworkmonitortests.NewLegacyTests(info))
+	monitorTestRegistry.AddMonitorTestOrDie(legacytestframeworkmonitortests.AlertsMonitorName, "Test Framework", legacytestframeworkmonitortests.NewLegacyAlertsMonitorTests(info))
 	monitorTestRegistry.AddMonitorTestOrDie("timeline-serializer", "Test Framework", timelineserializer.NewTimelineSerializer())
 	monitorTestRegistry.AddMonitorTestOrDie("interval-serializer", "Test Framework", intervalserializer.NewIntervalSerializer())
 	monitorTestRegistry.AddMonitorTestOrDie("tracked-resources-serializer", "Test Framework", trackedresourcesserializer.NewTrackedResourcesSerializer())

pkg/monitortestlibrary/platformidentification/operator_mapping.go

Lines changed: 2 additions & 0 deletions
@@ -111,6 +111,7 @@ var (
 	"monitoring",
 	"network",
 	"node-tuning",
+	"olm",
 	"openshift-apiserver",
 	"openshift-controller-manager",
 	"openshift-samples",
@@ -154,6 +155,7 @@ func init() {
 	utilruntime.Must(addOperatorMapping("monitoring", "Monitoring"))
 	utilruntime.Must(addOperatorMapping("network", "Networking"))
 	utilruntime.Must(addOperatorMapping("node-tuning", "Node Tuning Operator"))
+	utilruntime.Must(addOperatorMapping("olm", "OLM"))
 	utilruntime.Must(addOperatorMapping("openshift-apiserver", "openshift-apiserver"))
 	utilruntime.Must(addOperatorMapping("openshift-controller-manager", "openshift-controller-manager"))
 	utilruntime.Must(addOperatorMapping("openshift-samples", "Samples"))

pkg/monitortests/cli/adm_upgrade/status/monitortest.go

Lines changed: 5 additions & 4 deletions
@@ -162,12 +162,13 @@ func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning,

 	// TODO: Maybe utilize Intervals somehow and do tests in ComputeComputedIntervals and EvaluateTestsFromConstructedIntervals

-	wasUpdated := func() (bool, error) {
+	updateCount := func() (int, error) {
 		cv, err := w.configv1client.ConfigV1().ClusterVersions().Get(ctx, "version", metav1.GetOptions{})
 		if err != nil {
-			return false, fmt.Errorf("failed to get cluster version: %w", err)
+			return 0, fmt.Errorf("failed to get cluster version: %w", err)
 		}
-		return len(cv.Status.History) > len(w.initialClusterVersion.Status.History), nil
+		hops := len(cv.Status.History) - len(w.initialClusterVersion.Status.History)
+		return hops, nil
 	}

 	return nil, []*junitapi.JUnitTestCase{
@@ -176,7 +177,7 @@ func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning,
 		w.controlPlane(),
 		w.workers(),
 		w.health(),
-		w.updateLifecycle(wasUpdated),
+		w.updateLifecycle(updateCount),
 	}, nil
 }
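
The key behavioral change above is that the closure now returns a count of update "hops" (the growth of the ClusterVersion history) instead of a boolean. A self-contained sketch of that arithmetic, using a simplified stand-in for the ClusterVersion status rather than the real openshift/api types:

package main

import "fmt"

// clusterVersion is a simplified stand-in for the OpenShift ClusterVersion
// object; only the length of the history matters for the hop count.
type clusterVersion struct {
	history []string // versions the cluster has moved through, newest first
}

// countHops mirrors the updateCount closure above: the number of hops is
// how much the history grew between the start of the monitor and now.
func countHops(initial, current clusterVersion) int {
	return len(current.history) - len(initial.history)
}

func main() {
	initial := clusterVersion{history: []string{"4.19.0"}}
	current := clusterVersion{history: []string{"4.20.0", "4.20.0-rc.1", "4.19.0"}}
	fmt.Println(countHops(initial, current)) // 2: the cluster updated twice
}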

pkg/monitortests/cli/adm_upgrade/status/updatelifecycle.go

Lines changed: 12 additions & 4 deletions
@@ -8,17 +8,25 @@ import (
 	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
 )

-type wasUpdatedFn func() (bool, error)
+// wasUpdatedFn returns how many times the cluster was updated while the test was running
+type wasUpdatedFn func() (int, error)

 func (w *monitor) updateLifecycle(wasUpdated wasUpdatedFn) *junitapi.JUnitTestCase {
 	health := &junitapi.JUnitTestCase{
 		Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 	}

-	clusterUpdated, err := wasUpdated()
+	clusterUpdateCount, err := wasUpdated()
 	if err != nil {
 		health.FailureOutput = &junitapi.FailureOutput{
-			Message: fmt.Sprintf("failed to get cluster version: %v", err),
+			Message: fmt.Sprintf("failed to determine whether the cluster was updated: %v", err),
+		}
+		return health
+	}
+
+	if clusterUpdateCount > 1 {
+		health.SkipMessage = &junitapi.SkipMessage{
+			Message: fmt.Sprintf("Cluster updated more than once (%d times)", clusterUpdateCount),
 		}
 		return health
 	}
@@ -95,7 +103,7 @@ func (w *monitor) updateLifecycle(wasUpdated wasUpdatedFn) *junitapi.JUnitTestCa
 		}
 	}

-	if !clusterUpdated {
+	if clusterUpdateCount == 0 {
 		// TODO: MCO churn sometimes briefly tricks our code into thinking the cluster is updating, we'll tolerate for
 		// now but we should try fixing this
 		// if observed.output.updating || observed.output.controlPlane != nil || observed.output.workers != nil || observed.output.health != nil {
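
The new control flow has three outcomes: fail on error, skip when more than one update hop was observed, and otherwise run the lifecycle checks (with the zero-hop case tolerating MCO churn further down). A runnable sketch of just that branching, with plain strings standing in for the JUnit result types:

package main

import "fmt"

type wasUpdatedFn func() (int, error)

// classify sketches the branching added in updateLifecycle; the real
// code builds junitapi.JUnitTestCase values instead of strings.
func classify(wasUpdated wasUpdatedFn) string {
	n, err := wasUpdated()
	if err != nil {
		return fmt.Sprintf("failure: failed to determine whether the cluster was updated: %v", err)
	}
	if n > 1 {
		return fmt.Sprintf("skip: Cluster updated more than once (%d times)", n)
	}
	// n == 0 or n == 1: the remaining lifecycle checks run as before.
	return fmt.Sprintf("run lifecycle checks (hops=%d)", n)
}

func main() {
	fmt.Println(classify(func() (int, error) { return 3, nil })) // skip path
	fmt.Println(classify(func() (int, error) { return 1, nil })) // normal path
}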

pkg/monitortests/cli/adm_upgrade/status/updatelifecycle_test.go

Lines changed: 31 additions & 13 deletions
@@ -105,10 +105,10 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 	t.Parallel()

 	testCases := []struct {
-		name       string
-		snapshots  []snapshot
-		wasUpdated bool
-		expected   *junitapi.JUnitTestCase
+		name        string
+		snapshots   []snapshot
+		updateCount int
+		expected    *junitapi.JUnitTestCase
 	}{
 		{
 			name: "no snapshots -> test skipped",
@@ -128,7 +128,7 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 				{when: time.Now(), out: lifecycle04controlPlaneUpdated},
 				{when: time.Now(), out: lifecycle05after},
 			},
-			wasUpdated: true,
+			updateCount: 1,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 			},
@@ -140,7 +140,7 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 				{when: time.Now(), out: lifecycle02updating},
 				{when: time.Now(), out: lifecycle05after},
 			},
-			wasUpdated: true,
+			updateCount: 1,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 			},
@@ -154,7 +154,7 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 				{when: time.Now(), out: lifecycle02updating},
 				{when: time.Now(), out: lifecycle05after},
 			},
-			wasUpdated: true,
+			updateCount: 1,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 				FailureOutput: &junitapi.FailureOutput{
@@ -169,19 +169,38 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 				{when: time.Now(), out: lifecycle01before},
 				{when: time.Now(), out: lifecycle01before},
 			},
-			wasUpdated: false,
+			updateCount: 0,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 			},
 		},
+		{
+			name: "test is skipped when multiple hops were observed",
+			snapshots: []snapshot{
+				{when: time.Now(), out: lifecycle01before},
+				{when: time.Now(), out: lifecycle02updating},
+				{when: time.Now(), out: lifecycle05after},
+				{when: time.Now(), out: lifecycle02updating},
+				{when: time.Now(), out: lifecycle05after},
+				{when: time.Now(), out: lifecycle02updating},
+				{when: time.Now(), out: lifecycle05after},
+			},
+			updateCount: 3,
+			expected: &junitapi.JUnitTestCase{
+				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
+				SkipMessage: &junitapi.SkipMessage{
+					Message: "Cluster updated more than once (3 times)",
+				},
+			},
+		},
 		{
 			name: "update observed when cluster was not updated",
 			snapshots: []snapshot{
 				{when: time.Now(), out: lifecycle01before},
 				{when: time.Now(), out: lifecycle02updating},
 				{when: time.Now(), out: lifecycle01before},
 			},
-			wasUpdated: false,
+			updateCount: 0,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 				// TODO: MCO churn sometimes briefly tricks our code into thinking the cluster is updating, we'll tolerate for
@@ -192,14 +211,13 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 			},
 		},
 		{
-
 			name: "completed update goes back to updating",
 			snapshots: []snapshot{
 				{when: time.Now(), out: lifecycle03controlPlaneNodesUpdated},
 				{when: time.Now(), out: lifecycle05after},
 				{when: time.Now(), out: lifecycle03controlPlaneNodesUpdated},
 			},
-			wasUpdated: true,
+			updateCount: 1,
 			expected: &junitapi.JUnitTestCase{
 				Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status snapshots reflect the cluster upgrade lifecycle",
 				// TODO: MCO churn sometimes briefly tricks our code into thinking the cluster is updating, we'll tolerate for
@@ -223,8 +241,8 @@ func TestMonitor_UpdateLifecycle(t *testing.T) {
 			// Process snapshots into models for the health check to work with
 			_ = m.expectedLayout()

-			wasUpdated := func() (bool, error) {
-				return tc.wasUpdated, nil
+			wasUpdated := func() (int, error) {
+				return tc.updateCount, nil
 			}

 			result := m.updateLifecycle(wasUpdated)

pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go

Lines changed: 7 additions & 0 deletions
@@ -401,6 +401,13 @@ func testUpgradeOperatorStateTransitions(events monitorapi.Intervals, clientConf
 		if condition.Type == configv1.OperatorDegraded && condition.Status == configv1.ConditionTrue {
 			return "https://issues.redhat.com/browse/OCPBUGS-39026", nil
 		}
+	case "olm":
+		if condition.Type == configv1.OperatorAvailable &&
+			condition.Status == configv1.ConditionFalse &&
+			(condition.Reason == "OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying" ||
+				condition.Reason == "CatalogdDeploymentCatalogdControllerManager_Deploying") {
+			return "https://issues.redhat.com/browse/OCPBUGS-62517", nil
+		}
 	case "openshift-apiserver":
 		if condition.Type == configv1.OperatorAvailable && condition.Status == configv1.ConditionFalse &&
 			(condition.Reason == "APIServerDeployment_NoDeployment" ||
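
The new olm case follows the file's existing pattern: a known-flaky operator condition is waived by returning the tracking bug URL. A self-contained sketch of that matching, with string constants standing in for the configv1 types used by the real code:

package main

import "fmt"

// statusCondition is a simplified stand-in for
// configv1.ClusterOperatorStatusCondition.
type statusCondition struct {
	Type, Status, Reason string
}

// knownOLMException reproduces the new case above: Available=False during
// a deployment rollout of either OLM controller maps to OCPBUGS-62517.
func knownOLMException(c statusCondition) (string, bool) {
	if c.Type == "Available" && c.Status == "False" &&
		(c.Reason == "OperatorcontrollerDeploymentOperatorControllerControllerManager_Deploying" ||
			c.Reason == "CatalogdDeploymentCatalogdControllerManager_Deploying") {
		return "https://issues.redhat.com/browse/OCPBUGS-62517", true
	}
	return "", false
}

func main() {
	url, ok := knownOLMException(statusCondition{
		Type:   "Available",
		Status: "False",
		Reason: "CatalogdDeploymentCatalogdControllerManager_Deploying",
	})
	fmt.Println(ok, url) // true https://issues.redhat.com/browse/OCPBUGS-62517
}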

pkg/monitortests/testframework/legacytestframeworkmonitortests/monitortest.go renamed to pkg/monitortests/testframework/legacytestframeworkmonitortests/alerts_monitortest.go

Lines changed: 14 additions & 13 deletions
@@ -6,7 +6,6 @@ import (

 	"github.com/openshift/origin/pkg/monitortestframework"

-	"github.com/openshift/origin/pkg/monitortestlibrary/pathologicaleventlibrary"
 	"github.com/sirupsen/logrus"

 	"github.com/openshift/origin/pkg/alerts"
@@ -16,37 +15,41 @@ import (
 	"k8s.io/client-go/rest"
 )

-type legacyMonitorTests struct {
+const (
+	AlertsMonitorName = "legacy-test-framework-invariants-alerts"
+)
+
+type legacyAlertsMonitorTests struct {
 	adminRESTConfig            *rest.Config
 	duration                   time.Duration
 	recordedResources          monitorapi.ResourcesMap
 	clusterStabilityDuringTest *monitortestframework.ClusterStabilityDuringTest
 }

-func NewLegacyTests(info monitortestframework.MonitorTestInitializationInfo) monitortestframework.MonitorTest {
-	return &legacyMonitorTests{clusterStabilityDuringTest: &info.ClusterStabilityDuringTest}
+func NewLegacyAlertsMonitorTests(info monitortestframework.MonitorTestInitializationInfo) monitortestframework.MonitorTest {
+	return &legacyAlertsMonitorTests{clusterStabilityDuringTest: &info.ClusterStabilityDuringTest}
 }

-func (w *legacyMonitorTests) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+func (w *legacyAlertsMonitorTests) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
 	return nil
 }

-func (w *legacyMonitorTests) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+func (w *legacyAlertsMonitorTests) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
 	w.adminRESTConfig = adminRESTConfig
 	return nil
 }

-func (w *legacyMonitorTests) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
+func (w *legacyAlertsMonitorTests) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
 	w.duration = end.Sub(beginning)
 	return nil, nil, nil
 }

-func (w *legacyMonitorTests) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
+func (w *legacyAlertsMonitorTests) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
 	w.recordedResources = recordedResources
 	return nil, nil
 }

-func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
+func (w *legacyAlertsMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
 	jobType, err := platformidentification.GetJobType(context.TODO(), w.adminRESTConfig)
 	if err != nil {
 		// JobType will be nil here, but we want test cases to all fail if this is the case, so we rely on them to nil check
@@ -57,22 +60,20 @@ func (w *legacyMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.C

 	isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{})
 	if isUpgrade {
-		junits = append(junits, pathologicaleventlibrary.TestDuplicatedEventForUpgrade(finalIntervals, w.adminRESTConfig)...)
 		junits = append(junits, testAlerts(finalIntervals, alerts.AllowedAlertsDuringUpgrade, jobType, w.clusterStabilityDuringTest,
 			w.adminRESTConfig, w.duration, w.recordedResources)...)
 	} else {
-		junits = append(junits, pathologicaleventlibrary.TestDuplicatedEventForStableSystem(finalIntervals, w.adminRESTConfig)...)
 		junits = append(junits, testAlerts(finalIntervals, alerts.AllowedAlertsDuringConformance, jobType, w.clusterStabilityDuringTest,
 			w.adminRESTConfig, w.duration, w.recordedResources)...)
 	}

 	return junits, nil
 }

-func (*legacyMonitorTests) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
+func (*legacyAlertsMonitorTests) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
 	return nil
 }

-func (*legacyMonitorTests) Cleanup(ctx context.Context) error {
+func (*legacyAlertsMonitorTests) Cleanup(ctx context.Context) error {
 	return nil
 }

pkg/monitortests/testframework/legacytestframeworkmonitortests/pathological_events.go

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package legacytestframeworkmonitortests
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/openshift/origin/pkg/monitortestframework"
8+
9+
"github.com/openshift/origin/pkg/monitor/monitorapi"
10+
"github.com/openshift/origin/pkg/monitortestlibrary/pathologicaleventlibrary"
11+
"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
12+
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
13+
"k8s.io/client-go/rest"
14+
)
15+
16+
const (
17+
PathologicalMonitorName = "legacy-test-framework-invariants-pathological"
18+
)
19+
20+
type legacyPathologicalMonitorTests struct {
21+
adminRESTConfig *rest.Config
22+
duration time.Duration
23+
}
24+
25+
func NewLegacyPathologicalMonitorTests(info monitortestframework.MonitorTestInitializationInfo) monitortestframework.MonitorTest {
26+
return &legacyPathologicalMonitorTests{}
27+
}
28+
29+
func (w *legacyPathologicalMonitorTests) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
30+
return nil
31+
}
32+
33+
func (w *legacyPathologicalMonitorTests) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
34+
return nil
35+
}
36+
37+
func (w *legacyPathologicalMonitorTests) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
38+
w.duration = end.Sub(beginning)
39+
return nil, nil, nil
40+
}
41+
42+
func (w *legacyPathologicalMonitorTests) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
43+
return nil, nil
44+
}
45+
46+
func (w *legacyPathologicalMonitorTests) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
47+
junits := []*junitapi.JUnitTestCase{}
48+
isUpgrade := platformidentification.DidUpgradeHappenDuringCollection(finalIntervals, time.Time{}, time.Time{})
49+
if isUpgrade {
50+
junits = append(junits, pathologicaleventlibrary.TestDuplicatedEventForUpgrade(finalIntervals, w.adminRESTConfig)...)
51+
} else {
52+
junits = append(junits, pathologicaleventlibrary.TestDuplicatedEventForStableSystem(finalIntervals, w.adminRESTConfig)...)
53+
}
54+
55+
return junits, nil
56+
}
57+
58+
func (*legacyPathologicalMonitorTests) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
59+
return nil
60+
}
61+
62+
func (*legacyPathologicalMonitorTests) Cleanup(ctx context.Context) error {
63+
return nil
64+
}
