Skip to content

Commit c0c29ed

Browse files
committed
fix(scheduler): remove node from cache when node-selector label is removed
When using --node-selector in volcano scheduler, removing the matching label from a node did not evict the node from the scheduler cache. SyncNode() checked nodeCanAddCache() but only returned nil without calling RemoveNode(), causing the scheduler to continue scheduling pods to nodes that no longer match the selector. Add RemoveNode() call when nodeCanAddCache() returns false, so the node is properly evicted from cache when its labels no longer match the configured node selector. Signed-off-by: brick-pixel <brick-pixel@users.noreply.github.com>
1 parent 2946de8 commit c0c29ed

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

pkg/scheduler/cache/event_handlers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,11 @@ func (sc *SchedulerCache) SyncNode(nodeName string) error {
621621
}
622622

623623
if !sc.nodeCanAddCache(node) {
624+
if deleteErr := sc.RemoveNode(nodeName); deleteErr != nil {
625+
klog.Errorf("Failed to remove node <%s> from cache after label change: %s", nodeName, deleteErr.Error())
626+
return deleteErr
627+
}
628+
klog.V(3).Infof("Node <%s> no longer matches node selector, removed from cache.", nodeName)
624629
return nil
625630
}
626631
nodeCopy := node.DeepCopy()

pkg/scheduler/cache/event_handlers_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,3 +1573,26 @@ func TestSchedulerCache_SyncHyperNode(t *testing.T) {
15731573
})
15741574
}
15751575
}
1576+
1577+
func TestSchedulerCache_SyncNode_RemoveOnLabelChange(t *testing.T) {
1578+
// Simulate: node was in cache with matching label, then label is removed
1579+
n1WithLabel := util.BuildNode("n1", nil, map[string]string{"train": "true"})
1580+
n1WithoutLabel := util.BuildNode("n1", nil, map[string]string{})
1581+
1582+
sc := NewDefaultMockSchedulerCache("volcano")
1583+
sc.nodeSelectorLabels = map[string]sets.Empty{
1584+
"train:true": {},
1585+
}
1586+
1587+
// Step 1: Add node with matching label to cache
1588+
sc.nodeInformer.Informer().GetIndexer().Add(n1WithLabel)
1589+
err := sc.SyncNode("n1")
1590+
assert.NoError(t, err)
1591+
assert.Contains(t, sc.Nodes, "n1", "node should be in cache after sync with matching label")
1592+
1593+
// Step 2: Update node to remove the label, then sync again
1594+
sc.nodeInformer.Informer().GetIndexer().Update(n1WithoutLabel)
1595+
err = sc.SyncNode("n1")
1596+
assert.NoError(t, err)
1597+
assert.NotContains(t, sc.Nodes, "n1", "node should be removed from cache after label removal")
1598+
}

0 commit comments

Comments
 (0)