Skip to content

Commit dafc0a1

Browse files
authored
Merge pull request #1209 from mtulio/fix-1208-byosg-update
Fix leak managed/owned security group on Service update with BYO SG on CLB
2 parents 69db463 + c7afd03 commit dafc0a1

3 files changed

Lines changed: 556 additions & 25 deletions

File tree

pkg/providers/v1/aws.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2950,19 +2950,9 @@ func (c *Cloud) updateInstanceSecurityGroupsForLoadBalancer(ctx context.Context,
29502950
loadBalancerSecurityGroupID := lbSecurityGroupIDs[0]
29512951

29522952
// Get the actual list of groups that allow ingress from the load-balancer
2953-
actualGroups := make(map[*ec2types.SecurityGroup]bool)
2954-
{
2955-
describeRequest := &ec2.DescribeSecurityGroupsInput{}
2956-
describeRequest.Filters = []ec2types.Filter{
2957-
newEc2Filter("ip-permission.group-id", loadBalancerSecurityGroupID),
2958-
}
2959-
response, err := c.ec2.DescribeSecurityGroups(ctx, describeRequest)
2960-
if err != nil {
2961-
return fmt.Errorf("error querying security groups for ELB: %q", err)
2962-
}
2963-
for _, sg := range response {
2964-
actualGroups[&sg] = c.tagging.hasClusterTag(sg.Tags)
2965-
}
2953+
actualGroups, _, err := c.buildSecurityGroupRuleReferences(ctx, loadBalancerSecurityGroupID)
2954+
if err != nil {
2955+
return fmt.Errorf("error building security group rule references: %w", err)
29662956
}
29672957

29682958
// Open the firewall from the load balancer to the instance

pkg/providers/v1/aws_loadbalancer.go

Lines changed: 125 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"time"
3030

3131
"github.com/aws/aws-sdk-go-v2/aws"
32+
"github.com/aws/aws-sdk-go-v2/service/ec2"
3233
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
3334

3435
elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1252,24 +1253,25 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
12521253

12531254
{
12541255
// Sync security groups
1255-
expected := sets.New[string](securityGroupIDs...)
1256-
actual := stringSetFromList(loadBalancer.SecurityGroups)
1256+
expected := sets.New(securityGroupIDs...)
1257+
actual := sets.New(loadBalancer.SecurityGroups...)
12571258

12581259
if !expected.Equal(actual) {
12591260
// This call just replaces the security groups, unlike e.g. subnets (!)
1260-
request := &elb.ApplySecurityGroupsToLoadBalancerInput{}
1261-
request.LoadBalancerName = aws.String(loadBalancerName)
1262-
if securityGroupIDs == nil {
1263-
request.SecurityGroups = nil
1264-
} else {
1265-
request.SecurityGroups = securityGroupIDs
1266-
}
1267-
klog.V(2).Info("Applying updated security groups to load balancer")
1268-
_, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, request)
1269-
if err != nil {
1261+
klog.V(2).Infof("Applying updated security groups to load balancer %q", loadBalancerName)
1262+
if _, err := c.elb.ApplySecurityGroupsToLoadBalancer(ctx, &elb.ApplySecurityGroupsToLoadBalancerInput{
1263+
LoadBalancerName: aws.String(loadBalancerName),
1264+
SecurityGroups: securityGroupIDs,
1265+
}); err != nil {
12701266
return nil, fmt.Errorf("error applying AWS loadbalancer security groups: %q", err)
12711267
}
12721268
dirty = true
1269+
1270+
// Ensure the replaced security groups are removed from AWS when owned by the controller.
1271+
// Only remove groups that were in `actual` but not in `expected` (the ones being replaced).
1272+
if errs := c.removeOwnedSecurityGroups(ctx, loadBalancerName, actual.Difference(expected).UnsortedList()); len(errs) > 0 {
1273+
return nil, fmt.Errorf("error removing owned security groups: %v", errs)
1274+
}
12731275
}
12741276
}
12751277

@@ -1880,3 +1882,114 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
18801882

18811883
return nil
18821884
}
1885+
1886+
// buildSecurityGroupRuleReferences finds all security groups that have ingress rules
1887+
// referencing the specified security group ID, and categorizes them based on cluster tagging.
1888+
// This is used to identify dependencies before removing a security group.
1889+
//
1890+
// Parameters:
1891+
// - ctx: The context for the request.
1892+
// - sgID: The ID of the security group to find references for.
1893+
//
1894+
// Returns:
1895+
// - map[*ec2types.SecurityGroup]bool: All security groups with ingress rules referencing sgID, mapped to their cluster tag status (true/false).
1896+
// - map[*ec2types.SecurityGroup]IPPermissionSet: Only cluster-tagged security groups mapped to their ingress rules that reference sgID.
1897+
// - error: An error if the AWS DescribeSecurityGroups API call fails.
1898+
func (c *Cloud) buildSecurityGroupRuleReferences(ctx context.Context, sgID string) (map[*ec2types.SecurityGroup]bool, map[*ec2types.SecurityGroup]IPPermissionSet, error) {
1899+
groupsHasTags := make(map[*ec2types.SecurityGroup]bool)
1900+
groupsLinkedPermissions := make(map[*ec2types.SecurityGroup]IPPermissionSet)
1901+
sgsOut, err := c.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{
1902+
Filters: []ec2types.Filter{
1903+
newEc2Filter("ip-permission.group-id", sgID),
1904+
},
1905+
})
1906+
if err != nil {
1907+
return groupsHasTags, groupsLinkedPermissions, fmt.Errorf("error querying security groups for ELB: %w", err)
1908+
}
1909+
1910+
for _, sg := range sgsOut {
1911+
groupsHasTags[&sg] = c.tagging.hasClusterTag(sg.Tags)
1912+
1913+
groupsLinkedPermissions[&sg] = NewIPPermissionSet()
1914+
for _, rule := range sg.IpPermissions {
1915+
if rule.UserIdGroupPairs != nil {
1916+
for _, pair := range rule.UserIdGroupPairs {
1917+
if pair.GroupId != nil && aws.ToString(pair.GroupId) == sgID {
1918+
groupsLinkedPermissions[&sg].Insert(rule)
1919+
}
1920+
}
1921+
}
1922+
}
1923+
1924+
}
1925+
return groupsHasTags, groupsLinkedPermissions, nil
1926+
}
1927+
1928+
// removeOwnedSecurityGroups removes the CLB owned/managed security groups from AWS.
1929+
// It revokes ingress rules that reference the security groups to be removed,
1930+
// then deletes the security groups that are owned by the controller.
1931+
// This is used when updating load balancer security groups to clean up orphaned ones.
1932+
//
1933+
// Parameters:
1934+
// - `ctx`: The context for the operation.
1935+
// - `loadBalancerName`: The name of the load balancer (used for logging and deletion operations).
1936+
// - `securityGroups`: The list of security group IDs to process for removal.
1937+
//
1938+
// Returns:
1939+
// - `[]error`: Collection of all errors encountered during the removal process.
1940+
func (c *Cloud) removeOwnedSecurityGroups(ctx context.Context, loadBalancerName string, securityGroups []string) []error {
1941+
allErrs := []error{}
1942+
sgMap := make(map[string]struct{})
1943+
1944+
// Validate each security group reference, building a list to be deleted.
1945+
for _, sg := range securityGroups {
1946+
isOwned, err := c.isOwnedSecurityGroup(ctx, sg)
1947+
if err != nil {
1948+
allErrs = append(allErrs, fmt.Errorf("unable to validate if security group %q is owned by the controller: %w", sg, err))
1949+
continue
1950+
}
1951+
1952+
groupsWithClusterTag, groupsLinkedPermissions, err := c.buildSecurityGroupRuleReferences(ctx, sg)
1953+
if err != nil {
1954+
allErrs = append(allErrs, fmt.Errorf("error building security group rule references for %q: %w", sg, err))
1955+
continue
1956+
}
1957+
1958+
// Revoke ingress rules referencing the security group to be deleted
1959+
// from cluster-tagged security groups, when the referenced security
1960+
// group has no cluster tag, skip the revoke assuming it is user-managed.
1961+
for sgTarget, sgPerms := range groupsLinkedPermissions {
1962+
if !groupsWithClusterTag[sgTarget] {
1963+
klog.Warningf("security group %q has no cluster tag, skipping remove lifecycle after update", sg)
1964+
continue
1965+
}
1966+
1967+
klog.V(2).Infof("revoking security group ingress references of %q from %q", sg, aws.ToString(sgTarget.GroupId))
1968+
if _, err := c.ec2.RevokeSecurityGroupIngress(ctx, &ec2.RevokeSecurityGroupIngressInput{
1969+
GroupId: sgTarget.GroupId,
1970+
IpPermissions: sgPerms.List(),
1971+
}); err != nil {
1972+
allErrs = append(allErrs, fmt.Errorf("error revoking security group ingress rules from %q: %w", aws.ToString(sgTarget.GroupId), err))
1973+
continue
1974+
}
1975+
}
1976+
1977+
// Skip security group removal when the security group is not owned by the controller.
1978+
if !isOwned {
1979+
klog.Warningf("security group %q is not owned by the controller, skipping remove lifecycle after update", sg)
1980+
continue
1981+
}
1982+
1983+
klog.V(2).Infof("making loadbalancer owned security group %q ready for deletion", sg)
1984+
sgMap[sg] = struct{}{}
1985+
}
1986+
if len(sgMap) == 0 {
1987+
return allErrs
1988+
}
1989+
1990+
if err := c.deleteSecurityGroupsWithBackoff(ctx, loadBalancerName, sgMap); err != nil {
1991+
return append(allErrs, fmt.Errorf("error deleting security groups %v: %v", sgMap, err))
1992+
}
1993+
klog.V(2).Infof("deleted %d loadbalancer owned security groups", len(sgMap))
1994+
return nil
1995+
}

0 commit comments

Comments
 (0)