Skip to content

Commit 335bd7a

Browse files
committed
Remove any Egress IP from Azure public LB backend
The consensus is to not add egress IP to public load balancer backend pool regardless of the presence of an OutBoundRule. During upgrade this PR let cobtroller removes any egress IP added to public load balancer backend pool previously. Signed-off-by: Arnab Ghosh <[email protected]>
1 parent 9699d33 commit 335bd7a

File tree

7 files changed

+119
-12
lines changed

7 files changed

+119
-12
lines changed

pkg/cloudprovider/aws.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ func (a *AWS) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set[s
222222
return []*NodeEgressIPConfiguration{config}, nil
223223
}
224224

225+
func (a *AWS) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
226+
// We dont add Egress IP to AWS public LB backend; nothing to do
227+
return nil
228+
}
229+
225230
// Unfortunately the AWS API (WaitUntilInstanceRunning) only handles equality
226231
// assertion: so on delete we can't specify and assert that the IP which is
227232
// being removed is completely removed, we are forced to do the inverse, i.e:

pkg/cloudprovider/azure.go

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@ import (
99
"sync"
1010
"time"
1111

12-
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
13-
"k8s.io/utils/ptr"
14-
1512
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
1613
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
1714
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
1815
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
16+
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
1917
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2018
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute"
2119
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
@@ -26,6 +24,7 @@ import (
2624
"k8s.io/apimachinery/pkg/util/sets"
2725
"k8s.io/klog/v2"
2826
utilnet "k8s.io/utils/net"
27+
"k8s.io/utils/ptr"
2928
)
3029

3130
const (
@@ -51,6 +50,8 @@ type Azure struct {
5150
nodeMapLock sync.Mutex
5251
nodeLockMap map[string]*sync.Mutex
5352
azureWorkloadIdentityEnabled bool
53+
lbBackendPoolSynced map[string]bool
54+
lbBackendPoolSyncedLock sync.Mutex
5455
}
5556

5657
type azureCredentialsConfig struct {
@@ -172,11 +173,11 @@ func (a *Azure) AssignPrivateIP(ip net.IP, node *corev1.Node) error {
172173
defer nodeLock.Unlock()
173174
instance, err := a.getInstance(node)
174175
if err != nil {
175-
return err
176+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
176177
}
177178
networkInterfaces, err := a.getNetworkInterfaces(instance)
178179
if err != nil {
179-
return err
180+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
180181
}
181182
if networkInterfaces[0].Properties == nil {
182183
return fmt.Errorf("nil network interface properties")
@@ -219,9 +220,18 @@ func (a *Azure) AssignPrivateIP(ip net.IP, node *corev1.Node) error {
219220
"omitting backend address pool when adding secondary IP", ipc)
220221
poller, err := a.createOrUpdate(networkInterface)
221222
if err != nil {
222-
return err
223+
return fmt.Errorf("error while updating network interface: %w", err)
224+
}
225+
if err = a.waitForCompletion(poller); err != nil {
226+
return fmt.Errorf("error while updating network interface: %w", err)
223227
}
224-
return a.waitForCompletion(poller)
228+
// setting lbBackendPoolSynced to true here to make sure that we dont try to
229+
// sync LB backend for any new egress IP later
230+
cacheKey := getIPCacheKey(ip, node)
231+
a.lbBackendPoolSyncedLock.Lock()
232+
a.lbBackendPoolSynced[cacheKey] = true
233+
a.lbBackendPoolSyncedLock.Unlock()
234+
return nil
225235
}
226236

227237
func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
@@ -231,11 +241,11 @@ func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
231241
defer nodeLock.Unlock()
232242
instance, err := a.getInstance(node)
233243
if err != nil {
234-
return err
244+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
235245
}
236246
networkInterfaces, err := a.getNetworkInterfaces(instance)
237247
if err != nil {
238-
return err
248+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
239249
}
240250
// Perform the operation against the first interface listed, which will be
241251
// the primary interface (if it's defined as such) or the first one returned
@@ -262,9 +272,16 @@ func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
262272
// Send the request
263273
poller, err := a.createOrUpdate(networkInterface)
264274
if err != nil {
265-
return err
275+
return fmt.Errorf("error while updating network interface: %w", err)
276+
}
277+
if err = a.waitForCompletion(poller); err != nil {
278+
return fmt.Errorf("error while updating network interface: %w", err)
266279
}
267-
return a.waitForCompletion(poller)
280+
cacheKey := getIPCacheKey(ip, node)
281+
a.lbBackendPoolSyncedLock.Lock()
282+
delete(a.lbBackendPoolSynced, cacheKey)
283+
a.lbBackendPoolSyncedLock.Unlock()
284+
return nil
268285
}
269286

270287
func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set[string]) ([]*NodeEgressIPConfiguration, error) {
@@ -302,6 +319,62 @@ func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set
302319
return []*NodeEgressIPConfiguration{config}, nil
303320
}
304321

322+
// The consensus is to not add egress IP to public load balancer
323+
// backend pool regardless of the presence of an OutBoundRule.
324+
// During upgrade this function removes any egress IP added to
325+
// public load balancer backend pool previously.
326+
func (a *Azure) SyncLBBackend(ip net.IP, node *corev1.Node) error {
327+
ipc := ip.String()
328+
cacheKey := getIPCacheKey(ip, node)
329+
a.lbBackendPoolSyncedLock.Lock()
330+
defer a.lbBackendPoolSyncedLock.Unlock()
331+
if synced, ok := a.lbBackendPoolSynced[cacheKey]; ok && synced {
332+
// nothing to do. Return immediately if LB backend has already synced
333+
return nil
334+
}
335+
klog.Infof("Acquiring node lock for modifying load balancer backend pool, node: %s, ip: %s", node.Name, ipc)
336+
nodeLock := a.getNodeLock(node.Name)
337+
nodeLock.Lock()
338+
defer nodeLock.Unlock()
339+
instance, err := a.getInstance(node)
340+
if err != nil {
341+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
342+
}
343+
networkInterfaces, err := a.getNetworkInterfaces(instance)
344+
if err != nil {
345+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
346+
}
347+
if networkInterfaces[0].Properties == nil {
348+
return fmt.Errorf("nil network interface properties")
349+
}
350+
// Perform the operation against the first interface listed, which will be
351+
// the primary interface (if it's defined as such) or the first one returned
352+
// following the order Azure specifies.
353+
networkInterface := networkInterfaces[0]
354+
var loadBalancerBackendPoolModified bool
355+
// omit Egress IP from LB backend pool
356+
ipConfigurations := networkInterface.Properties.IPConfigurations
357+
for _, ipCfg := range ipConfigurations {
358+
if ptr.Deref(ipCfg.Properties.PrivateIPAddress, "") == ipc &&
359+
ipCfg.Properties.LoadBalancerBackendAddressPools != nil {
360+
ipCfg.Properties.LoadBalancerBackendAddressPools = nil
361+
loadBalancerBackendPoolModified = true
362+
}
363+
}
364+
if loadBalancerBackendPoolModified {
365+
networkInterface.Properties.IPConfigurations = ipConfigurations
366+
poller, err := a.createOrUpdate(networkInterface)
367+
if err != nil {
368+
return fmt.Errorf("error while updating network interface: %w", err)
369+
}
370+
if err = a.waitForCompletion(poller); err != nil {
371+
return fmt.Errorf("error while updating network interface: %w", err)
372+
}
373+
a.lbBackendPoolSynced[cacheKey] = true
374+
}
375+
return nil
376+
}
377+
305378
func (a *Azure) createOrUpdate(networkInterface armnetwork.Interface) (*runtime.Poller[armnetwork.InterfacesClientCreateOrUpdateResponse], error) {
306379
ctx, cancel := context.WithTimeout(a.ctx, defaultAzureOperationTimeout)
307380
defer cancel()
@@ -604,3 +677,7 @@ func ParseCloudEnvironment(env azureapi.Environment) cloud.Configuration {
604677
}
605678
return cloudConfig
606679
}
680+
681+
func getIPCacheKey(ip net.IP, node *corev1.Node) string {
682+
return fmt.Sprintf("%s|%s", node.Name, ip.String())
683+
}

pkg/cloudprovider/cloudprovider.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ type CloudProviderIntf interface {
6767
// CleanupNode removes any internal state associated with the node.
6868
// This should be called when a node is deleted.
6969
CleanupNode(nodeName string)
70+
71+
// SyncLBBackend removes any egress IP which is already added to backend pool of
72+
// a public load balancer. This is mostly Azure specific and may be removed later.
73+
SyncLBBackend(ip net.IP, node *corev1.Node) error
7074
}
7175

7276
// CloudProviderWithMoveIntf is additional interface that can be added to cloud
@@ -161,6 +165,7 @@ func NewCloudProviderClient(cfg CloudProviderConfig, platformStatus *configv1.Pl
161165
platformStatus: azurePlatformStatus,
162166
nodeLockMap: make(map[string]*sync.Mutex),
163167
azureWorkloadIdentityEnabled: featureGates.Enabled(apifeatures.FeatureGateAzureWorkloadIdentity),
168+
lbBackendPoolSynced: make(map[string]bool),
164169
}
165170
case PlatformTypeAWS:
166171
cloudProviderIntf = &AWS{

pkg/cloudprovider/cloudprovider_fake.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,7 @@ func (f *FakeCloudProvider) GetNodeEgressIPConfiguration(node *corev1.Node, cpic
7272

7373
func (f *FakeCloudProvider) CleanupNode(nodeName string) {
7474
}
75+
76+
func (f *FakeCloudProvider) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
77+
return nil
78+
}

pkg/cloudprovider/gcp.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ func (g *GCP) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set[s
201201
return nil, nil
202202
}
203203

204+
func (g *GCP) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
205+
// We dont add Egress IP to GCP public LB backend; nothing to do
206+
return nil
207+
}
208+
204209
// The GCP zone operations API call. All GCP infrastructure modifications are
205210
// assigned a unique operation ID and are queued in a global/zone operations
206211
// queue. In the case of assignments of private IP addresses to instances, the

pkg/cloudprovider/openstack.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,11 @@ func (o *OpenStack) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets
545545
return nil, fmt.Errorf("no suitable interface configurations found")
546546
}
547547

548+
func (o *OpenStack) SyncLBBackend(_ net.IP, _ *corev1.Node) error {
549+
// We dont add Egress IP to OpenStack public LB backend; nothing to do
550+
return nil
551+
}
552+
548553
// getNeutronPortNodeEgressIPConfiguration renders the NeutronPortNodeEgressIPConfiguration for a given port.
549554
// The interface is keyed by a neutron UUID.
550555
// If multiple IPv4 repectively multiple IPv6 subnets are attached to the same port, throw an error.

pkg/controller/cloudprivateipconfig/cloudprivateipconfig_controller.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,14 @@ func (c *CloudPrivateIPConfigController) SyncHandler(key string) error {
180180

181181
nodeNameToAdd, nodeNameToDel := c.computeOp(cloudPrivateIPConfig)
182182
switch {
183-
// Dequeue on NOOP, there's nothing to do
184183
case nodeNameToAdd == "" && nodeNameToDel == "":
184+
node, err := c.nodesLister.Get(cloudPrivateIPConfig.Spec.Node)
185+
if err != nil {
186+
return err
187+
}
188+
if err := c.cloudProviderClient.SyncLBBackend(ip, node); err != nil {
189+
return fmt.Errorf("error while syncing egress IP %q added to LB backend pool: %w", key, err)
190+
}
185191
return nil
186192
case nodeNameToAdd != "" && nodeNameToDel != "":
187193
klog.Infof("CloudPrivateIPConfig: %q will be moved from node %q to node %q", key, nodeNameToDel, nodeNameToAdd)

0 commit comments

Comments
 (0)