Skip to content

Commit dac9638

Browse files
committed
Remove any Egress IP from Azure public LB backend
The consensus is to not add egress IP to public load balancer backend pool regardless of the presence of an OutBoundRule. During upgrade this PR let cobtroller removes any egress IP added to public load balancer backend pool previously. Signed-off-by: Arnab Ghosh <[email protected]>
1 parent 9699d33 commit dac9638

File tree

8 files changed

+198
-24
lines changed

8 files changed

+198
-24
lines changed

cmd/cloud-network-config-controller/main.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
kubeinformers "k8s.io/client-go/informers"
2828
"k8s.io/client-go/kubernetes"
2929
"k8s.io/client-go/rest"
30+
"k8s.io/client-go/tools/cache"
3031
"k8s.io/client-go/tools/clientcmd"
3132
"k8s.io/client-go/tools/leaderelection"
3233
"k8s.io/client-go/tools/leaderelection/resourcelock"
@@ -139,14 +140,29 @@ func main() {
139140
klog.Exitf("Error building cloudnetwork clientset: %s", err.Error())
140141
}
141142

142-
cloudProviderClient, err := cloudprovider.NewCloudProviderClient(platformCfg, platformStatus, featureGates)
143+
kubeInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(kubeClient, time.Minute*2, kubeinformers.WithNamespace(controllerNamespace))
144+
cloudNetworkInformerFactory := cloudnetworkinformers.NewSharedInformerFactory(cloudNetworkClient, time.Minute*2)
145+
cloudPrivateIPConfigLister := cloudNetworkInformerFactory.Cloud().V1().CloudPrivateIPConfigs().Lister()
146+
nodeLister := kubeInformerFactory.Core().V1().Nodes().Lister()
147+
cloudNetworkInformerFactory.Start(stopCh)
148+
kubeInformerFactory.Start(stopCh)
149+
if ok := cache.WaitForCacheSync(stopCh,
150+
cloudNetworkInformerFactory.Cloud().V1().CloudPrivateIPConfigs().Informer().HasSynced,
151+
kubeInformerFactory.Core().V1().Nodes().Informer().HasSynced,
152+
); !ok {
153+
klog.Fatal("Timed out waiting for informer caches to sync")
154+
}
155+
156+
cloudProviderClient, err := cloudprovider.NewCloudProviderClient(platformCfg,
157+
platformStatus,
158+
featureGates,
159+
cloudPrivateIPConfigLister,
160+
nodeLister,
161+
)
143162
if err != nil {
144163
klog.Fatalf("Error building cloud provider client, err: %v", err)
145164
}
146165

147-
kubeInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(kubeClient, time.Minute*2, kubeinformers.WithNamespace(controllerNamespace))
148-
cloudNetworkInformerFactory := cloudnetworkinformers.NewSharedInformerFactory(cloudNetworkClient, time.Minute*2)
149-
150166
cloudPrivateIPConfigController, err := cloudprivateipconfigcontroller.NewCloudPrivateIPConfigController(
151167
ctx,
152168
cloudProviderClient,
@@ -179,9 +195,6 @@ func main() {
179195
klog.Fatalf("Error getting secret controller, err: %v", err)
180196
}
181197

182-
cloudNetworkInformerFactory.Start(stopCh)
183-
kubeInformerFactory.Start(stopCh)
184-
185198
wg.Add(1)
186199
go func() {
187200
defer wg.Done()

pkg/cloudprovider/aws.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,14 @@ type AWS struct {
3333
client *ec2.EC2
3434
}
3535

36+
func (a *AWS) init() error {
37+
if err := a.initCredentials(); err != nil {
38+
return err
39+
}
40+
41+
return nil
42+
}
43+
3644
func (a *AWS) initCredentials() error {
3745
f, err := sharedCredentialsFileFromDirectory(a.cfg.CredentialDir)
3846
if err != nil {

pkg/cloudprovider/azure.go

Lines changed: 137 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,27 @@ import (
99
"sync"
1010
"time"
1111

12-
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
13-
"k8s.io/utils/ptr"
14-
1512
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
1613
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
1714
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
1815
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
16+
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
1917
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2018
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute"
2119
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
2220
azureapi "github.com/Azure/go-autorest/autorest/azure"
2321
"github.com/Azure/msi-dataplane/pkg/dataplane"
2422
configv1 "github.com/openshift/api/config/v1"
23+
cloudnetworklisters "github.com/openshift/client-go/cloudnetwork/listers/cloudnetwork/v1"
24+
"github.com/openshift/cloud-network-config-controller/pkg/cloudprivateipconfig"
2525
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
27+
"k8s.io/apimachinery/pkg/labels"
2628
"k8s.io/apimachinery/pkg/util/sets"
29+
corelisters "k8s.io/client-go/listers/core/v1"
2730
"k8s.io/klog/v2"
2831
utilnet "k8s.io/utils/net"
32+
"k8s.io/utils/ptr"
2933
)
3034

3135
const (
@@ -51,6 +55,10 @@ type Azure struct {
5155
nodeMapLock sync.Mutex
5256
nodeLockMap map[string]*sync.Mutex
5357
azureWorkloadIdentityEnabled bool
58+
cloudPrivateIPConfigLister cloudnetworklisters.CloudPrivateIPConfigLister
59+
nodeLister corelisters.NodeLister
60+
lbBackendPoolSynced map[string]bool
61+
lbBackendPoolSyncedLock sync.Mutex
5462
}
5563

5664
type azureCredentialsConfig struct {
@@ -113,6 +121,20 @@ func (a *Azure) readAzureCredentialsConfig() (*azureCredentialsConfig, error) {
113121

114122
return &cfg, nil
115123
}
124+
125+
func (a *Azure) init() error {
126+
if err := a.initCredentials(); err != nil {
127+
return err
128+
}
129+
130+
// This is mostly Azure specific and may be removed later.
131+
if err := a.SyncLBBackend(); err != nil {
132+
return err
133+
}
134+
135+
return nil
136+
}
137+
116138
func (a *Azure) initCredentials() error {
117139
cfg, err := a.readAzureCredentialsConfig()
118140
if err != nil {
@@ -172,11 +194,11 @@ func (a *Azure) AssignPrivateIP(ip net.IP, node *corev1.Node) error {
172194
defer nodeLock.Unlock()
173195
instance, err := a.getInstance(node)
174196
if err != nil {
175-
return err
197+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
176198
}
177199
networkInterfaces, err := a.getNetworkInterfaces(instance)
178200
if err != nil {
179-
return err
201+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
180202
}
181203
if networkInterfaces[0].Properties == nil {
182204
return fmt.Errorf("nil network interface properties")
@@ -219,9 +241,18 @@ func (a *Azure) AssignPrivateIP(ip net.IP, node *corev1.Node) error {
219241
"omitting backend address pool when adding secondary IP", ipc)
220242
poller, err := a.createOrUpdate(networkInterface)
221243
if err != nil {
222-
return err
244+
return fmt.Errorf("error while updating network interface: %w", err)
245+
}
246+
if err = a.waitForCompletion(poller); err != nil {
247+
return fmt.Errorf("error while updating network interface: %w", err)
223248
}
224-
return a.waitForCompletion(poller)
249+
// setting lbBackendPoolSynced to true here to make sure that we dont try to
250+
// sync LB backend for any new egress IP later
251+
cacheKey := getIPCacheKey(ip, node.Name)
252+
a.lbBackendPoolSyncedLock.Lock()
253+
a.lbBackendPoolSynced[cacheKey] = true
254+
a.lbBackendPoolSyncedLock.Unlock()
255+
return nil
225256
}
226257

227258
func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
@@ -231,11 +262,11 @@ func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
231262
defer nodeLock.Unlock()
232263
instance, err := a.getInstance(node)
233264
if err != nil {
234-
return err
265+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
235266
}
236267
networkInterfaces, err := a.getNetworkInterfaces(instance)
237268
if err != nil {
238-
return err
269+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
239270
}
240271
// Perform the operation against the first interface listed, which will be
241272
// the primary interface (if it's defined as such) or the first one returned
@@ -262,9 +293,16 @@ func (a *Azure) ReleasePrivateIP(ip net.IP, node *corev1.Node) error {
262293
// Send the request
263294
poller, err := a.createOrUpdate(networkInterface)
264295
if err != nil {
265-
return err
296+
return fmt.Errorf("error while updating network interface: %w", err)
297+
}
298+
if err = a.waitForCompletion(poller); err != nil {
299+
return fmt.Errorf("error while updating network interface: %w", err)
266300
}
267-
return a.waitForCompletion(poller)
301+
cacheKey := getIPCacheKey(ip, node.Name)
302+
a.lbBackendPoolSyncedLock.Lock()
303+
delete(a.lbBackendPoolSynced, cacheKey)
304+
a.lbBackendPoolSyncedLock.Unlock()
305+
return nil
268306
}
269307

270308
func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set[string]) ([]*NodeEgressIPConfiguration, error) {
@@ -302,6 +340,90 @@ func (a *Azure) GetNodeEgressIPConfiguration(node *corev1.Node, cpicIPs sets.Set
302340
return []*NodeEgressIPConfiguration{config}, nil
303341
}
304342

343+
// The consensus is to not add egress IP to public load balancer
344+
// backend pool regardless of the presence of an OutBoundRule.
345+
// During upgrade this function removes any egress IP added to
346+
// public load balancer backend pool previously.
347+
func (a *Azure) SyncLBBackend() error {
348+
cloudPrivateIPConfigs, err := a.cloudPrivateIPConfigLister.List(labels.Everything())
349+
if err != nil {
350+
return fmt.Errorf("error listing cloud private ip config, err: %v", err)
351+
}
352+
for _, cloudPrivateIPConfig := range cloudPrivateIPConfigs {
353+
ip, _, err := cloudprivateipconfig.NameToIP(cloudPrivateIPConfig.Name)
354+
if err != nil {
355+
return fmt.Errorf("error parsing CloudPrivateIPConfig %s: %v", cloudPrivateIPConfig.Name, err)
356+
}
357+
cacheKey := getIPCacheKey(ip, cloudPrivateIPConfig.Spec.Node)
358+
a.lbBackendPoolSyncedLock.Lock()
359+
if synced, ok := a.lbBackendPoolSynced[cacheKey]; ok && synced {
360+
// nothing to do. Continue if LB backend has already synced
361+
a.lbBackendPoolSyncedLock.Unlock()
362+
continue
363+
}
364+
a.lbBackendPoolSyncedLock.Unlock()
365+
ipc := ip.String()
366+
node, err := a.nodeLister.Get(cloudPrivateIPConfig.Spec.Node)
367+
if err != nil && apierrors.IsNotFound(err) {
368+
klog.Warningf("source node: %s no longer exists for CloudPrivateIPConfig: %q",
369+
cloudPrivateIPConfig.Spec.Node, cloudPrivateIPConfig.Name)
370+
continue
371+
} else if err != nil {
372+
return fmt.Errorf("error getting node %s for CloudPrivateIPConfig %q: %w",
373+
cloudPrivateIPConfig.Spec.Node, cloudPrivateIPConfig.Name, err)
374+
}
375+
376+
klog.Infof("Acquiring node lock for modifying load balancer backend pool, node: %s, ip: %s", node.Name, ipc)
377+
nodeLock := a.getNodeLock(node.Name)
378+
if err := func() error {
379+
nodeLock.Lock()
380+
defer nodeLock.Unlock()
381+
instance, err := a.getInstance(node)
382+
if err != nil {
383+
return fmt.Errorf("error while retrieving instance details from Azure: %w", err)
384+
}
385+
networkInterfaces, err := a.getNetworkInterfaces(instance)
386+
if err != nil {
387+
return fmt.Errorf("error while retrieving interface details from Azure: %w", err)
388+
}
389+
if networkInterfaces[0].Properties == nil {
390+
return fmt.Errorf("nil network interface properties")
391+
}
392+
// Perform the operation against the first interface listed, which will be
393+
// the primary interface (if it's defined as such) or the first one returned
394+
// following the order Azure specifies.
395+
networkInterface := networkInterfaces[0]
396+
var loadBalancerBackendPoolModified bool
397+
// omit Egress IP from LB backend pool
398+
ipConfigurations := networkInterface.Properties.IPConfigurations
399+
for _, ipCfg := range ipConfigurations {
400+
if ptr.Deref(ipCfg.Properties.PrivateIPAddress, "") == ipc &&
401+
ipCfg.Properties.LoadBalancerBackendAddressPools != nil {
402+
ipCfg.Properties.LoadBalancerBackendAddressPools = nil
403+
loadBalancerBackendPoolModified = true
404+
}
405+
}
406+
if loadBalancerBackendPoolModified {
407+
networkInterface.Properties.IPConfigurations = ipConfigurations
408+
poller, err := a.createOrUpdate(networkInterface)
409+
if err != nil {
410+
return fmt.Errorf("error while updating network interface: %w", err)
411+
}
412+
if err = a.waitForCompletion(poller); err != nil {
413+
return fmt.Errorf("error while updating network interface: %w", err)
414+
}
415+
a.lbBackendPoolSyncedLock.Lock()
416+
a.lbBackendPoolSynced[cacheKey] = true
417+
a.lbBackendPoolSyncedLock.Unlock()
418+
}
419+
return nil
420+
}(); err != nil {
421+
return err
422+
}
423+
}
424+
return nil
425+
}
426+
305427
func (a *Azure) createOrUpdate(networkInterface armnetwork.Interface) (*runtime.Poller[armnetwork.InterfacesClientCreateOrUpdateResponse], error) {
306428
ctx, cancel := context.WithTimeout(a.ctx, defaultAzureOperationTimeout)
307429
defer cancel()
@@ -604,3 +726,7 @@ func ParseCloudEnvironment(env azureapi.Environment) cloud.Configuration {
604726
}
605727
return cloudConfig
606728
}
729+
730+
func getIPCacheKey(ip net.IP, node string) string {
731+
return fmt.Sprintf("%s|%s", node, ip.String())
732+
}

pkg/cloudprovider/cloudprovider.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111

1212
configv1 "github.com/openshift/api/config/v1"
1313
apifeatures "github.com/openshift/api/features"
14+
cloudnetworklisters "github.com/openshift/client-go/cloudnetwork/listers/cloudnetwork/v1"
1415
"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
16+
corelisters "k8s.io/client-go/listers/core/v1"
1517

1618
corev1 "k8s.io/api/core/v1"
1719
"k8s.io/apimachinery/pkg/util/sets"
@@ -31,14 +33,16 @@ func UnexpectedURIError(uri string) error {
3133
}
3234

3335
type CloudProviderIntf interface {
34-
// initCredentials initializes the cloud API credentials by reading the
36+
// init function initializes the cloud API credentials by reading the
3537
// secret data which has been mounted in cloudProviderSecretLocation. The
3638
// mounted secret data in Kubernetes is generated following a one-to-one
3739
// mapping between each .data field and a corresponding file. Hence
3840
// .data.foo will generate a file foo in that location with the decoded
3941
// secret data, similarity we would have a file bar if .data.bar was
4042
// defined.
41-
initCredentials() error
43+
// For Azure platform it additionally removes any egress IP which is
44+
// already added to backend pool of a public load balancer.
45+
init() error
4246

4347
// AssignPrivateIP attempts to assigning the IP address provided to the VM
4448
// instance corresponding to the corev1.Node provided on the cloud the
@@ -134,7 +138,11 @@ func (n *NodeEgressIPConfiguration) String() string {
134138
return fmt.Sprintf("%v", *n)
135139
}
136140

137-
func NewCloudProviderClient(cfg CloudProviderConfig, platformStatus *configv1.PlatformStatus, featureGates featuregates.FeatureGate) (CloudProviderIntf, error) {
141+
func NewCloudProviderClient(cfg CloudProviderConfig,
142+
platformStatus *configv1.PlatformStatus,
143+
featureGates featuregates.FeatureGate,
144+
cloudPrivateIPConfigLister cloudnetworklisters.CloudPrivateIPConfigLister,
145+
nodeLister corelisters.NodeLister) (CloudProviderIntf, error) {
138146
var cloudProviderIntf CloudProviderIntf
139147

140148
// Initialize a separate context from the main context, rationale: cloud
@@ -161,6 +169,9 @@ func NewCloudProviderClient(cfg CloudProviderConfig, platformStatus *configv1.Pl
161169
platformStatus: azurePlatformStatus,
162170
nodeLockMap: make(map[string]*sync.Mutex),
163171
azureWorkloadIdentityEnabled: featureGates.Enabled(apifeatures.FeatureGateAzureWorkloadIdentity),
172+
lbBackendPoolSynced: make(map[string]bool),
173+
cloudPrivateIPConfigLister: cloudPrivateIPConfigLister,
174+
nodeLister: nodeLister,
164175
}
165176
case PlatformTypeAWS:
166177
cloudProviderIntf = &AWS{
@@ -178,7 +189,7 @@ func NewCloudProviderClient(cfg CloudProviderConfig, platformStatus *configv1.Pl
178189
default:
179190
return nil, fmt.Errorf("unsupported cloud provider platform type: %s", cfg.PlatformType)
180191
}
181-
return cloudProviderIntf, cloudProviderIntf.initCredentials()
192+
return cloudProviderIntf, cloudProviderIntf.init()
182193
}
183194

184195
func (c *CloudProvider) readSecretData(secret string) (string, error) {

pkg/cloudprovider/cloudprovider_fake.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func NewFakeCloudProvider(mockErrorOnAssign, mockErrorOnAssignWithExistingIPCond
3030
}
3131
}
3232

33-
func (f *FakeCloudProvider) initCredentials() error {
33+
func (f *FakeCloudProvider) init() error {
3434
return nil
3535
}
3636

pkg/cloudprovider/gcp.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ type GCP struct {
3636
nodeLockMap map[string]*sync.Mutex
3737
}
3838

39+
func (g *GCP) init() error {
40+
if err := g.initCredentials(); err != nil {
41+
return err
42+
}
43+
44+
return nil
45+
}
46+
3947
func (g *GCP) initCredentials() (err error) {
4048
secret, err := g.readSecretData("service_account.json")
4149
if err != nil {

pkg/cloudprovider/openstack.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,14 @@ type OpenStack struct {
6767

6868
var novaDeviceOwnerRegex = regexp.MustCompile("^compute:.*")
6969

70+
func (o *OpenStack) init() error {
71+
if err := o.initCredentials(); err != nil {
72+
return err
73+
}
74+
75+
return nil
76+
}
77+
7078
// initCredentials initializes the cloud API credentials by reading the
7179
// secret data which has been mounted in cloudProviderSecretLocation. The
7280
// mounted secret data in Kubernetes is generated following a one-to-one

0 commit comments

Comments
 (0)