From 8cf2985bd95de8be2de84e163bfa86c1843783da Mon Sep 17 00:00:00 2001 From: Lance Tuller Date: Fri, 19 Jun 2026 14:42:23 -0400 Subject: [PATCH] feat(evpn): add managed vrf l3vxlan status substrate --- CHANGELOG.md | 10 + README.md | 7 +- ROADMAP.md | 19 +- crates/api/src/evpn_service.rs | 195 ++++- crates/cli/src/commands/evpn.rs | 88 ++ crates/cli/src/test_support.rs | 4 + crates/evpn-linux/src/in_memory.rs | 3 + crates/evpn-linux/src/linux/links.rs | 272 ++++-- crates/evpn-linux/src/linux/mod.rs | 7 + crates/evpn-linux/src/linux/notify.rs | 1 + crates/evpn-linux/src/linux/probe.rs | 1 + crates/evpn-linux/src/reconcile.rs | 825 ++++++++++++++++-- crates/evpn-linux/src/snapshot.rs | 45 + crates/evpn-linux/tests/reconcile_actor.rs | 3 + crates/evpn/src/lib.rs | 8 +- crates/evpn/src/managed_netdev.rs | 183 +++- docs/API.md | 16 +- docs/CONFIGURATION.md | 89 +- .../adr/0054-evpn-linux-dataplane-boundary.md | 7 +- ...an-aware-bridge-managed-netdev-boundary.md | 7 +- docs/adr/0091-evpn-managed-netdev-creation.md | 35 +- docs/evpn-enablement.md | 11 +- docs/evpn-vtep-setup.md | 4 +- docs/grpc-method-inventory.md | 2 +- docs/reload-matrix.md | 2 +- proto/rustbgpd.proto | 6 + src/config/mod.rs | 44 +- src/config/schema.rs | 50 +- src/config/tests.rs | 83 +- src/config/validation.rs | 187 +++- 30 files changed, 1936 insertions(+), 278 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 950d672b..3b602d03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,16 @@ This project follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html). instance probe transitions from NotReady to Ready only after both links are owned-safe. SVD / collect-metadata VXLAN plus VRF/L3VXLAN lifecycle stay deferred. 
+- **ADR-0091 managed VRF/L3VXLAN schema and status substrate.** + `[managed_netdevs]` now accepts `[[managed_netdevs.vrfs]]` and + `[[managed_netdevs.l3vxlans]]` rows, derives + `rustbgpd:vrf:<owner>:<name>` and `rustbgpd:l3vxlan:<owner>:<name>` + ownership stamps, validates protected VRF/L3VXLAN identity attributes, + parses observed VRF/L3VXLAN link state from Linux link dumps, and exposes + desired/observed/orphan/foreign/unsafe status through + `EvpnService.ListManagedNetdevs` and `rbgp evpn managed-netdevs`. VRF and + L3VXLAN create/adopt/reap lifecycle remains deferred to the next managed + netdev slice. ### Changed diff --git a/README.md b/README.md index 3b8de6d6..aebb8b49 100644 --- a/README.md +++ b/README.md @@ -361,8 +361,9 @@ See [docs/INTEROP.md](docs/INTEROP.md) for full procedures and results. topologies, including SVD / collect-metadata VXLAN; opt-in bridge and fixed-VNI VXLAN netdev creation now ship under [ADR-0091](docs/adr/0091-evpn-managed-netdev-creation.md) - (`[managed_netdevs]`). SVD / collect-metadata VXLAN and VRF netdev - creation remain operator-provisioned per + (`[managed_netdevs]`), and VRF/L3VXLAN rows now have schema validation, + ownership stamps, and `ListManagedNetdevs` status. 
SVD / collect-metadata + VXLAN and VRF/L3VXLAN lifecycle creation remain operator-provisioned per [ADR-0088](docs/adr/0088-evpn-vlan-aware-bridge-managed-netdev-boundary.md); [ADR-0089](docs/adr/0089-evpn-vni-per-bd-vlan-aware-bridge-support.md) scopes the first VLAN-aware bridge support to VNI-per-broadcast-domain @@ -412,7 +413,7 @@ evolving API.** | **Runtime** | Rust 1.95+ (workspace MSRV — set by the bundled SQLite build), single binary, no external dependencies except optional RPKI/BMP/MRT backends | | **Config stability** | TOML format may change between minor versions; migrations documented in CHANGELOG | | **API stability** | gRPC proto may add fields/RPCs; breaking changes documented in CHANGELOG | -| **Not yet supported** | EVPN runtime L3VNI/device/table IP-VRF identity changes (restart-required by design) and ES/IP-VRF row mixed edits outside the L2VNI-only composer, true RFC VLAN-aware bundle / non-zero Ethernet Tag service, rustbgpd-managed SVD / collect-metadata VXLAN and VRF / L3VXLAN netdev creation (managed bridge and fixed-VNI VXLAN creation now ship), EVPN route types 6-11 / PBB / MVPN / MPLS/SRv6 service encapsulation, VPNv4/v6, labeled-unicast, Route Target Constraints, BGP-LS, Confederation, TCP-AO dynamic-neighbor / runtime-rotation / multi-key rollover | +| **Not yet supported** | EVPN runtime L3VNI/device/table IP-VRF identity changes (restart-required by design) and ES/IP-VRF row mixed edits outside the L2VNI-only composer, true RFC VLAN-aware bundle / non-zero Ethernet Tag service, rustbgpd-managed SVD / collect-metadata VXLAN and VRF / L3VXLAN lifecycle creation (managed bridge and fixed-VNI VXLAN lifecycle now ship; VRF/L3VXLAN schema/status now ship), EVPN route types 6-11 / PBB / MVPN / MPLS/SRv6 service encapsulation, VPNv4/v6, labeled-unicast, Route Target Constraints, BGP-LS, Confederation, TCP-AO dynamic-neighbor / runtime-rotation / multi-key rollover | | **Tests** | Workspace test suite, fuzz targets, an automated interop 
suite (see `docs/INTEROP.md`) primarily against FRR plus GoBGP / StayRTR / documented BIRD coverage, and an in-tree EVPN load generator (foundation tier gated on every PR; privileged kernel dataplane smokes run on GitHub-hosted CI) | ## Documentation diff --git a/ROADMAP.md b/ROADMAP.md index 10af4a55..9cf97557 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -218,8 +218,14 @@ has it, no broad performance sprints without profile evidence. vnifilter mode the fixed-VNI lifecycle never creates; `managed_ready` proves that a rustbgpd-created bridge plus rustbgpd-created fixed-VNI VXLAN make the real EVPN L2 instance probe Ready only after both - links are owned-safe; SVD / collect-metadata VXLAN and VRF/L3VXLAN classes - still deferred. The `svd_fdb_vni` netns proof + links are owned-safe. **ADR-0091 VRF/L3VXLAN schema/status substrate + landed:** `[managed_netdevs]` accepts VRF and L3VXLAN desired rows, derives + `rustbgpd:vrf:<owner>:<name>` and `rustbgpd:l3vxlan:<owner>:<name>` stamps, + parses observed VRF/L3VXLAN protected attributes from Linux link dumps, and + reports desired/observed/orphan/foreign/unsafe state through + `ListManagedNetdevs` / `rbgp`; VRF/L3VXLAN create/adopt/reap lifecycle + remains deferred. SVD / collect-metadata VXLAN lifecycle is still deferred. + The `svd_fdb_vni` netns proof covers Ready + add + same-MAC two-VNI isolation + scoped delete on a real kernel; sparse `NDA_VLAN` / `NDA_DST` echoes are handled by configured-VLAN inference plus owned-state convergence. Service-provider EVPN breadth @@ -365,9 +371,12 @@ has it, no broad performance sprints without profile evidence. `desired-absent`, `foreign-present`, `owned-unsafe`, `owned-safe`, `orphaned`, or `unknown`; the dataplane actor creates missing managed bridges and fixed-VNI VXLANs, adopts exact stamped links after restart, and - safely reaps same-owner bridge/VXLAN orphans; `managed_ready` proves that - this rustbgpd-created bridge + VXLAN topology drives the real EVPN L2 probe - to Ready. 
A dedicated counter + safely reaps same-owner bridge/VXLAN orphans. VRF/L3VXLAN schema and status + substrate now ship too: desired rows validate, derive ownership stamps, parse + observed VRF/L3VXLAN link attributes, and surface status through + `ListManagedNetdevs` / `rbgp`, while VRF/L3VXLAN create/adopt/reap lifecycle + remains deferred. `managed_ready` proves that this rustbgpd-created bridge + + VXLAN topology drives the real EVPN L2 probe to Ready. A dedicated counter for unattributable-VLAN local-MAC classifier misses is intentionally not a feature: those events fail closed as normal "not ours" outcomes, while downstream originator backpressure is diff --git a/crates/api/src/evpn_service.rs b/crates/api/src/evpn_service.rs index 97c823a0..59826e3f 100644 --- a/crates/api/src/evpn_service.rs +++ b/crates/api/src/evpn_service.rs @@ -891,6 +891,8 @@ fn managed_netdev_to_proto(row: &ManagedNetdevStatus) -> proto::ManagedNetdevSta class: match row.class { rustbgpd_evpn::ManagedNetdevClass::Bridge => proto::ManagedNetdevClass::Bridge as i32, rustbgpd_evpn::ManagedNetdevClass::Vxlan => proto::ManagedNetdevClass::Vxlan as i32, + rustbgpd_evpn::ManagedNetdevClass::Vrf => proto::ManagedNetdevClass::Vrf as i32, + rustbgpd_evpn::ManagedNetdevClass::L3Vxlan => proto::ManagedNetdevClass::L3vxlan as i32, }, name: row.name.clone(), desired: row.desired, @@ -926,6 +928,10 @@ fn managed_netdev_to_proto(row: &ManagedNetdevStatus) -> proto::ManagedNetdevSta observed_collect_metadata: row.observed_collect_metadata, observed_vnifilter: row.observed_vnifilter, observed_bridge: row.observed_bridge.clone(), + observed_table_id: row.observed_table_id, + observed_up: row.observed_up, + observed_master: row.observed_master.clone(), + observed_router_mac: row.observed_router_mac.map(|mac| mac.to_string()), } } @@ -2136,46 +2142,7 @@ mod tests { #[tokio::test] async fn list_managed_netdevs_reads_status_snapshot() { let svc = EvpnService::new(Arc::new(EvpnInstanceTable::new())) - 
.with_managed_netdev_status_snapshot(Arc::new(|| { - vec![ - ManagedNetdevStatus { - class: rustbgpd_evpn::ManagedNetdevClass::Bridge, - name: "br100".to_string(), - desired: true, - ownership_stamp: Some("rustbgpd:bridge:leaf-1:br100".to_string()), - state: rustbgpd_evpn::ManagedNetdevState::OwnedSafe, - reason: String::new(), - ifindex: Some(10), - observed_vlan_filtering: Some(false), - observed_vni: None, - observed_local_ip: None, - observed_dstport: None, - observed_learning_disabled: None, - observed_collect_metadata: None, - observed_vnifilter: None, - observed_bridge: None, - observed_stamps: vec!["rustbgpd:bridge:leaf-1:br100".to_string()], - }, - ManagedNetdevStatus { - class: rustbgpd_evpn::ManagedNetdevClass::Vxlan, - name: "vxlan100".to_string(), - desired: true, - ownership_stamp: Some("rustbgpd:vxlan:leaf-1:vxlan100".to_string()), - state: rustbgpd_evpn::ManagedNetdevState::OwnedSafe, - reason: String::new(), - ifindex: Some(20), - observed_vlan_filtering: None, - observed_vni: Some(100), - observed_local_ip: Some("10.0.0.1".parse().unwrap()), - observed_dstport: Some(4789), - observed_learning_disabled: Some(true), - observed_collect_metadata: Some(false), - observed_vnifilter: Some(false), - observed_bridge: Some("br100".to_string()), - observed_stamps: vec!["rustbgpd:vxlan:leaf-1:vxlan100".to_string()], - }, - ] - })); + .with_managed_netdev_status_snapshot(Arc::new(managed_netdev_status_fixture)); let resp = svc .list_managed_netdevs(Request::new(proto::ListManagedNetdevsRequest {})) @@ -2183,8 +2150,123 @@ mod tests { .unwrap() .into_inner(); - assert_eq!(resp.netdevs.len(), 2); - let row = &resp.netdevs[0]; + assert_eq!(resp.netdevs.len(), 4); + assert_bridge_managed_netdev(&resp.netdevs[0]); + assert_vxlan_managed_netdev(&resp.netdevs[1]); + assert_vrf_managed_netdev(&resp.netdevs[2]); + assert_l3vxlan_managed_netdev(&resp.netdevs[3]); + } + + fn managed_netdev_status_fixture() -> Vec { + vec![ + bridge_managed_netdev_status(), + 
vxlan_managed_netdev_status(), + vrf_managed_netdev_status(), + l3vxlan_managed_netdev_status(), + ] + } + + fn bridge_managed_netdev_status() -> ManagedNetdevStatus { + ManagedNetdevStatus { + class: rustbgpd_evpn::ManagedNetdevClass::Bridge, + name: "br100".to_string(), + desired: true, + ownership_stamp: Some("rustbgpd:bridge:leaf-1:br100".to_string()), + state: rustbgpd_evpn::ManagedNetdevState::OwnedSafe, + reason: String::new(), + ifindex: Some(10), + observed_vlan_filtering: Some(false), + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, + observed_stamps: vec!["rustbgpd:bridge:leaf-1:br100".to_string()], + } + } + + fn vxlan_managed_netdev_status() -> ManagedNetdevStatus { + ManagedNetdevStatus { + class: rustbgpd_evpn::ManagedNetdevClass::Vxlan, + name: "vxlan100".to_string(), + desired: true, + ownership_stamp: Some("rustbgpd:vxlan:leaf-1:vxlan100".to_string()), + state: rustbgpd_evpn::ManagedNetdevState::OwnedSafe, + reason: String::new(), + ifindex: Some(20), + observed_vlan_filtering: None, + observed_vni: Some(100), + observed_local_ip: Some("10.0.0.1".parse().unwrap()), + observed_dstport: Some(4789), + observed_learning_disabled: Some(true), + observed_collect_metadata: Some(false), + observed_vnifilter: Some(false), + observed_bridge: Some("br100".to_string()), + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, + observed_stamps: vec!["rustbgpd:vxlan:leaf-1:vxlan100".to_string()], + } + } + + fn vrf_managed_netdev_status() -> ManagedNetdevStatus { + ManagedNetdevStatus { + class: rustbgpd_evpn::ManagedNetdevClass::Vrf, + name: "vrf100".to_string(), + desired: true, + ownership_stamp: Some("rustbgpd:vrf:leaf-1:vrf100".to_string()), + state: 
rustbgpd_evpn::ManagedNetdevState::OwnedSafe, + reason: String::new(), + ifindex: Some(30), + observed_vlan_filtering: None, + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: Some(5000), + observed_up: Some(true), + observed_master: None, + observed_router_mac: None, + observed_stamps: vec!["rustbgpd:vrf:leaf-1:vrf100".to_string()], + } + } + + fn l3vxlan_managed_netdev_status() -> ManagedNetdevStatus { + ManagedNetdevStatus { + class: rustbgpd_evpn::ManagedNetdevClass::L3Vxlan, + name: "l3vxlan100".to_string(), + desired: true, + ownership_stamp: Some("rustbgpd:l3vxlan:leaf-1:l3vxlan100".to_string()), + state: rustbgpd_evpn::ManagedNetdevState::OwnedSafe, + reason: String::new(), + ifindex: Some(40), + observed_vlan_filtering: None, + observed_vni: Some(5000), + observed_local_ip: Some("10.0.0.1".parse().unwrap()), + observed_dstport: Some(4789), + observed_learning_disabled: Some(true), + observed_collect_metadata: Some(false), + observed_vnifilter: Some(false), + observed_bridge: None, + observed_table_id: None, + observed_up: Some(true), + observed_master: Some("vrf100".to_string()), + observed_router_mac: Some(MacAddress::new([0x02, 0x00, 0x00, 0x00, 0x00, 0x01])), + observed_stamps: vec!["rustbgpd:l3vxlan:leaf-1:l3vxlan100".to_string()], + } + } + + fn assert_bridge_managed_netdev(row: &proto::ManagedNetdevState) { assert_eq!(row.class, proto::ManagedNetdevClass::Bridge as i32); assert_eq!(row.name, "br100"); assert!(row.desired); @@ -2196,7 +2278,9 @@ mod tests { assert_eq!(row.ifindex, Some(10)); assert_eq!(row.observed_vlan_filtering, Some(false)); assert_eq!(row.observed_stamps, vec!["rustbgpd:bridge:leaf-1:br100"]); - let vxlan = &resp.netdevs[1]; + } + + fn assert_vxlan_managed_netdev(vxlan: &proto::ManagedNetdevState) { assert_eq!(vxlan.class, proto::ManagedNetdevClass::Vxlan as i32); 
assert_eq!(vxlan.name, "vxlan100"); assert_eq!(vxlan.ownership_stamp, "rustbgpd:vxlan:leaf-1:vxlan100"); @@ -2210,6 +2294,31 @@ mod tests { assert_eq!(vxlan.observed_bridge.as_deref(), Some("br100")); } + fn assert_vrf_managed_netdev(vrf: &proto::ManagedNetdevState) { + assert_eq!(vrf.class, proto::ManagedNetdevClass::Vrf as i32); + assert_eq!(vrf.name, "vrf100"); + assert_eq!(vrf.ownership_stamp, "rustbgpd:vrf:leaf-1:vrf100"); + assert_eq!(vrf.ifindex, Some(30)); + assert_eq!(vrf.observed_table_id, Some(5000)); + assert_eq!(vrf.observed_up, Some(true)); + } + + fn assert_l3vxlan_managed_netdev(l3vxlan: &proto::ManagedNetdevState) { + assert_eq!(l3vxlan.class, proto::ManagedNetdevClass::L3vxlan as i32); + assert_eq!(l3vxlan.name, "l3vxlan100"); + assert_eq!( + l3vxlan.ownership_stamp, + "rustbgpd:l3vxlan:leaf-1:l3vxlan100" + ); + assert_eq!(l3vxlan.ifindex, Some(40)); + assert_eq!(l3vxlan.observed_vni, Some(5000)); + assert_eq!(l3vxlan.observed_master.as_deref(), Some("vrf100")); + assert_eq!( + l3vxlan.observed_router_mac.as_deref(), + Some("02:00:00:00:00:01") + ); + } + // -- Gate 9 IP-VRF surface -------------------------------------- use rustbgpd_evpn::IpVrfDataplaneStatus; diff --git a/crates/cli/src/commands/evpn.rs b/crates/cli/src/commands/evpn.rs index 5a650a55..20026523 100644 --- a/crates/cli/src/commands/evpn.rs +++ b/crates/cli/src/commands/evpn.rs @@ -625,6 +625,18 @@ pub async fn list_managed_netdevs(connection: Connection, json: bool) -> Result< if let Some(bridge) = row.observed_bridge.as_deref() { detail.push(format!("bridge={bridge}")); } + if let Some(table_id) = row.observed_table_id { + detail.push(format!("table-id={table_id}")); + } + if let Some(up) = row.observed_up { + detail.push(format!("up={up}")); + } + if let Some(master) = row.observed_master.as_deref() { + detail.push(format!("master={master}")); + } + if let Some(router_mac) = row.observed_router_mac.as_deref() { + detail.push(format!("router-mac={router_mac}")); + } if 
!row.observed_stamps.is_empty() { detail.push(format!( "observed-stamps=[{}]", @@ -644,6 +656,8 @@ fn managed_netdev_class_label(class: i32) -> &'static str { match ManagedNetdevClass::try_from(class) { Ok(ManagedNetdevClass::Bridge) => "bridge", Ok(ManagedNetdevClass::Vxlan) => "vxlan", + Ok(ManagedNetdevClass::Vrf) => "vrf", + Ok(ManagedNetdevClass::L3vxlan) => "l3vxlan", Ok(ManagedNetdevClass::Unknown) | Err(_) => "unknown", } } @@ -700,6 +714,20 @@ fn managed_netdev_to_json(row: &ManagedNetdevState) -> serde_json::Value { .observed_bridge .as_ref() .map_or(serde_json::Value::Null, |value| serde_json::Value::String(value.clone())), + "observed_table_id": row + .observed_table_id + .map_or(serde_json::Value::Null, serde_json::Value::from), + "observed_up": row + .observed_up + .map_or(serde_json::Value::Null, serde_json::Value::from), + "observed_master": row + .observed_master + .as_ref() + .map_or(serde_json::Value::Null, |value| serde_json::Value::String(value.clone())), + "observed_router_mac": row + .observed_router_mac + .as_ref() + .map_or(serde_json::Value::Null, |value| serde_json::Value::String(value.clone())), "observed_stamps": row.observed_stamps, }) } @@ -1484,6 +1512,10 @@ evpn_duplicate_mac_moves_total{vni="100",mac="02:aa:bb:cc:dd:01"} 2 observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, }); assert_eq!(value["class"], "bridge"); @@ -1513,6 +1545,10 @@ evpn_duplicate_mac_moves_total{vni="100",mac="02:aa:bb:cc:dd:01"} 2 observed_collect_metadata: Some(false), observed_vnifilter: Some(false), observed_bridge: Some("br100".to_string()), + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, }); assert_eq!(vxlan["class"], "vxlan"); assert_eq!(vxlan["observed_vni"], 100); @@ -1522,6 +1558,58 @@ evpn_duplicate_mac_moves_total{vni="100",mac="02:aa:bb:cc:dd:01"} 2 
assert_eq!(vxlan["observed_collect_metadata"], false); assert_eq!(vxlan["observed_vnifilter"], false); assert_eq!(vxlan["observed_bridge"], "br100"); + + let vrf = super::managed_netdev_to_json(&crate::proto::ManagedNetdevState { + class: crate::proto::ManagedNetdevClass::Vrf as i32, + name: "vrf100".to_string(), + desired: true, + ownership_stamp: "rustbgpd:vrf:leaf-1:vrf100".to_string(), + state: crate::proto::ManagedNetdevLifecycleState::ManagedNetdevStateOwnedSafe as i32, + reason: String::new(), + ifindex: Some(30), + observed_vlan_filtering: None, + observed_stamps: vec!["rustbgpd:vrf:leaf-1:vrf100".to_string()], + observed_vni: None, + observed_local: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: Some(5000), + observed_up: Some(true), + observed_master: None, + observed_router_mac: None, + }); + assert_eq!(vrf["class"], "vrf"); + assert_eq!(vrf["observed_table_id"], 5000); + assert_eq!(vrf["observed_up"], true); + + let l3vxlan = super::managed_netdev_to_json(&crate::proto::ManagedNetdevState { + class: crate::proto::ManagedNetdevClass::L3vxlan as i32, + name: "l3vxlan100".to_string(), + desired: true, + ownership_stamp: "rustbgpd:l3vxlan:leaf-1:l3vxlan100".to_string(), + state: crate::proto::ManagedNetdevLifecycleState::ManagedNetdevStateOwnedSafe as i32, + reason: String::new(), + ifindex: Some(40), + observed_vlan_filtering: None, + observed_stamps: vec!["rustbgpd:l3vxlan:leaf-1:l3vxlan100".to_string()], + observed_vni: Some(5000), + observed_local: Some("10.0.0.1".to_string()), + observed_dstport: Some(4789), + observed_learning_disabled: Some(true), + observed_collect_metadata: Some(false), + observed_vnifilter: Some(false), + observed_bridge: None, + observed_table_id: None, + observed_up: Some(true), + observed_master: Some("vrf100".to_string()), + observed_router_mac: Some("02:00:00:00:00:01".to_string()), + }); + 
assert_eq!(l3vxlan["class"], "l3vxlan"); + assert_eq!(l3vxlan["observed_master"], "vrf100"); + assert_eq!(l3vxlan["observed_router_mac"], "02:00:00:00:00:01"); } #[tokio::test] diff --git a/crates/cli/src/test_support.rs b/crates/cli/src/test_support.rs index d7cf4276..8d269d02 100644 --- a/crates/cli/src/test_support.rs +++ b/crates/cli/src/test_support.rs @@ -710,6 +710,10 @@ impl rustbgpd_api::proto::evpn_service_server::EvpnService for MockEvpnService { observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, }], })) } diff --git a/crates/evpn-linux/src/in_memory.rs b/crates/evpn-linux/src/in_memory.rs index b9de3f21..cc6af940 100644 --- a/crates/evpn-linux/src/in_memory.rs +++ b/crates/evpn-linux/src/in_memory.rs @@ -518,6 +518,7 @@ impl InMemoryDataplane { ifindex: next_ifindex, name: name.clone(), altnames: vec![ownership_stamp.clone()], + up: true, vni: Some(spec.vni), local_ip: Some(spec.local_ip), dstport: Some(spec.dstport), @@ -525,6 +526,8 @@ impl InMemoryDataplane { collect_metadata: false, vnifilter: false, bridge: Some(spec.bridge.clone()), + master: Some(spec.bridge.clone()), + mac: None, }); } }, diff --git a/crates/evpn-linux/src/linux/links.rs b/crates/evpn-linux/src/linux/links.rs index f8b39e8d..6adadb4d 100644 --- a/crates/evpn-linux/src/linux/links.rs +++ b/crates/evpn-linux/src/linux/links.rs @@ -17,8 +17,8 @@ use futures::stream::TryStreamExt; use netlink_packet_route::AddressFamily; use netlink_packet_route::link::{ AfSpecBridge, BridgeVlanInfo, BridgeVlanInfoFlags, BridgeVlanTunnelInfo, InfoBridge, - InfoBridgePort, InfoData, InfoKind, InfoPortData, InfoVlan, InfoVxlan, LinkAttribute, - LinkExtentMask, LinkInfo, LinkMessage, Prop, + InfoBridgePort, InfoData, InfoKind, InfoPortData, InfoVlan, InfoVrf, InfoVxlan, LinkAttribute, + LinkExtentMask, LinkFlags, LinkInfo, LinkMessage, Prop, }; use rtnetlink::Handle; @@ 
-27,8 +27,8 @@ use rustbgpd_evpn::{EvpnInstanceId, EvpnInstanceTable, MacAddress}; use crate::error::DataplaneError; use crate::snapshot::{ KernelBridgePortVlanInfo, KernelBridgeVlanFlags, KernelBridgeVlanInfo, - KernelBridgeVlanTunnelInfo, KernelSvdVxlanInfo, KernelVxlanInfo, KernelVxlanLinkInfo, - vlan_rows_contain, + KernelBridgeVlanTunnelInfo, KernelSvdVxlanInfo, KernelVrfLinkInfo, KernelVxlanInfo, + KernelVxlanLinkInfo, vlan_rows_contain, }; /// One bridge link the inventory cared about. @@ -144,6 +144,9 @@ pub(crate) struct LinkCache { /// status uses this to classify desired VXLAN rows and stamped /// leftovers independently of bridge readiness. pub vxlan_links: HashMap, + /// VRF links by name. Managed-netdev status uses this to classify desired + /// VRF rows and stamped leftovers independently of IP-VRF readiness. + pub vrfs: HashMap, /// VXLAN ifindex -> EVPN VNI back-reference. Populated alongside /// the bridge inventory so the FDB dump can derive the VNI of an /// FDB entry from `msg.header.ifindex` (which is the *VXLAN* @@ -523,6 +526,19 @@ fn insert_unique_ifindex_binding( type BridgeVlanInventory = HashMap, Vec)>; +#[derive(Default)] +struct LinkDumpState { + bridges: HashMap, + all_link_names: HashSet, + link_ifindex_to_name: HashMap, + bridge_ifindex_to_name: HashMap, + vrfs: HashMap, + vxlan_ports: Vec, + vxlan_links: HashMap)>, + vlan_upper_links: HashMap<(u32, u16), Option>, + all_enslaved: Vec, +} + async fn dump_bridge_vlan_inventory_optional(handle: &Handle) -> BridgeVlanInventory { match dump_bridge_vlan_inventory(handle).await { Ok(inventory) => inventory, @@ -545,22 +561,13 @@ async fn dump_bridge_vlan_inventory_optional(handle: &Handle) -> BridgeVlanInven /// kernel-ordered and a VXLAN port can appear before its master /// bridge. 
pub(crate) async fn dump_links(handle: &Handle) -> Result { - let mut bridges: HashMap = HashMap::new(); - let mut all_link_names: HashSet = HashSet::new(); - let mut bridge_ifindex_to_name: HashMap = HashMap::new(); - let mut vxlan_ports: Vec = Vec::new(); - let mut vxlan_links: HashMap)> = HashMap::new(); - let mut vlan_upper_links: HashMap<(u32, u16), Option> = HashMap::new(); + let mut state = LinkDumpState::default(); // The AF_BRIDGE filtered dump carries bridge VLAN/tunnel extension // rows, but on real kernels it is not a substitute for the normal // RTM_GETLINK walk: VXLAN InfoData/learning attributes can be absent // there. Keep this diagnostic substrate optional so a kernel that // refuses the extension mask does not break existing readiness. let vlan_inventory = dump_bridge_vlan_inventory_optional(handle).await; - // Every link that has a Controller attribute. Post-processed by - // `index_bridge_ports` to seed `bridge_port_to_vni` and - // `bridge_ports_by_name` for non-VXLAN ports of known bridges. 
- let mut all_enslaved: Vec = Vec::new(); let mut stream = handle.link().get().execute(); while let Some(msg) = stream @@ -568,68 +575,25 @@ pub(crate) async fn dump_links(handle: &Handle) -> Result { - if let Some(name) = name { - let vlan_filtering = bridge_vlan_filtering(&msg); - let mac = extract_link_mac(&msg); - let altnames = extract_altnames(&msg); - bridges.insert( - name.clone(), - BridgeLink { - ifindex, - mac, - altnames, - vlan_filtering, - vxlan: None, - vxlan_ports: Vec::new(), - svd_vxlan_ports: Vec::new(), - vxlan_attach_count: 0, - ce_port_ifindexes: Vec::new(), - vlans: bridge_vlans, - vlan_tunnels, - port_vlan_inventory: Vec::new(), - }, - ); - bridge_ifindex_to_name.insert(ifindex, name); - } - } - Some(InfoKind::Vxlan) => { - if let Some(name) = name { - record_named_vxlan_link(&msg, name, &mut vxlan_ports, &mut vxlan_links); - } else if let Some(port) = parse_vxlan_port(&msg) { - vxlan_ports.push(port); - } - } - Some(InfoKind::Vlan) => { - record_vlan_upper_link(&msg, &mut vlan_upper_links); - } - _ => {} - } + record_link_message(&msg, &vlan_inventory, &mut state); } + let LinkDumpState { + mut bridges, + all_link_names, + link_ifindex_to_name, + bridge_ifindex_to_name, + vrfs, + vxlan_ports, + vxlan_links, + vlan_upper_links, + all_enslaved, + } = state; + let (vxlan_ifindex_to_vni, svd_vxlan_ifindexes) = attach_vxlan_ports(vxlan_ports, &bridge_ifindex_to_name, &mut bridges); - let vxlan_links = attach_vxlan_link_status(vxlan_links, &bridge_ifindex_to_name); + let vxlan_links = + attach_vxlan_link_status(vxlan_links, &bridge_ifindex_to_name, &link_ifindex_to_name); let (bridge_port_to_vni, bridge_ports_by_name) = index_bridge_ports(all_enslaved, &bridge_ifindex_to_name, &mut bridges); @@ -638,6 +602,7 @@ pub(crate) async fn dump_links(handle: &Handle) -> Result Result { + if let Some(name) = name { + record_bridge_link( + msg, + name, + bridge_vlans, + vlan_tunnels, + &mut state.bridges, + &mut state.bridge_ifindex_to_name, + ); + } + } + 
Some(InfoKind::Vxlan) => { + if let Some(name) = name { + record_named_vxlan_link(msg, name, &mut state.vxlan_ports, &mut state.vxlan_links); + } else if let Some(port) = parse_vxlan_port(msg) { + state.vxlan_ports.push(port); + } + } + Some(InfoKind::Vrf) => { + if let Some(name) = name { + state.vrfs.insert(name.clone(), parse_vrf_link(msg, name)); + } + } + Some(InfoKind::Vlan) => { + record_vlan_upper_link(msg, &mut state.vlan_upper_links); + } + _ => {} + } +} + +fn record_bridge_link( + msg: &LinkMessage, + name: String, + bridge_vlans: Vec, + vlan_tunnels: Vec, + bridges: &mut HashMap, + bridge_ifindex_to_name: &mut HashMap, +) { + let ifindex = msg.header.index; + bridges.insert( + name.clone(), + BridgeLink { + ifindex, + mac: extract_link_mac(msg), + altnames: extract_altnames(msg), + vlan_filtering: bridge_vlan_filtering(msg), + vxlan: None, + vxlan_ports: Vec::new(), + svd_vxlan_ports: Vec::new(), + vxlan_attach_count: 0, + ce_port_ifindexes: Vec::new(), + vlans: bridge_vlans, + vlan_tunnels, + port_vlan_inventory: Vec::new(), + }, + ); + bridge_ifindex_to_name.insert(ifindex, name); +} + fn record_named_vxlan_link( msg: &LinkMessage, name: String, @@ -681,11 +737,13 @@ fn record_vlan_upper_link( fn attach_vxlan_link_status( vxlan_links: HashMap)>, bridge_ifindex_to_name: &HashMap, + link_ifindex_to_name: &HashMap, ) -> HashMap { vxlan_links .into_iter() .map(|(name, (mut link, master))| { link.bridge = master.and_then(|idx| bridge_ifindex_to_name.get(&idx).cloned()); + link.master = master.and_then(|idx| link_ifindex_to_name.get(&idx).cloned()); (name, link) }) .collect() @@ -1117,6 +1175,7 @@ fn parse_vxlan_link(msg: &LinkMessage, name: String) -> ParsedVxlanLink { ifindex, name, altnames: extract_altnames(msg), + up: msg.header.flags.contains(LinkFlags::Up), vni, local_ip: local, dstport, @@ -1124,10 +1183,39 @@ fn parse_vxlan_link(msg: &LinkMessage, name: String) -> ParsedVxlanLink { collect_metadata, vnifilter, bridge: None, + master: None, + mac: 
extract_link_mac(msg), }, } } +fn parse_vrf_link(msg: &LinkMessage, name: String) -> KernelVrfLinkInfo { + KernelVrfLinkInfo { + ifindex: msg.header.index, + name, + altnames: extract_altnames(msg), + up: msg.header.flags.contains(LinkFlags::Up), + table_id: extract_vrf_table_id(msg), + } +} + +fn extract_vrf_table_id(msg: &LinkMessage) -> Option { + for attr in &msg.attributes { + if let LinkAttribute::LinkInfo(infos) = attr { + for info in infos { + if let LinkInfo::Data(InfoData::Vrf(items)) = info { + for item in items { + if let InfoVrf::TableId(id) = item { + return Some(*id); + } + } + } + } + } + } + None +} + fn parse_vlan_upper_link(msg: &LinkMessage) -> Option { let ifindex = msg.header.index; let mut lower_ifindex = None; @@ -1286,6 +1374,10 @@ mod tests { fn parse_vxlan_port_captures_fixed_vni_device() { let mut msg = LinkMessage::default(); msg.header.index = 20; + msg.header.flags.insert(LinkFlags::Up); + msg.attributes.push(LinkAttribute::Address(vec![ + 0x02, 0x00, 0x00, 0x00, 0x00, 0x01, + ])); msg.attributes .push(LinkAttribute::IfName("vxlan100".to_string())); msg.attributes.push(LinkAttribute::PropList(vec![ @@ -1323,6 +1415,11 @@ mod tests { assert_eq!(parsed.info.learning_disabled, Some(true)); assert!(!parsed.info.collect_metadata); assert!(!parsed.info.vnifilter); + assert!(parsed.info.up); + assert_eq!( + parsed.info.mac, + Some(MacAddress::new([0x02, 0x00, 0x00, 0x00, 0x00, 0x01])) + ); let Some(VxlanPort::Fixed(port)) = parse_vxlan_port(&msg) else { panic!("expected fixed VXLAN port"); @@ -1334,6 +1431,37 @@ mod tests { assert_eq!(port.info.learning_disabled, Some(true)); } + #[test] + fn parse_vrf_link_captures_table_stamp_and_up_state() { + let mut msg = LinkMessage::default(); + msg.header.index = 50; + msg.header.flags.insert(LinkFlags::Up); + msg.attributes + .push(LinkAttribute::IfName("vrf100".to_string())); + msg.attributes.push(LinkAttribute::PropList(vec![ + Prop::AltIfName("operator-alias".to_string()), + 
Prop::AltIfName("rustbgpd:vrf:leaf-1:vrf100".to_string()), + ])); + msg.attributes.push(LinkAttribute::LinkInfo(vec![ + LinkInfo::Kind(InfoKind::Vrf), + LinkInfo::Data(InfoData::Vrf(vec![InfoVrf::TableId(5000)])), + ])); + + let parsed = parse_vrf_link(&msg, "vrf100".to_string()); + + assert_eq!(parsed.ifindex, 50); + assert_eq!(parsed.name, "vrf100"); + assert_eq!( + parsed.altnames, + vec![ + "operator-alias".to_string(), + "rustbgpd:vrf:leaf-1:vrf100".to_string(), + ] + ); + assert!(parsed.up); + assert_eq!(parsed.table_id, Some(5000)); + } + #[test] fn parse_vxlan_port_captures_collect_metadata_without_fixed_vni() { let mut msg = LinkMessage::default(); diff --git a/crates/evpn-linux/src/linux/mod.rs b/crates/evpn-linux/src/linux/mod.rs index 1d109eb6..fe4b4153 100644 --- a/crates/evpn-linux/src/linux/mod.rs +++ b/crates/evpn-linux/src/linux/mod.rs @@ -532,6 +532,13 @@ impl Dataplane for LinuxDataplane { .map(|(name, link)| (name.clone(), link.clone())) .collect(), ); + snap.set_vrfs( + cache + .vrfs + .iter() + .map(|(name, link)| (name.clone(), link.clone())) + .collect(), + ); for (name, link) in &cache.bridges { // Only surface bridges with exactly one VXLAN port so // the diff loop's NotReady inference matches the probe diff --git a/crates/evpn-linux/src/linux/notify.rs b/crates/evpn-linux/src/linux/notify.rs index 6baa3a08..6ec12dc4 100644 --- a/crates/evpn-linux/src/linux/notify.rs +++ b/crates/evpn-linux/src/linux/notify.rs @@ -726,6 +726,7 @@ mod tests { all_link_names, bridges, vxlan_links: HashMap::new(), + vrfs: HashMap::new(), vxlan_ifindex_to_vni, svd_vxlan_ifindexes: HashSet::new(), bridge_port_to_vni, diff --git a/crates/evpn-linux/src/linux/probe.rs b/crates/evpn-linux/src/linux/probe.rs index 8a6ecd58..9382c86b 100644 --- a/crates/evpn-linux/src/linux/probe.rs +++ b/crates/evpn-linux/src/linux/probe.rs @@ -337,6 +337,7 @@ mod tests { all_link_names, bridges, vxlan_links: HashMap::new(), + vrfs: HashMap::new(), vxlan_ifindex_to_vni: 
vxlan_to_vni, svd_vxlan_ifindexes: HashSet::new(), bridge_port_to_vni: HashMap::new(), diff --git a/crates/evpn-linux/src/reconcile.rs b/crates/evpn-linux/src/reconcile.rs index a54b52ca..15c8e198 100644 --- a/crates/evpn-linux/src/reconcile.rs +++ b/crates/evpn-linux/src/reconcile.rs @@ -40,8 +40,9 @@ use rustbgpd_evpn::{ AppliedOp, DataplaneIntent, DataplaneOpKind, DataplaneReport, EvpnInstanceTable, FailedOp, FdbNexthopDataplaneStatus, FdbNexthopGroupStatus, FdbNexthopMemberStatus, FdbNhgDriftCounters, InstanceDataplaneStatus, InstanceState, L3AdoptionCounters, ManagedBridgeNetdev, - ManagedNetdevClass, ManagedNetdevState, ManagedNetdevStatus, ManagedNetdevTable, - ManagedVxlanNetdev, RemoteMacTable, SingleActiveCounters, parse_ownership_stamp, + ManagedL3VxlanNetdev, ManagedNetdevClass, ManagedNetdevState, ManagedNetdevStatus, + ManagedNetdevTable, ManagedVrfNetdev, ManagedVxlanNetdev, RemoteMacTable, SingleActiveCounters, + parse_ownership_stamp, }; use tokio::sync::{mpsc, watch}; use tokio::time::{Instant, MissedTickBehavior, sleep_until}; @@ -59,8 +60,8 @@ use crate::enforcement::build_bum_enforcement_status; use crate::error::FailureClass; use crate::l3_adoption::{AdoptedL3Route, AdoptedL3VxlanFdb, AdoptedL3VxlanFdbTarget}; use crate::snapshot::{ - InstanceProbe, InstanceProbes, KernelLinkInfo, KernelSnapshot, KernelVxlanLinkInfo, OwnedEntry, - OwnedEntryKind, OwnedSet, + InstanceProbe, InstanceProbes, KernelLinkInfo, KernelSnapshot, KernelVrfLinkInfo, + KernelVxlanLinkInfo, OwnedEntry, OwnedEntryKind, OwnedSet, }; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] @@ -3544,82 +3545,152 @@ fn build_managed_netdev_status( snapshot: Option<&KernelSnapshot>, ) -> Vec { let mut rows = Vec::new(); - let mut desired_bridge_names = BTreeSet::new(); - let mut desired_vxlan_names = BTreeSet::new(); + let desired = desired_managed_netdev_statuses(managed, snapshot, &mut rows); + + if let Some(snapshot) = snapshot + && managed.owner_token().is_some() + { + 
unconfigured_managed_netdev_statuses(managed, snapshot, &desired, &mut rows); + } + + rows.sort_by(|a, b| { + ( + a.class, + a.name.as_str(), + std::cmp::Reverse(a.desired), + a.state.as_str(), + ) + .cmp(&( + b.class, + b.name.as_str(), + std::cmp::Reverse(b.desired), + b.state.as_str(), + )) + }); + rows +} +#[derive(Default)] +struct DesiredManagedNetdevNames { + bridges: BTreeSet, + vxlans: BTreeSet, + vrfs: BTreeSet, + l3vxlans: BTreeSet, +} + +fn desired_managed_netdev_statuses( + managed: &ManagedNetdevTable, + snapshot: Option<&KernelSnapshot>, + rows: &mut Vec, +) -> DesiredManagedNetdevNames { + let mut desired = DesiredManagedNetdevNames::default(); for bridge in managed.bridges() { - desired_bridge_names.insert(bridge.name.clone()); - let link = snapshot.and_then(|kernel| kernel.links.get(&bridge.name)); - let name_occupied = snapshot.is_some_and(|kernel| kernel.link_name_exists(&bridge.name)); + desired.bridges.insert(bridge.name.clone()); rows.push(desired_managed_bridge_status( bridge, - link, + snapshot.and_then(|kernel| kernel.links.get(&bridge.name)), snapshot.is_some(), - name_occupied, + snapshot.is_some_and(|kernel| kernel.link_name_exists(&bridge.name)), )); } for vxlan in managed.vxlans() { - desired_vxlan_names.insert(vxlan.name.clone()); - let link = snapshot.and_then(|kernel| kernel.vxlans.get(&vxlan.name)); - let name_occupied = snapshot.is_some_and(|kernel| kernel.link_name_exists(&vxlan.name)); + desired.vxlans.insert(vxlan.name.clone()); rows.push(desired_managed_vxlan_status( vxlan, - link, + snapshot.and_then(|kernel| kernel.vxlans.get(&vxlan.name)), snapshot.is_some(), - name_occupied, + snapshot.is_some_and(|kernel| kernel.link_name_exists(&vxlan.name)), )); } + for vrf in managed.vrfs() { + desired.vrfs.insert(vrf.name.clone()); + rows.push(desired_managed_vrf_status( + vrf, + snapshot.and_then(|kernel| kernel.vrfs.get(&vrf.name)), + snapshot.is_some(), + snapshot.is_some_and(|kernel| kernel.link_name_exists(&vrf.name)), + )); + } 
+ for l3vxlan in managed.l3vxlans() { + desired.l3vxlans.insert(l3vxlan.name.clone()); + rows.push(desired_managed_l3vxlan_status( + l3vxlan, + snapshot.and_then(|kernel| kernel.vxlans.get(&l3vxlan.name)), + snapshot.is_some(), + snapshot.is_some_and(|kernel| kernel.link_name_exists(&l3vxlan.name)), + )); + } + desired +} - if let Some(snapshot) = snapshot - && managed.owner_token().is_some() - { - for (name, link) in &snapshot.links { - if desired_bridge_names.contains(name) { - continue; - } - let observed_stamps = rustbgpd_stamps(&link.altnames); - if observed_stamps.is_empty() { - continue; - } +fn unconfigured_managed_netdev_statuses( + managed: &ManagedNetdevTable, + snapshot: &KernelSnapshot, + desired: &DesiredManagedNetdevNames, + rows: &mut Vec, +) { + for (name, link) in &snapshot.links { + if desired.bridges.contains(name) { + continue; + } + let stamps = rustbgpd_stamps_for_class(&link.altnames, ManagedNetdevClass::Bridge); + if !stamps.is_empty() { rows.push(unconfigured_managed_bridge_status( name, link, - observed_stamps, + stamps, managed.owner_token(), )); } - for (name, link) in &snapshot.vxlans { - if desired_vxlan_names.contains(name) { - continue; - } - let observed_stamps = rustbgpd_stamps(&link.altnames); - if observed_stamps.is_empty() { - continue; - } - rows.push(unconfigured_managed_vxlan_status( + } + for (name, link) in &snapshot.vxlans { + push_unconfigured_vxlan_status(name, link, managed, desired, rows); + } + for (name, link) in &snapshot.vrfs { + if desired.vrfs.contains(name) { + continue; + } + let stamps = rustbgpd_stamps_for_class(&link.altnames, ManagedNetdevClass::Vrf); + if !stamps.is_empty() { + rows.push(unconfigured_managed_vrf_status( name, link, - observed_stamps, + stamps, managed.owner_token(), )); } } +} - rows.sort_by(|a, b| { - ( - a.class, - a.name.as_str(), - std::cmp::Reverse(a.desired), - a.state.as_str(), - ) - .cmp(&( - b.class, - b.name.as_str(), - std::cmp::Reverse(b.desired), - b.state.as_str(), - )) - 
}); - rows +fn push_unconfigured_vxlan_status( + name: &str, + link: &KernelVxlanLinkInfo, + managed: &ManagedNetdevTable, + desired: &DesiredManagedNetdevNames, + rows: &mut Vec, +) { + if !desired.vxlans.contains(name) { + let stamps = rustbgpd_stamps_for_class(&link.altnames, ManagedNetdevClass::Vxlan); + if !stamps.is_empty() { + rows.push(unconfigured_managed_vxlan_status( + name, + link, + stamps, + managed.owner_token(), + )); + } + } + if !desired.l3vxlans.contains(name) { + let stamps = rustbgpd_stamps_for_class(&link.altnames, ManagedNetdevClass::L3Vxlan); + if !stamps.is_empty() { + rows.push(unconfigured_managed_l3vxlan_status( + name, + link, + stamps, + managed.owner_token(), + )); + } + } } fn compute_managed_netdev_ops( @@ -3721,6 +3792,10 @@ fn desired_managed_bridge_status( observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, observed_stamps: Vec::new(), }; }; @@ -3743,6 +3818,10 @@ fn desired_managed_bridge_status( observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, observed_stamps, } } @@ -3826,6 +3905,10 @@ fn unconfigured_managed_bridge_status( observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, observed_stamps, } } @@ -3871,6 +3954,10 @@ fn desired_managed_vxlan_status( observed_collect_metadata: None, observed_vnifilter: None, observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, observed_stamps: Vec::new(), }; }; @@ -3893,6 +3980,10 @@ fn desired_managed_vxlan_status( observed_collect_metadata: Some(link.collect_metadata), observed_vnifilter: Some(link.vnifilter), observed_bridge: 
link.bridge.clone(), + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, observed_stamps, } } @@ -4024,6 +4115,392 @@ fn unconfigured_managed_vxlan_status( observed_collect_metadata: Some(link.collect_metadata), observed_vnifilter: Some(link.vnifilter), observed_bridge: link.bridge.clone(), + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, + observed_stamps, + } +} + +fn desired_managed_vrf_status( + vrf: &ManagedVrfNetdev, + link: Option<&KernelVrfLinkInfo>, + snapshot_available: bool, + name_occupied: bool, +) -> ManagedNetdevStatus { + let Some(link) = link else { + let (state, reason) = if snapshot_available { + if name_occupied { + ( + ManagedNetdevState::ForeignPresent, + "desired VRF name is occupied by a non-VRF link".to_string(), + ) + } else { + ( + ManagedNetdevState::DesiredAbsent, + "VRF is not present".to_string(), + ) + } + } else { + ( + ManagedNetdevState::Unknown, + "kernel snapshot unavailable".to_string(), + ) + }; + return ManagedNetdevStatus { + class: ManagedNetdevClass::Vrf, + name: vrf.name.clone(), + desired: true, + ownership_stamp: Some(vrf.ownership_stamp.clone()), + state, + reason, + ifindex: None, + observed_vlan_filtering: None, + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, + observed_stamps: Vec::new(), + }; + }; + + let observed_stamps = rustbgpd_stamps(&link.altnames); + let (state, reason) = classify_desired_managed_vrf(vrf, link, &observed_stamps); + ManagedNetdevStatus { + class: ManagedNetdevClass::Vrf, + name: vrf.name.clone(), + desired: true, + ownership_stamp: Some(vrf.ownership_stamp.clone()), + state, + reason, + ifindex: Some(link.ifindex), + observed_vlan_filtering: 
None, + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: link.table_id, + observed_up: Some(link.up), + observed_master: None, + observed_router_mac: None, + observed_stamps, + } +} + +fn classify_desired_managed_vrf( + vrf: &ManagedVrfNetdev, + link: &KernelVrfLinkInfo, + observed_stamps: &[String], +) -> (ManagedNetdevState, String) { + let has_expected_stamp = observed_stamps + .iter() + .any(|stamp| stamp == &vrf.ownership_stamp); + if !has_expected_stamp { + return if observed_stamps.is_empty() { + ( + ManagedNetdevState::ForeignPresent, + "VRF exists without the expected rustbgpd ownership stamp".to_string(), + ) + } else { + ( + ManagedNetdevState::OwnedUnsafe, + format!( + "VRF carries rustbgpd ownership stamp(s) but not expected stamp {:?}", + vrf.ownership_stamp + ), + ) + }; + } + if observed_stamps.len() != 1 { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "VRF carries expected ownership stamp plus additional rustbgpd stamp(s): {observed_stamps:?}" + ), + ); + } + if link.table_id != Some(vrf.spec.table_id) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "table_id mismatch: observed {:?}, desired {}", + link.table_id, vrf.spec.table_id + ), + ); + } + (ManagedNetdevState::OwnedSafe, String::new()) +} + +fn unconfigured_managed_vrf_status( + name: &str, + link: &KernelVrfLinkInfo, + observed_stamps: Vec, + owner_token: Option<&str>, +) -> ManagedNetdevStatus { + let safe_orphan = + owner_token.is_none_or(|owner| safe_orphan_vrf_stamp_for_owner(link, owner).is_some()); + let (state, reason) = if safe_orphan { + ( + ManagedNetdevState::Orphaned, + "rustbgpd-stamped VRF is not configured".to_string(), + ) + } else { + ( + ManagedNetdevState::OwnedUnsafe, + "rustbgpd-stamped VRF is not configured but is not owned by this daemon".to_string(), + ) + }; + 
ManagedNetdevStatus { + class: ManagedNetdevClass::Vrf, + name: name.to_string(), + desired: false, + ownership_stamp: None, + state, + reason, + ifindex: Some(link.ifindex), + observed_vlan_filtering: None, + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: link.table_id, + observed_up: Some(link.up), + observed_master: None, + observed_router_mac: None, + observed_stamps, + } +} + +fn desired_managed_l3vxlan_status( + l3vxlan: &ManagedL3VxlanNetdev, + link: Option<&KernelVxlanLinkInfo>, + snapshot_available: bool, + name_occupied: bool, +) -> ManagedNetdevStatus { + let Some(link) = link else { + let (state, reason) = if snapshot_available { + if name_occupied { + ( + ManagedNetdevState::ForeignPresent, + "desired L3VXLAN name is occupied by a non-VXLAN link".to_string(), + ) + } else { + ( + ManagedNetdevState::DesiredAbsent, + "L3VXLAN is not present".to_string(), + ) + } + } else { + ( + ManagedNetdevState::Unknown, + "kernel snapshot unavailable".to_string(), + ) + }; + return ManagedNetdevStatus { + class: ManagedNetdevClass::L3Vxlan, + name: l3vxlan.name.clone(), + desired: true, + ownership_stamp: Some(l3vxlan.ownership_stamp.clone()), + state, + reason, + ifindex: None, + observed_vlan_filtering: None, + observed_vni: None, + observed_local_ip: None, + observed_dstport: None, + observed_learning_disabled: None, + observed_collect_metadata: None, + observed_vnifilter: None, + observed_bridge: None, + observed_table_id: None, + observed_up: None, + observed_master: None, + observed_router_mac: None, + observed_stamps: Vec::new(), + }; + }; + + let observed_stamps = rustbgpd_stamps(&link.altnames); + let (state, reason) = classify_desired_managed_l3vxlan(l3vxlan, link, &observed_stamps); + ManagedNetdevStatus { + class: ManagedNetdevClass::L3Vxlan, + name: l3vxlan.name.clone(), + desired: 
true, + ownership_stamp: Some(l3vxlan.ownership_stamp.clone()), + state, + reason, + ifindex: Some(link.ifindex), + observed_vlan_filtering: None, + observed_vni: link.vni, + observed_local_ip: link.local_ip, + observed_dstport: link.dstport, + observed_learning_disabled: link.learning_disabled, + observed_collect_metadata: Some(link.collect_metadata), + observed_vnifilter: Some(link.vnifilter), + observed_bridge: None, + observed_table_id: None, + observed_up: Some(link.up), + observed_master: link.master.clone(), + observed_router_mac: link.mac, + observed_stamps, + } +} + +fn classify_desired_managed_l3vxlan( + l3vxlan: &ManagedL3VxlanNetdev, + link: &KernelVxlanLinkInfo, + observed_stamps: &[String], +) -> (ManagedNetdevState, String) { + let has_expected_stamp = observed_stamps + .iter() + .any(|stamp| stamp == &l3vxlan.ownership_stamp); + if !has_expected_stamp { + return if observed_stamps.is_empty() { + ( + ManagedNetdevState::ForeignPresent, + "L3VXLAN exists without the expected rustbgpd ownership stamp".to_string(), + ) + } else { + ( + ManagedNetdevState::OwnedUnsafe, + format!( + "L3VXLAN carries rustbgpd ownership stamp(s) but not expected stamp {:?}", + l3vxlan.ownership_stamp + ), + ) + }; + } + if observed_stamps.len() != 1 { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "L3VXLAN carries expected ownership stamp plus additional rustbgpd stamp(s): {observed_stamps:?}" + ), + ); + } + if link.vni != Some(l3vxlan.spec.vni) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "vni mismatch: observed {:?}, desired {}", + link.vni, l3vxlan.spec.vni + ), + ); + } + if link.local_ip != Some(l3vxlan.spec.local_ip) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "local IP mismatch: observed {:?}, desired {}", + link.local_ip, l3vxlan.spec.local_ip + ), + ); + } + if link.dstport != Some(l3vxlan.spec.dstport) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "dstport mismatch: observed {:?}, desired {}", + 
link.dstport, l3vxlan.spec.dstport + ), + ); + } + if link.learning_disabled != Some(true) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "learning mismatch: observed {:?}, desired nolearning", + link.learning_disabled + ), + ); + } + if link.collect_metadata { + return ( + ManagedNetdevState::OwnedUnsafe, + "collect-metadata/external VXLAN is outside L3VXLAN lifecycle scope".to_string(), + ); + } + if link.vnifilter { + return ( + ManagedNetdevState::OwnedUnsafe, + "vnifilter is enabled; L3VXLAN lifecycle requires vnifilter off".to_string(), + ); + } + if link.master.as_deref() != Some(l3vxlan.spec.vrf.as_str()) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "VRF attachment mismatch: observed {:?}, desired {}", + link.master, l3vxlan.spec.vrf + ), + ); + } + if link.mac != Some(l3vxlan.spec.router_mac) { + return ( + ManagedNetdevState::OwnedUnsafe, + format!( + "router MAC mismatch: observed {:?}, desired {:?}", + link.mac, l3vxlan.spec.router_mac + ), + ); + } + (ManagedNetdevState::OwnedSafe, String::new()) +} + +fn unconfigured_managed_l3vxlan_status( + name: &str, + link: &KernelVxlanLinkInfo, + observed_stamps: Vec, + owner_token: Option<&str>, +) -> ManagedNetdevStatus { + let safe_orphan = + owner_token.is_none_or(|owner| safe_orphan_l3vxlan_stamp_for_owner(link, owner).is_some()); + let (state, reason) = if safe_orphan { + ( + ManagedNetdevState::Orphaned, + "rustbgpd-stamped L3VXLAN is not configured".to_string(), + ) + } else { + ( + ManagedNetdevState::OwnedUnsafe, + "rustbgpd-stamped L3VXLAN is not configured but is not owned by this daemon" + .to_string(), + ) + }; + ManagedNetdevStatus { + class: ManagedNetdevClass::L3Vxlan, + name: name.to_string(), + desired: false, + ownership_stamp: None, + state, + reason, + ifindex: Some(link.ifindex), + observed_vlan_filtering: None, + observed_vni: link.vni, + observed_local_ip: link.local_ip, + observed_dstport: link.dstport, + observed_learning_disabled: link.learning_disabled, 
+ observed_collect_metadata: Some(link.collect_metadata), + observed_vnifilter: Some(link.vnifilter), + observed_bridge: None, + observed_table_id: None, + observed_up: Some(link.up), + observed_master: link.master.clone(), + observed_router_mac: link.mac, observed_stamps, } } @@ -4038,6 +4515,20 @@ fn rustbgpd_stamps(altnames: &[String]) -> Vec { stamps } +fn rustbgpd_stamps_for_class(altnames: &[String], class: ManagedNetdevClass) -> Vec { + let mut stamps: Vec = altnames + .iter() + .filter_map(|altname| { + parse_ownership_stamp(altname) + .filter(|stamp| stamp.class == class) + .map(|stamp| stamp.raw) + }) + .collect(); + stamps.sort(); + stamps.dedup(); + stamps +} + fn safe_orphan_bridge_stamp_for_owner(link: &KernelLinkInfo, owner_token: &str) -> Option { let stamps: Vec<_> = link .altnames @@ -4089,6 +4580,49 @@ fn safe_orphan_vxlan_stamp_for_owner( Some(stamp.raw.clone()) } +fn safe_orphan_vrf_stamp_for_owner(link: &KernelVrfLinkInfo, owner_token: &str) -> Option { + let stamps: Vec<_> = link + .altnames + .iter() + .filter_map(|altname| parse_ownership_stamp(altname)) + .collect(); + if stamps.len() != 1 { + return None; + } + let stamp = &stamps[0]; + if stamp.class == ManagedNetdevClass::Vrf + && stamp.owner_token == owner_token + && stamp.name == link.name + { + Some(stamp.raw.clone()) + } else { + None + } +} + +fn safe_orphan_l3vxlan_stamp_for_owner( + link: &KernelVxlanLinkInfo, + owner_token: &str, +) -> Option { + let stamps: Vec<_> = link + .altnames + .iter() + .filter_map(|altname| parse_ownership_stamp(altname)) + .collect(); + if stamps.len() != 1 { + return None; + } + let stamp = &stamps[0]; + if stamp.class == ManagedNetdevClass::L3Vxlan + && stamp.owner_token == owner_token + && stamp.name == link.name + { + Some(stamp.raw.clone()) + } else { + None + } +} + /// Build the per-IP-VRF status block for a [`DataplaneReport`] (Gate 9). 
/// /// Joins the operator-facing handle from the [`IpVrfTable`] with the @@ -5289,7 +5823,7 @@ fn empty_snapshot() -> KernelSnapshot { #[cfg(test)] mod managed_netdev_tests { use super::*; - use crate::snapshot::{KernelLinkInfo, KernelVxlanLinkInfo}; + use crate::snapshot::{KernelLinkInfo, KernelVrfLinkInfo, KernelVxlanLinkInfo}; fn link(name: &str, ifindex: u32, vlan_filtering: bool, altnames: Vec<&str>) -> KernelLinkInfo { KernelLinkInfo { @@ -5306,6 +5840,7 @@ mod managed_netdev_tests { ifindex, name: name.to_string(), altnames: altnames.into_iter().map(str::to_string).collect(), + up: true, vni: Some(vni), local_ip: Some("10.0.0.1".parse().unwrap()), dstport: Some(4789), @@ -5313,6 +5848,18 @@ mod managed_netdev_tests { collect_metadata: false, vnifilter: false, bridge: Some("br100".to_string()), + master: Some("br100".to_string()), + mac: None, + } + } + + fn vrf_link(name: &str, ifindex: u32, altnames: Vec<&str>, table_id: u32) -> KernelVrfLinkInfo { + KernelVrfLinkInfo { + ifindex, + name: name.to_string(), + altnames: altnames.into_iter().map(str::to_string).collect(), + up: true, + table_id: Some(table_id), } } @@ -5332,6 +5879,28 @@ mod managed_netdev_tests { ) } + fn vrf_l3vxlan_table() -> ManagedNetdevTable { + ManagedNetdevTable::from_all_maps( + "leaf-1".to_string(), + BTreeMap::new(), + BTreeMap::new(), + BTreeMap::from([( + "vrf100".to_string(), + rustbgpd_evpn::ManagedVrfNetdevSpec { table_id: 5000 }, + )]), + BTreeMap::from([( + "l3vxlan100".to_string(), + rustbgpd_evpn::ManagedL3VxlanNetdevSpec { + vni: 5000, + local_ip: "10.0.0.1".parse().unwrap(), + dstport: 4789, + vrf: "vrf100".to_string(), + router_mac: MacAddress::new([0x02, 0, 0, 0, 0, 1]), + }, + )]), + ) + } + #[test] fn managed_netdev_status_classifies_desired_rows() { let table = ManagedNetdevTable::from_bridge_map( @@ -5592,6 +6161,154 @@ mod managed_netdev_tests { assert_eq!(rows[2].state, ManagedNetdevState::OwnedUnsafe); } + #[test] + fn 
managed_netdev_status_classifies_desired_vrf_and_l3vxlan_rows() { + let table = vrf_l3vxlan_table(); + + let unknown = build_managed_netdev_status(&table, None); + assert_eq!(unknown.len(), 2); + assert!( + unknown + .iter() + .all(|row| row.state == ManagedNetdevState::Unknown) + ); + + let absent = build_managed_netdev_status(&table, Some(&KernelSnapshot::new())); + assert_eq!(absent.len(), 2); + assert!( + absent + .iter() + .all(|row| row.state == ManagedNetdevState::DesiredAbsent) + ); + + let mut collision = KernelSnapshot::new(); + collision.insert_link_name("vrf100"); + collision.insert_link_name("l3vxlan100"); + let foreign_name = build_managed_netdev_status(&table, Some(&collision)); + assert_eq!(foreign_name.len(), 2); + assert!( + foreign_name + .iter() + .all(|row| row.state == ManagedNetdevState::ForeignPresent) + ); + + let mut snapshot = KernelSnapshot::new(); + snapshot.insert_vrf(vrf_link( + "vrf100", + 30, + vec!["rustbgpd:vrf:leaf-1:vrf100"], + 5000, + )); + let mut l3vxlan = vxlan_link( + "l3vxlan100", + 40, + vec!["rustbgpd:l3vxlan:leaf-1:l3vxlan100"], + 5000, + ); + l3vxlan.bridge = None; + l3vxlan.master = Some("vrf100".to_string()); + l3vxlan.mac = Some(MacAddress::new([0x02, 0, 0, 0, 0, 1])); + snapshot.insert_vxlan(l3vxlan); + + let safe = build_managed_netdev_status(&table, Some(&snapshot)); + assert_eq!(safe.len(), 2); + let l3_row = safe + .iter() + .find(|row| row.class == ManagedNetdevClass::L3Vxlan) + .unwrap(); + assert_eq!(l3_row.state, ManagedNetdevState::OwnedSafe); + assert_eq!(l3_row.observed_vni, Some(5000)); + assert_eq!(l3_row.observed_master.as_deref(), Some("vrf100")); + assert_eq!( + l3_row.observed_router_mac, + Some(MacAddress::new([0x02, 0, 0, 0, 0, 1])) + ); + let vrf_row = safe + .iter() + .find(|row| row.class == ManagedNetdevClass::Vrf) + .unwrap(); + assert_eq!(vrf_row.state, ManagedNetdevState::OwnedSafe); + assert_eq!(vrf_row.observed_table_id, Some(5000)); + } + + #[test] + fn 
managed_netdev_status_classifies_vrf_and_l3vxlan_protected_attribute_drift() { + let table = vrf_l3vxlan_table(); + let mut snapshot = KernelSnapshot::new(); + snapshot.insert_vrf(vrf_link( + "vrf100", + 30, + vec!["rustbgpd:vrf:leaf-1:vrf100"], + 6000, + )); + let mut l3vxlan = vxlan_link( + "l3vxlan100", + 40, + vec!["rustbgpd:l3vxlan:leaf-1:l3vxlan100"], + 5000, + ); + l3vxlan.bridge = None; + l3vxlan.master = Some("wrong-vrf".to_string()); + l3vxlan.mac = Some(MacAddress::new([0x02, 0, 0, 0, 0, 1])); + snapshot.insert_vxlan(l3vxlan); + + let rows = build_managed_netdev_status(&table, Some(&snapshot)); + assert_eq!(rows.len(), 2); + let l3_row = rows + .iter() + .find(|row| row.class == ManagedNetdevClass::L3Vxlan) + .unwrap(); + assert_eq!(l3_row.state, ManagedNetdevState::OwnedUnsafe); + assert!(l3_row.reason.contains("VRF attachment mismatch")); + let vrf_row = rows + .iter() + .find(|row| row.class == ManagedNetdevClass::Vrf) + .unwrap(); + assert_eq!(vrf_row.state, ManagedNetdevState::OwnedUnsafe); + assert!(vrf_row.reason.contains("table_id mismatch")); + } + + #[test] + fn managed_netdev_status_reports_orphaned_vrf_and_l3vxlan_without_lifecycle_ops() { + let table = ManagedNetdevTable::from_all_maps( + "leaf-1".to_string(), + BTreeMap::new(), + BTreeMap::new(), + BTreeMap::new(), + BTreeMap::new(), + ); + let mut snapshot = KernelSnapshot::new(); + snapshot.insert_vrf(vrf_link( + "vrf200", + 30, + vec!["rustbgpd:vrf:leaf-1:vrf200"], + 5000, + )); + let mut l3vxlan = vxlan_link( + "l3vxlan200", + 40, + vec!["rustbgpd:l3vxlan:leaf-1:l3vxlan200"], + 5000, + ); + l3vxlan.bridge = None; + l3vxlan.master = Some("vrf200".to_string()); + l3vxlan.mac = Some(MacAddress::new([0x02, 0, 0, 0, 0, 2])); + snapshot.insert_vxlan(l3vxlan); + + let rows = build_managed_netdev_status(&table, Some(&snapshot)); + assert_eq!(rows.len(), 2); + assert!(rows.iter().any(|row| { + row.class == ManagedNetdevClass::L3Vxlan && row.state == ManagedNetdevState::Orphaned + })); + 
/// One VRF link observed by name in the kernel link dump.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KernelVrfLinkInfo {
    /// Kernel ifindex of the VRF link.
    pub ifindex: u32,
    /// Linux link name.
    pub name: String,
    /// Linux alternative interface names observed on the VRF. Managed-netdev
    /// status parses rustbgpd ownership stamps out of these.
    pub altnames: Vec<String>,
    /// Administrative link-up state.
    pub up: bool,
    /// VRF route table id from `IFLA_VRF_TABLE`, when reported. `None` when
    /// the link message carried no table id.
    pub table_id: Option<u32>,
}
@@ -455,6 +478,9 @@ pub struct KernelSnapshot { /// lifecycle to classify desired VXLAN rows and rustbgpd-stamped VXLAN /// orphans independently of bridge readiness. pub vxlans: BTreeMap, + /// VRF links by name. Used by ADR-0091 managed-netdev status substrate + /// to classify desired VRF rows before lifecycle create/delete lands. + pub vrfs: BTreeMap, /// Non-VXLAN bridge ports by link name, with observed /// `IFLA_BRPORT_STATE`. Consumed by the single-active AC-gate /// resolver (`crate::ac_gate`) to map an ADR-0085 `interface` @@ -558,6 +584,22 @@ impl KernelSnapshot { self.vxlans.insert(info.name.clone(), info); } + /// Replace the VRF inventory while preserving names of non-VRF links + /// already recorded via [`Self::set_link_names`]. + pub fn set_vrfs(&mut self, vrfs: BTreeMap) { + for name in self.vrfs.keys() { + self.link_names.remove(name); + } + self.link_names.extend(vrfs.keys().cloned()); + self.vrfs = vrfs; + } + + /// Add a single VRF link to the inventory. + pub fn insert_vrf(&mut self, info: KernelVrfLinkInfo) { + self.link_names.insert(info.name.clone()); + self.vrfs.insert(info.name.clone(), info); + } + /// Remove a bridge from the link inventory, returning the previous /// value if any. 
pub fn remove_link(&mut self, name: &str) -> Option { @@ -974,6 +1016,7 @@ mod tests { ifindex: 20, name: "vxlan100".to_string(), altnames: Vec::new(), + up: true, vni: Some(100), local_ip: Some(ip("10.0.0.1")), dstport: Some(4789), @@ -981,6 +1024,8 @@ mod tests { collect_metadata: false, vnifilter: false, bridge: Some("br100".to_string()), + master: Some("br100".to_string()), + mac: None, }, )])); snap.set_links(BTreeMap::from([( diff --git a/crates/evpn-linux/tests/reconcile_actor.rs b/crates/evpn-linux/tests/reconcile_actor.rs index 7a80baca..50792353 100644 --- a/crates/evpn-linux/tests/reconcile_actor.rs +++ b/crates/evpn-linux/tests/reconcile_actor.rs @@ -235,6 +235,7 @@ fn managed_vxlan_link( ifindex, name: name.to_string(), altnames: altnames.into_iter().map(str::to_string).collect(), + up: true, vni: Some(vni), local_ip: Some(ipa("10.0.0.1")), dstport: Some(4789), @@ -242,6 +243,8 @@ fn managed_vxlan_link( collect_metadata: false, vnifilter: false, bridge: Some("br100".to_string()), + master: Some("br100".to_string()), + mac: None, } } diff --git a/crates/evpn/src/lib.rs b/crates/evpn/src/lib.rs index fbeedd4d..12db1a4e 100644 --- a/crates/evpn/src/lib.rs +++ b/crates/evpn/src/lib.rs @@ -143,9 +143,11 @@ pub use mac::{ }; pub use managed_netdev::{ MANAGED_NETDEV_STAMP_PREFIX, MAX_ALT_IFNAME_LEN, MAX_IFNAME_LEN, MAX_OWNER_TOKEN_LEN, - ManagedBridgeNetdev, ManagedNetdevClass, ManagedNetdevStamp, ManagedNetdevState, - ManagedNetdevStatus, ManagedNetdevTable, ManagedVxlanNetdev, ManagedVxlanNetdevSpec, - bridge_ownership_stamp, parse_ownership_stamp, vxlan_ownership_stamp, + ManagedBridgeNetdev, ManagedL3VxlanNetdev, ManagedL3VxlanNetdevSpec, ManagedNetdevClass, + ManagedNetdevStamp, ManagedNetdevState, ManagedNetdevStatus, ManagedNetdevTable, + ManagedVrfNetdev, ManagedVrfNetdevSpec, ManagedVxlanNetdev, ManagedVxlanNetdevSpec, + bridge_ownership_stamp, l3vxlan_ownership_stamp, parse_ownership_stamp, vrf_ownership_stamp, + vxlan_ownership_stamp, }; pub use 
mass_withdraw::{AsPathFingerprint, AsPathTracker, MassWithdrawTrigger}; pub use origination::{LocalMacOriginator, OriginationAction, RemoteMacView}; diff --git a/crates/evpn/src/managed_netdev.rs b/crates/evpn/src/managed_netdev.rs index 670d3520..9cf11a52 100644 --- a/crates/evpn/src/managed_netdev.rs +++ b/crates/evpn/src/managed_netdev.rs @@ -7,6 +7,8 @@ use std::collections::BTreeMap; use std::net::IpAddr; +use crate::MacAddress; + /// Prefix used in rustbgpd-managed altname ownership stamps. pub const MANAGED_NETDEV_STAMP_PREFIX: &str = "rustbgpd"; /// Maximum Linux altname byte length (`ALTIFNAMSIZ - 1`). @@ -16,15 +18,19 @@ pub const MAX_IFNAME_LEN: usize = 15; /// Maximum configured owner token length for ADR-0091 v1. pub const MAX_OWNER_TOKEN_LEN: usize = 63; -/// Managed netdev class. ADR-0091 v1 shipped bridge rows first; fixed-VNI -/// VXLAN rows are the next class. SVD / VRF creation remains explicitly -/// deferred. +/// Managed netdev class. ADR-0091 ships lifecycle support class by class: +/// bridge and fixed-VNI VXLAN rows are live, while VRF and L3VXLAN rows first +/// enter as schema/status substrate before their create/delete executor lands. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum ManagedNetdevClass { /// Linux bridge device. Bridge, /// Traditional Linux VXLAN device with one fixed VNI. Vxlan, + /// Linux VRF master device. + Vrf, + /// Linux L3 VXLAN device enslaved to a VRF. + L3Vxlan, } impl ManagedNetdevClass { @@ -34,6 +40,8 @@ impl ManagedNetdevClass { match self { Self::Bridge => "bridge", Self::Vxlan => "vxlan", + Self::Vrf => "vrf", + Self::L3Vxlan => "l3vxlan", } } } @@ -74,12 +82,59 @@ pub struct ManagedVxlanNetdev { pub ownership_stamp: String, } +/// Desired protected attributes for one managed Linux VRF. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ManagedVrfNetdevSpec { + /// Linux route-table id carried by `IFLA_VRF_TABLE`. 
+ pub table_id: u32, +} + +/// One configured VRF row in the managed-netdev table. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ManagedVrfNetdev { + /// Linux VRF interface name. + pub name: String, + /// Desired protected attributes. + pub spec: ManagedVrfNetdevSpec, + /// Derived durable ownership stamp. + pub ownership_stamp: String, +} + +/// Desired protected attributes for one managed L3 VXLAN. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ManagedL3VxlanNetdevSpec { + /// VXLAN VNI (`1..=16_777_215`). + pub vni: u32, + /// Local source IP for encapsulated packets. + pub local_ip: IpAddr, + /// UDP destination port. + pub dstport: u16, + /// VRF this L3 VXLAN must be enslaved to before it can satisfy IP-VRF + /// readiness. + pub vrf: String, + /// Router MAC the L3 VXLAN link must carry. + pub router_mac: MacAddress, +} + +/// One configured L3 VXLAN row in the managed-netdev table. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ManagedL3VxlanNetdev { + /// Linux L3 VXLAN interface name. + pub name: String, + /// Desired protected attributes. + pub spec: ManagedL3VxlanNetdevSpec, + /// Derived durable ownership stamp. + pub ownership_stamp: String, +} + /// Resolved managed-netdev desired state. #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct ManagedNetdevTable { owner_token: Option, bridges: BTreeMap, vxlans: BTreeMap, + vrfs: BTreeMap, + l3vxlans: BTreeMap, } impl ManagedNetdevTable { @@ -101,6 +156,24 @@ impl ManagedNetdevTable { owner_token: String, bridges: BTreeMap, vxlans: BTreeMap, + ) -> Self { + Self::from_all_maps( + owner_token, + bridges, + vxlans, + BTreeMap::new(), + BTreeMap::new(), + ) + } + + /// Build a table from validated, already-unique managed-netdev rows. 
+ #[must_use] + pub fn from_all_maps( + owner_token: String, + bridges: BTreeMap, + vxlans: BTreeMap, + vrfs: BTreeMap, + l3vxlans: BTreeMap, ) -> Self { let bridges = bridges .into_iter() @@ -130,10 +203,40 @@ impl ManagedNetdevTable { ) }) .collect(); + let vrfs = vrfs + .into_iter() + .map(|(name, spec)| { + let ownership_stamp = vrf_ownership_stamp(&owner_token, &name); + ( + name.clone(), + ManagedVrfNetdev { + name, + spec, + ownership_stamp, + }, + ) + }) + .collect(); + let l3vxlans = l3vxlans + .into_iter() + .map(|(name, spec)| { + let ownership_stamp = l3vxlan_ownership_stamp(&owner_token, &name); + ( + name.clone(), + ManagedL3VxlanNetdev { + name, + spec, + ownership_stamp, + }, + ) + }) + .collect(); Self { owner_token: Some(owner_token), bridges, vxlans, + vrfs, + l3vxlans, } } @@ -144,7 +247,11 @@ impl ManagedNetdevTable { /// the operator removes the last managed bridge row. #[must_use] pub fn is_empty(&self) -> bool { - self.owner_token.is_none() && self.bridges.is_empty() && self.vxlans.is_empty() + self.owner_token.is_none() + && self.bridges.is_empty() + && self.vxlans.is_empty() + && self.vrfs.is_empty() + && self.l3vxlans.is_empty() } /// Owner token when `[managed_netdevs]` is configured. @@ -174,6 +281,28 @@ impl ManagedNetdevTable { pub fn vxlan(&self, name: &str) -> Option<&ManagedVxlanNetdev> { self.vxlans.get(name) } + + /// Desired VRF rows in deterministic name order. + pub fn vrfs(&self) -> impl Iterator { + self.vrfs.values() + } + + /// Find one desired VRF by name. + #[must_use] + pub fn vrf(&self, name: &str) -> Option<&ManagedVrfNetdev> { + self.vrfs.get(name) + } + + /// Desired L3 VXLAN rows in deterministic name order. + pub fn l3vxlans(&self) -> impl Iterator { + self.l3vxlans.values() + } + + /// Find one desired L3 VXLAN by name. + #[must_use] + pub fn l3vxlan(&self, name: &str) -> Option<&ManagedL3VxlanNetdev> { + self.l3vxlans.get(name) + } } /// Derived bridge ownership stamp. 
@@ -194,6 +323,24 @@ pub fn vxlan_ownership_stamp(owner_token: &str, vxlan_name: &str) -> String { ) } +/// Derived VRF ownership stamp. +#[must_use] +pub fn vrf_ownership_stamp(owner_token: &str, vrf_name: &str) -> String { + format!( + "{MANAGED_NETDEV_STAMP_PREFIX}:{}:{owner_token}:{vrf_name}", + ManagedNetdevClass::Vrf.as_str() + ) +} + +/// Derived L3 VXLAN ownership stamp. +#[must_use] +pub fn l3vxlan_ownership_stamp(owner_token: &str, l3vxlan_name: &str) -> String { + format!( + "{MANAGED_NETDEV_STAMP_PREFIX}:{}:{owner_token}:{l3vxlan_name}", + ManagedNetdevClass::L3Vxlan.as_str() + ) +} + /// Parsed rustbgpd ownership stamp from a link altname. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ManagedNetdevStamp { @@ -218,6 +365,8 @@ pub fn parse_ownership_stamp(raw: &str) -> Option { let class = match parts.next()? { "bridge" => ManagedNetdevClass::Bridge, "vxlan" => ManagedNetdevClass::Vxlan, + "vrf" => ManagedNetdevClass::Vrf, + "l3vxlan" => ManagedNetdevClass::L3Vxlan, _ => return None, }; let owner_token = parts.next()?; @@ -300,6 +449,14 @@ pub struct ManagedNetdevStatus { pub observed_vnifilter: Option, /// Observed bridge master for VXLAN rows. pub observed_bridge: Option, + /// Observed VRF table id for VRF rows. + pub observed_table_id: Option, + /// Observed administrative link-up state for VRF and L3VXLAN rows. + pub observed_up: Option, + /// Observed master device for L3VXLAN rows. + pub observed_master: Option, + /// Observed Router MAC / link-layer address for L3VXLAN rows. + pub observed_router_mac: Option, /// Observed rustbgpd ownership stamps on the link. 
pub observed_stamps: Vec, } @@ -328,10 +485,26 @@ mod tests { assert_eq!(parsed.name, "vxlan100"); } + #[test] + fn vrf_and_l3vxlan_stamps_round_trip() { + let vrf = vrf_ownership_stamp("leaf-1", "vrf100"); + assert_eq!(vrf, "rustbgpd:vrf:leaf-1:vrf100"); + let parsed = parse_ownership_stamp(&vrf).unwrap(); + assert_eq!(parsed.class, ManagedNetdevClass::Vrf); + assert_eq!(parsed.owner_token, "leaf-1"); + assert_eq!(parsed.name, "vrf100"); + + let l3vxlan = l3vxlan_ownership_stamp("leaf-1", "l3vxlan100"); + assert_eq!(l3vxlan, "rustbgpd:l3vxlan:leaf-1:l3vxlan100"); + let parsed = parse_ownership_stamp(&l3vxlan).unwrap(); + assert_eq!(parsed.class, ManagedNetdevClass::L3Vxlan); + assert_eq!(parsed.owner_token, "leaf-1"); + assert_eq!(parsed.name, "l3vxlan100"); + } + #[test] fn parse_ownership_stamp_ignores_unrelated_or_malformed_altnames() { assert!(parse_ownership_stamp("operator:bridge:leaf-1:br100").is_none()); - assert!(parse_ownership_stamp("rustbgpd:vrf:leaf-1:vrf100").is_none()); assert!(parse_ownership_stamp("rustbgpd:bridge:leaf-1").is_none()); assert!(parse_ownership_stamp("rustbgpd:bridge:leaf-1:br100:extra").is_none()); } diff --git a/docs/API.md b/docs/API.md index 28a9a4d8..e237683c 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1628,7 +1628,7 @@ semantics used by both `ApplyEvpnRuntime` and SIGHUP reload. 
| `ListEvpnNexthops` | List Linux dataplane reconciler-owned ADR-0059 FDB nexthop groups (per-VNI groups with ESI / Ethernet Tag / kernel group ID, per-VTEP member nexthop IDs + gateways, MAC refs) plus top-level orphan-NH count, pending-delete count, and the `drift_recovery_disabled` latch — read-only operator visibility | | `ListEthernetSegments` | List configured Ethernet Segments sorted by ESI, joined with live multi-homing state: composed drain reasons, per-member DF role and BUM forwarding action, same-ESI local-bias eligibility, whole-port AC-gate state/interface, and matching FDB-NHG group / MAC-ref counts — read-only ADR-0083/0085 diagnose visibility | | `ListIpVrfs` | List configured IP-VRFs / L3VNI tenants (name, l3vni, rd, resolved route_targets including any auto-derived RT, local_vtep_ip, router_mac, optional `evpn_instance` link, readiness state, originated_routes_count, installed_routes_count, remote_prefix_drop_counts) — Gate 9 / ADR-0058 | -| `ListManagedNetdevs` | List configured ADR-0091 managed EVPN bridge and fixed-VNI VXLAN rows joined with the latest Linux link snapshot, plus rustbgpd-stamped orphan/unsafe rows for the configured owner. Reports class, name, desired flag, ownership stamp, state (`desired-absent`, `owned-safe`, `foreign-present`, `owned-unsafe`, `orphaned`, or `unknown`), observed ifindex, bridge `vlan_filtering`, VXLAN `vni` / `local` / `dstport` / `learning` / `collect-metadata` / `vnifilter` / bridge-master attributes, observed rustbgpd ownership stamps, and reason text. Bridge and fixed-VNI VXLAN lifecycle execution is active in the dataplane actor. This RPC remains read-only status. | +| `ListManagedNetdevs` | List configured ADR-0091 managed EVPN bridge, fixed-VNI VXLAN, VRF, and L3VXLAN rows joined with the latest Linux link snapshot, plus rustbgpd-stamped orphan/unsafe rows for the configured owner. 
Reports class, name, desired flag, ownership stamp, state (`desired-absent`, `owned-safe`, `foreign-present`, `owned-unsafe`, `orphaned`, or `unknown`), observed ifindex, bridge `vlan_filtering`, VXLAN/L3VXLAN `vni` / `local` / `dstport` / `learning` / `collect-metadata` / `vnifilter` / master attributes, VRF `table_id`, L3VXLAN `router_mac`, observed rustbgpd ownership stamps, and reason text. Bridge and fixed-VNI VXLAN lifecycle execution is active in the dataplane actor; VRF/L3VXLAN rows are schema/status-only in this tranche. This RPC remains read-only status. | | `GetIpVrf` | Detail view of a single IP-VRF including the seven readiness predicates (`not_ready_reasons`) when `readiness_state != Ready` and scoped remote Type 5 projection-drop counts | | `ClearDuplicateMacQuarantine` | Clear one RFC 7432 §15.1 duplicate-MAC local-origin quarantine by `(vni, mac)`. Returns `cleared=false` when no active quarantine exists; read-only listeners reject it. | | `ApplyEvpnRuntime` | Validate or apply a full candidate EVPN runtime model through the ADR-0063 coordinator. `validate_only=true` returns the plan without mutation; no-op applies succeed; a single L2VNI add, single L2VNI delete that is not an Ethernet Segment member, single L2VNI redefine with unchanged `ip_vrf` link metadata, single IP-VRF add, single standalone IP-VRF delete with no L2VNI links, single IP-VRF redefine with unchanged L3VNI/device/table identity, single Ethernet Segment add/delete/redefine, additive build-up, or an atomic tenant teardown (a delete-only plan dropping an ES-member L2VNI together with its Ethernet Segment and/or a linked IP-VRF in one pass) converges live and commits a new generation. When a segment actor already exists, L2VNI add/delete also republishes the current instance table so later ES add/redefine can bind a VNI added at runtime; ES-member L2VNI redefine also rebuilds the segment actor's Type 1/4 routes from the candidate instance snapshot. 
An `ip_vrf` relink (an L2VNI re-homed to a different IP-VRF) also converges live as a dataplane-only republish. L3VNI/device/table IP-VRF identity changes are restart-required by design, and generic mixed add/delete/redefine edits still fail closed. | @@ -1813,17 +1813,21 @@ rbgp evpn vrfs vrf1 # single-VRF detail (matches GetIpVrf) ### List managed EVPN netdevs ADR-0091 managed-netdev lifecycle/status surface. Returns configured -`[managed_netdevs]` bridge and fixed-VNI VXLAN rows joined with the latest -Linux link snapshot, plus rustbgpd-stamped orphan links observed by the -dataplane actor. This is read-only status: a row can be `desired-absent`, +`[managed_netdevs]` bridge, fixed-VNI VXLAN, VRF, and L3VXLAN rows joined with +the latest Linux link snapshot, plus rustbgpd-stamped orphan links observed by +the dataplane actor. This is read-only status: a row can be `desired-absent`, `foreign-present`, `owned-unsafe`, `owned-safe`, `orphaned`, or `unknown`. Bridge and fixed-VNI VXLAN rows are active lifecycle intent inside the -dataplane reconciler. The RPC itself never mutates links. +dataplane reconciler. VRF and L3VXLAN rows are schema/status substrate only in +this tranche; their create/adopt/reap lifecycle is deferred. The RPC itself +never mutates links. Bridge rows expose observed `vlan_filtering`. Fixed-VNI VXLAN rows expose observed `vni`, `local`, `dstport`, `learning-disabled`, `collect-metadata`, `vnifilter`, and bridge-master fields when the Linux link -snapshot reports them. +snapshot reports them. VRF rows expose observed `table_id` and `up`. L3VXLAN +rows expose observed `vni`, `local`, `dstport`, `learning-disabled`, +`collect-metadata`, `vnifilter`, `vrf` master, `up`, and `router_mac`. ```bash grpcurl -plaintext -import-path . 
-proto proto/rustbgpd.proto \ diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index af30db79..f1f66d1c 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -1939,8 +1939,9 @@ default — RR-only deployments leave it empty. > the `bridge` / `local_vtep_ip` fields below must match. ADR-0091 is the > explicit opt-in exception for bridge creation/adoption/reap through > `[managed_netdevs]`; fixed-VNI VXLAN rows can also create/adopt/reap -> traditional one-VNI VXLAN devices. SVD / collect-metadata VXLAN and VRF -> creation remain deferred. +> traditional one-VNI VXLAN devices. Managed VRF / L3VXLAN rows are accepted +> for schema validation and ownership/status reporting, but VRF / L3VXLAN +> creation remains deferred. > ADR-0089 enables the first VLAN-aware bridge programming target through > a local bridge-VLAN / VNI binding while keeping EVPN Ethernet Tag ID at > `0`. @@ -1986,7 +1987,8 @@ duplicate_mac_detection = { action = "detect", window_seconds = 180, threshold = or is declared in `[managed_netdevs]` for ADR-0091 bridge lifecycle ownership. ADR-0091 bridge and fixed-VNI `[[managed_netdevs.vxlans]]` lifecycle now ship (create/adopt/reap); SVD / collect-metadata VXLAN and - VRF netdev creation remain operator-provisioned. + VRF / L3VXLAN lifecycle remain operator-provisioned, though managed VRF / + L3VXLAN rows are valid for status and protected-attribute diagnostics. - `bridge_vlan` (when set) must be in `1..=4094` and requires `bridge`. At runtime it selects the ADR-0089 VLAN-aware path: the observed bridge must have `vlan_filtering=1`, the configured VLAN @@ -2341,14 +2343,15 @@ for the design rationale. ## `[managed_netdevs]` ADR-0091 managed EVPN netdevs are opt-in and class-scoped. The current -surface accepts bridge rows and fixed-VNI VXLAN rows, derives durable Linux -altname ownership stamps, and reports status through -`EvpnService.ListManagedNetdevs` / `rbgp evpn managed-netdevs`. 
Bridge rows -are active lifecycle intent: the dataplane actor creates missing bridges and -fixed-VNI VXLANs, stamps them with the derived altname, treats exact stamped -links as crash-restart adoption, and reaps exact same-owner orphans when the -config keeps the owner token but removes the row. SVD / collect-metadata -VXLAN, managed VRF, and managed L3VXLAN creation remain deferred. +surface accepts bridge rows, fixed-VNI VXLAN rows, VRF rows, and L3VXLAN rows, +derives durable Linux altname ownership stamps, and reports status through +`EvpnService.ListManagedNetdevs` / `rbgp evpn managed-netdevs`. Bridge and +fixed-VNI VXLAN rows are active lifecycle intent: the dataplane actor creates +missing links, stamps them with the derived altname, treats exact stamped links +as crash-restart adoption, and reaps exact same-owner orphans when the config +keeps the owner token but removes the row. VRF and L3VXLAN rows are +schema/status substrate in this tranche; their create/adopt/reap lifecycle +remains deferred. Any `[managed_netdevs]` add/remove/change is restart-required in this tranche for SIGHUP, config transactions, gNMI Set, and @@ -2369,6 +2372,19 @@ local = "10.0.0.1" # VXLAN local source IP dstport = 4789 # optional; defaults to IANA VXLAN port bridge = "br100" # desired bridge master learning = false # optional default; true is rejected + +[[managed_netdevs.vrfs]] +name = "vrf100" # Linux ifname, <= 15 bytes +table_id = 100 # Linux VRF table id, non-zero + +[[managed_netdevs.l3vxlans]] +name = "l3vxlan100" # Linux ifname, <= 15 bytes +vni = 100 # L3VNI, 1..=16_777_215 +local = "10.0.0.1" # VXLAN local source IP +dstport = 4789 # optional; defaults to IANA VXLAN port +vrf = "vrf100" # desired VRF master +router_mac = "02:00:00:00:00:01" # non-zero unicast Router MAC +learning = false # optional default; true is rejected ``` The VXLAN `bridge` field names the desired bridge master. 
It may reference a @@ -2385,43 +2401,50 @@ The derived ownership stamps are: ```text rustbgpd:bridge:<owner_token>:<name> rustbgpd:vxlan:<owner_token>:<name> +rustbgpd:vrf:<owner_token>:<name> +rustbgpd:l3vxlan:<owner_token>:<name> ``` Validation rejects managed rows without `owner_token`, duplicate managed -netdev names across bridge and VXLAN rows, invalid Linux-style link names -(`.`, `..`, spaces, or names over 15 bytes), invalid owner tokens, and -derived stamps longer than Linux's 127-byte altname limit. VXLAN validation -also rejects invalid VNIs (outside `1..=16_777_215`), a duplicate `vni` -shared by two VXLAN rows, `dstport = 0`, and `learning = true`; SVD / -collect-metadata VXLANs, managed VRFs, and managed L3VXLAN rows remain -unsupported. - -rustbgpd preserves foreign links. A same-name bridge or VXLAN without the -exact ownership stamp is reported `foreign-present` and is not modified. A -link with the expected stamp plus any other rustbgpd stamp, a wrong owner -stamp, a stamp/name mismatch, or protected-attribute drift such as unexpected -bridge `vlan_filtering` or VXLAN `vni`, `local`, `dstport`, `learning`, -`collect-metadata`, `vnifilter`, or `bridge` attachment is reported -`owned-unsafe` and is not repaired or deleted by v1. The bounded Prometheus gauge +netdev names across bridge, VXLAN, VRF, and L3VXLAN rows, invalid Linux-style +link names (`.`, `..`, spaces, or names over 15 bytes), invalid owner tokens, +and derived stamps longer than Linux's 127-byte altname limit. VXLAN +validation also rejects invalid VNIs (outside `1..=16_777_215`), a duplicate +`vni` shared by two VXLAN rows, `dstport = 0`, and `learning = true`. VRF +validation rejects `table_id = 0` and duplicate managed VRF table ids. L3VXLAN +validation rejects invalid VNIs, duplicate managed L3VXLAN VNIs, `dstport = 0`, +`learning = true`, and a missing, multicast, or all-zero `router_mac`. + +rustbgpd preserves foreign links. A same-name bridge, VXLAN, VRF, or L3VXLAN +without the exact ownership stamp is reported `foreign-present` and is not +modified.
A link with the expected stamp plus any other rustbgpd stamp, a wrong +owner stamp, a stamp/name mismatch, or protected-attribute drift is reported +`owned-unsafe` and is not repaired or deleted by v1. Protected attributes are: +bridge `vlan_filtering`; VXLAN `vni`, `local`, `dstport`, `learning`, +`collect-metadata`, `vnifilter`, and `bridge` attachment; VRF `table_id`; and +L3VXLAN `vni`, `local`, `dstport`, `learning`, `collect-metadata`, +`vnifilter`, `vrf` master, and `router_mac`. The bounded Prometheus gauge `evpn_managed_netdev_state{class,name,desired,state}` mirrors the latest reported state for alerting; detailed reason text is available through `ListManagedNetdevs` / `rbgp evpn managed-netdevs`. -Reaping is equally conservative. When the owner token stays but a VXLAN row is -removed, only an exact same-owner stamped plain orphan is reaped. A -de-configured rustbgpd-stamped VXLAN that has drifted into a collect-metadata -or vnifilter mode — modes the fixed-VNI lifecycle never creates — is preserved -(`owned-unsafe`), not reaped. +Reaping is equally conservative. When the owner token stays but a bridge or +fixed-VNI VXLAN row is removed, only an exact same-owner stamped plain orphan +is reaped. A de-configured rustbgpd-stamped VXLAN that has drifted into a +collect-metadata or vnifilter mode — modes the fixed-VNI lifecycle never +creates — is preserved (`owned-unsafe`), not reaped. VRF and L3VXLAN orphan +rows are reported for status only in this tranche and are not reaped until +their lifecycle proof lands. 
Status states: | State | Meaning | |-------|---------| -| `desired-absent` | Configured bridge or VXLAN is not present in the kernel snapshot | +| `desired-absent` | Configured bridge, VXLAN, VRF, or L3VXLAN is not present in the kernel snapshot | | `foreign-present` | Same-name link exists without the expected rustbgpd ownership stamp | | `owned-unsafe` | Link carries a rustbgpd stamp that is not the expected one, or a protected attribute does not match config | | `owned-safe` | Expected stamp and protected attributes match | -| `orphaned` | A rustbgpd-stamped bridge or VXLAN exists with no desired config row | +| `orphaned` | A rustbgpd-stamped link exists with no desired config row | | `unknown` | No dataplane status snapshot has been published yet, or the link dump failed | --- diff --git a/docs/adr/0054-evpn-linux-dataplane-boundary.md b/docs/adr/0054-evpn-linux-dataplane-boundary.md index aab43409..4fcae6e3 100644 --- a/docs/adr/0054-evpn-linux-dataplane-boundary.md +++ b/docs/adr/0054-evpn-linux-dataplane-boundary.md @@ -162,9 +162,10 @@ expects the operator or host-networking layer to create them. For an `EvpnInstance` with `bridge = "br100"`, the initial Gate 7b dataplane crate verifies: -ADR-0091 later adds an explicit opt-in exception for Linux bridge -create/adopt/reap through `[managed_netdevs]`. VXLAN and VRF/L3VXLAN -netdev creation remain outside this ADR's default boundary. +ADR-0091 later adds an explicit opt-in exception for Linux bridge and +fixed-VNI VXLAN create/adopt/reap through `[managed_netdevs]`, plus +VRF/L3VXLAN schema/status substrate. SVD / collect-metadata VXLAN lifecycle +and VRF/L3VXLAN lifecycle remain outside this ADR's default boundary. 1. the bridge exists; 2. 
exactly one VXLAN port for the instance VNI is attached to that diff --git a/docs/adr/0088-evpn-vlan-aware-bridge-managed-netdev-boundary.md b/docs/adr/0088-evpn-vlan-aware-bridge-managed-netdev-boundary.md index 311ee8fc..6a1cc623 100644 --- a/docs/adr/0088-evpn-vlan-aware-bridge-managed-netdev-boundary.md +++ b/docs/adr/0088-evpn-vlan-aware-bridge-managed-netdev-boundary.md @@ -79,9 +79,10 @@ lifecycle by default. The daemon may reconcile owned FDB entries, FDB nexthop groups, neighbors, and L3 FIB routes on top of existing devices, but it must not create or delete bridge, VXLAN, VRF, bond, VLAN, or lower-link netdevs unless an opt-in ownership mode explicitly says so. -ADR-0091 is the first such mode for Linux bridge create/adopt/reap; VXLAN, -VRF/L3VXLAN, bond, VLAN, and lower-link creation remain outside the -default boundary. +ADR-0091 is the first such mode for Linux bridge and fixed-VNI VXLAN +create/adopt/reap, and also adds VRF/L3VXLAN schema/status substrate; +SVD / collect-metadata VXLAN lifecycle, VRF/L3VXLAN lifecycle, bond, VLAN, and +lower-link creation remain outside the default boundary. The current L2VNI readiness rule also remains in force: a configured `[[evpn_instances]].bridge` with `vlan_filtering=1` is `NotReady`, not a diff --git a/docs/adr/0091-evpn-managed-netdev-creation.md b/docs/adr/0091-evpn-managed-netdev-creation.md index bd92039e..ff755275 100644 --- a/docs/adr/0091-evpn-managed-netdev-creation.md +++ b/docs/adr/0091-evpn-managed-netdev-creation.md @@ -57,8 +57,10 @@ order: 4. optional VLAN upper / bridge membership helpers after the base classes. The first implementation slice was bridge create/adopt/reap. The second -slice adds fixed-VNI VXLAN create/adopt/reap. SVD / collect-metadata VXLAN -and VRF/L3VXLAN are separate slices with their own proofs. +slice adds fixed-VNI VXLAN create/adopt/reap. 
The VRF/L3VXLAN schema/status +substrate is present, but VRF/L3VXLAN create/adopt/reap lifecycle remains a +separate slice with its own proof. SVD / collect-metadata VXLAN lifecycle is +also a separate proof gate. ### 2. `IFLA_ALT_IFNAME` is the durable ownership marker @@ -77,16 +79,17 @@ the logical fields are fixed: block's stable identity; - `owner-token`: operator-configured daemon / installation token. -As shipped, the bridge and fixed-VNI VXLAN classes encode this as a -colon-delimited altname: +As shipped, bridge, fixed-VNI VXLAN, VRF, and L3VXLAN classes encode this as +a colon-delimited altname: ```text rustbgpd:bridge:<owner-token>:<name> rustbgpd:vxlan:<owner-token>:<name> +rustbgpd:vrf:<owner-token>:<name> +rustbgpd:l3vxlan:<owner-token>:<name> ``` -where the configured link name serves as the class's -`stable-config-id`. +where the configured link name serves as the class's `stable-config-id`. The owner token is not a secret. It does not defend against privileged local root or an operator deliberately spoofing the marker. Its job is accidental @@ -178,11 +181,22 @@ bridge = "br_default" [[managed_netdevs.vrfs]] name = "vrf-blue" -table = 1001 +table_id = 1001 + +[[managed_netdevs.l3vxlans]] +name = "l3vxlan1001" +vni = 1001 +local = "10.0.0.1" +dstport = 4789 +vrf = "vrf-blue" +router_mac = "02:00:00:00:00:01" +learning = false ``` -The final schema may split fixed-VNI VXLAN, SVD/collect-metadata VXLAN, and -L3VXLAN into more specific blocks. The ownership model stays the same. +The fixed-VNI VXLAN and L3VXLAN blocks are deliberately separate: fixed-VNI +VXLAN rows create L2 bridge members, while L3VXLAN rows are the per-VRF VTEP +device used by IRB. SVD / collect-metadata VXLAN lifecycle remains a more +specific future class. The ownership model stays the same. Runtime mutation, SIGHUP reload, gNMI `Set`, and `ApplyEvpnRuntime` must remain fail-closed for managed-netdev fields until the corresponding class @@ -303,7 +317,8 @@ foreign-vs-owned signal. 4. Add VXLAN class support.
**Done for fixed-VNI schema/status and create/adopt/reap lifecycle.** SVD / collect-metadata VXLAN remains deferred. -5. Add VRF / L3VXLAN class support. +5. Add VRF / L3VXLAN class support. **Done for schema/status and ownership + stamps; lifecycle create/adopt/reap remains deferred.** 6. Add optional VLAN upper / bridge membership helpers if operator demand remains after bridge/VXLAN/VRF creation. diff --git a/docs/evpn-enablement.md b/docs/evpn-enablement.md index 363c4cd7..079ac579 100644 --- a/docs/evpn-enablement.md +++ b/docs/evpn-enablement.md @@ -57,8 +57,8 @@ record, [gobgp-parity.md](gobgp-parity.md) for the cross-daemon comparison. ADR-0063 runtime convergence exceptions and lower-priority VTEP operability gaps such as bridge-ifindex MAC+IP VLAN correlation, true shared-VNI / non-zero Ethernet Tag service, and rustbgpd-managed SVD / collect-metadata - VXLAN and VRF/L3VXLAN netdev creation (managed bridge and fixed-VNI VXLAN - creation have shipped). + VXLAN plus VRF/L3VXLAN lifecycle creation (managed bridge and fixed-VNI + VXLAN lifecycle has shipped; VRF/L3VXLAN schema/status has shipped). ADR-0088 records the boundary for those operability gaps; ADR-0089's first VNI-per-broadcast-domain VLAN-aware bridge slice now validates an explicit `bridge_vlan` binding, programs VLAN-scoped remote-MAC FDB rows, attributes @@ -72,8 +72,9 @@ record, [gobgp-parity.md](gobgp-parity.md) for the cross-daemon comparison. bridge-ifindex ARP/ND on `vlan_filtering=1` bridges still fails closed because Linux does not report bridge VLAN identity there. Managed bridge and fixed-VNI VXLAN creation now ship under ADR-0091's altname-stamp ownership - model; the remaining managed classes (SVD / collect-metadata VXLAN, VRF / - L3VXLAN) stay fail-closed until their own ownership/lifecycle proofs land. 
+ model; VRF/L3VXLAN rows now validate, derive ownership stamps, and report + status, while SVD / collect-metadata VXLAN lifecycle and VRF/L3VXLAN + lifecycle stay fail-closed until their own ownership/lifecycle proofs land. ## Current Position @@ -874,7 +875,7 @@ demand. | RFC 9136 ESI overlay-index Type 5 origination + single-active receive | Shipped (bounded v1) | RT-5 carries non-zero ESI, zero Gateway Address, L3VNI label, and configured virtual/transit Router MAC; receive-side recursion imports exactly one single-active EAD-per-EVI candidate scoped by linked L2VNI and Ethernet Tag | Pure origination/projection + daemon tests plus M71 GoBGP real-peer receive proof | | ADR-0090 all-active ESI overlay-index Type 5 receive | Shipped (bounded v1) | Extends the M71 shape to all-active ESI recursion with deterministic remote-VTEP target sets, route-level ECMP, per-VTEP L3 neighbors, and L3VXLAN FDB-NHG for the shared Router MAC. The model distinguishes single-active, all-active, and conflicting EAD redundancy signals; invalid one-member/mixed/family-conflict/Router-MAC-conflict shapes fail closed; valid all-active target sets install through the production L3 writer and reclaim crash-leftover L3 NHID/FDB-NHG state on restart | `l3_all_active_writer` same-host netns proof is CI-gated for both steady-state writer cleanup and abort/restart adoption; M72 real-peer proof is CI-gated with two GoBGP route-source PEs: unresolved before EAD, then VRF ECMP route + `nhid` Router-MAC FDB row, then deterministic target-set collapse/re-expand and withdraw cleanup | | VLAN-aware bridges | Demand-shaped Linux/VXLAN operability; ADR-0088 boundary accepted; ADR-0089 v1 VNI-per-broadcast-domain slice landed for traditional multi-VXLAN bridges and SVD / collect-metadata VXLAN: `bridge_vlan` schema/status, observed VLAN topology validation, `NDA_VLAN` remote-MAC FDB attribution for fixed-VNI devices, `NDA_SRC_VNI` attribution for SVD devices, AF_BRIDGE local-MAC VLAN attribution, 
VLAN-upper AF_INET / AF_INET6 MAC+IP attribution, M70 FRR interop proving same-MAC two-VNI isolation on a traditional rustbgpd-owned `vlan_filtering=1` bridge, `dataplane_vlan_fdb` + `macip_vlan_attribution` proving real-kernel VLAN-scoped FDB and MAC+IP isolation, and `svd_fdb_vni` proving SVD Ready + add + same-MAC two-VNI isolation + scoped delete on a real kernel. Ethernet Tag ID stays `0`; unattributable VLAN observations fail closed as normal "not ours" classifier outcomes, downstream observation backpressure is metered, and startup link-cache/probe priming bounds the boot window | Raw bridge-ifindex ARP/ND on `vlan_filtering=1` bridges remains fail-closed unless a future FDB-correlation design proves freshness and ambiguity handling; true VLAN-aware bundle / non-zero Ethernet Tag needs a separate ADR; managed netdev creation stays a separate ergonomics track | Unit tests, hosted/gated local kernel netns tests including `dataplane_vlan_fdb`, `macip_vlan_attribution`, and `svd_fdb_vni`, and hosted M70 FRR containerlab receipt | -| rustbgpd-managed bridge / VXLAN / VRF netdev creation | Demand-shaped operator ergonomics; boundary accepted in ADR-0088 and ADR-0091; bridge and fixed-VNI VXLAN lifecycle landed (`[managed_netdevs]`, derived altname stamps, `EvpnService.ListManagedNetdevs`, `rbgp evpn managed-netdevs`, create → stamp → fresh-dump confirm, crash-restart adoption, safe same-owner reap, foreign/unsafe preservation). Fixed-VNI VXLAN rows create traditional one-VNI VXLAN devices on the desired bridge, fail closed on SVD/collect-metadata, `vnifilter`, learning, bridge-attachment, or protected-attribute drift, and the `managed_ready` proof shows the rustbgpd-created bridge + VXLAN topology makes the real EVPN L2 probe Ready | Add VRF/L3VXLAN classes. 
SVD/collect-metadata VXLAN creation and VLAN-upper creation remain separate class-specific gates | Unit + actor tests and hosted/gated local kernel netns tests `managed_bridge`, `managed_vxlan`, and `managed_ready`; fixed-VNI VXLAN status/lifecycle is covered by config/API/CLI/link-parser/reconcile unit tests | +| rustbgpd-managed bridge / VXLAN / VRF netdev creation | Demand-shaped operator ergonomics; boundary accepted in ADR-0088 and ADR-0091; bridge and fixed-VNI VXLAN lifecycle landed (`[managed_netdevs]`, derived altname stamps, `EvpnService.ListManagedNetdevs`, `rbgp evpn managed-netdevs`, create -> stamp -> fresh-dump confirm, crash-restart adoption, safe same-owner reap, foreign/unsafe preservation). Fixed-VNI VXLAN rows create traditional one-VNI VXLAN devices on the desired bridge, fail closed on SVD/collect-metadata, `vnifilter`, learning, bridge-attachment, or protected-attribute drift, and the `managed_ready` proof shows the rustbgpd-created bridge + VXLAN topology makes the real EVPN L2 probe Ready. VRF/L3VXLAN schema/status substrate has landed: desired rows validate, derive `vrf` / `l3vxlan` ownership stamps, parse observed protected attributes, and report desired/observed/orphan/foreign/unsafe state through the same surfaces | Add VRF/L3VXLAN lifecycle create/adopt/reap proof. 
SVD/collect-metadata VXLAN creation and VLAN-upper creation remain separate class-specific gates | Unit + actor tests and hosted/gated local kernel netns tests `managed_bridge`, `managed_vxlan`, and `managed_ready`; fixed-VNI VXLAN status/lifecycle plus VRF/L3VXLAN status substrate are covered by config/API/CLI/link-parser/reconcile unit tests | | BGP Add-Path for L2VPN EVPN | Demand-shaped control-plane breadth | Negotiate RFC 7911 Add-Path for AFI 25 / SAFI 70 only after EVPN Adj-RIB-In/Out, API, event history, and export paths are path-id-safe | Unit matrix plus FRR/GoBGP interop if a peer supports the shape | | RFC 9251 Route Types 6/7/8 | Out-of-current-lane / service-provider multicast | Typed route-family slice for SMET and IGMP/MLD sync routes; no Linux dataplane change until multicast ownership is designed | Standards codec tests plus real-peer reflect/withdraw interop | | RFC 9572 Route Types 9/10/11 | Out-of-current-lane / service-provider BUM segmentation | Typed route-family slice for PMSI/leaf A-D routes | Standards codec tests plus real-peer interop | diff --git a/docs/evpn-vtep-setup.md b/docs/evpn-vtep-setup.md index 7dd64292..1df7c4b5 100644 --- a/docs/evpn-vtep-setup.md +++ b/docs/evpn-vtep-setup.md @@ -4,7 +4,9 @@ rustbgpd is **observe-only by default** for kernel netdev topology. It programs and reconciles FDB / L3 FIB state on top of interfaces you provide. ADR-0091 adds an explicit opt-in exception for Linux bridge and fixed-VNI VXLAN create/adopt/reap through `[managed_netdevs]`. SVD / -collect-metadata VXLAN and VRF netdev creation remain operator-provisioned. +collect-metadata VXLAN and VRF/L3VXLAN lifecycle creation remain +operator-provisioned, though managed VRF/L3VXLAN rows can now validate and +report ownership/status. 
You normally provision topology with your host's network layer (`ip link`, ifupdown2, systemd-networkd, NetworkManager, SONiC, a CNI, ansible, …); rustbgpd probes it each diff --git a/docs/grpc-method-inventory.md b/docs/grpc-method-inventory.md index a8bece56..c5a35a44 100644 --- a/docs/grpc-method-inventory.md +++ b/docs/grpc-method-inventory.md @@ -188,7 +188,7 @@ shape itself does not raise the tier. | `ListEvpnNexthops` | `sensitive_read` | ADR-0059 FDB nexthop groups — exposes multi-homing topology, ES layout, drift-recovery status. | | `ListEthernetSegments` | `sensitive_read` | ADR-0083/0085 Ethernet Segment diagnose state — exposes configured ES membership, composed drain reasons, DF/BUM role rows, AC-gate state, same-ESI local-bias eligibility, and FDB-NHG refs. | | `ListIpVrfs` | `sensitive_read` | Gate 9 IP-VRF table. | -| `ListManagedNetdevs` | `sensitive_read` | ADR-0091 managed EVPN netdev status — exposes desired bridge and fixed-VNI VXLAN names, ownership stamps, observed bridge/VXLAN protected attributes, and orphan/foreign/unsafe state. | +| `ListManagedNetdevs` | `sensitive_read` | ADR-0091 managed EVPN netdev status — exposes desired bridge, fixed-VNI VXLAN, VRF, and L3VXLAN names, ownership stamps, observed protected attributes, and orphan/foreign/unsafe state. | | `GetIpVrf` | `sensitive_read` | Single-VRF detail. | | `ClearDuplicateMacQuarantine` | `mutating` | Clears one local duplicate-MAC suppression key and may replay still-live local MAC state. Reversible, per-`(VNI, MAC)` scope; not a route-injection primitive and not a clear-all. | | `SetEthernetSegmentDrain` | `operator_only` | ADR-0084 manual Ethernet Segment drain/undrain. Draining withdraws the ES's Type 4/EAD routes and the member VNIs' local Type 2 routes and suppresses new local-MAC origination — traffic-impacting origination control that redirects live customer traffic onto remote PEs' backup paths (a step above the per-key, restorative duplicate-MAC clear). 
Owns the `operator` drain reason only (ADR-0085): reasons compose, so the response's `drained` is the composed state and `reasons` lists what holds (an operator undrain does not override a `link` drain from the interface binding). Runtime-only and in-memory; restart clears it (bound segments re-evaluate carrier at startup). | diff --git a/docs/reload-matrix.md b/docs/reload-matrix.md index e3aad002..6bf0dc49 100644 --- a/docs/reload-matrix.md +++ b/docs/reload-matrix.md @@ -246,7 +246,7 @@ or a later convergence failure; those remain runtime SIGHUP outcomes. | `[[evpn_instances]]` | coordinator-gated | Supported ADR-0063 L2VNI shapes hot-apply, including standalone L2VNI swaps and L2VNI-only batch redefines; unsupported mixed edits, missing actors, or convergence failure pin/log. | | `[[evpn_ip_vrfs]]` | coordinator-gated | Supported IP-VRF add/delete/redefine and `ip_vrf` relink hot-apply; L3VNI/device/table identity changes stay restart-required. | | `[[ethernet_segments]]` | coordinator-gated | Supported ES add/delete/redefine and atomic tenant teardown hot-apply when the segment actor can converge. | -| `[managed_netdevs]` | restart-required | ADR-0091 bridge and fixed-VNI VXLAN lifecycle is resolved at startup and reconciled by the dataplane actor (create, stamp, restart adoption, same-owner orphan reap). Managed netdevs are not live-mutable in this tranche; SVD / collect-metadata VXLAN and VRF/L3VXLAN creation remain deferred. | +| `[managed_netdevs]` | restart-required | ADR-0091 bridge and fixed-VNI VXLAN lifecycle is resolved at startup and reconciled by the dataplane actor (create, stamp, restart adoption, same-owner orphan reap). VRF/L3VXLAN rows are accepted for schema/status and protected-attribute diagnostics, but their lifecycle remains deferred. Managed netdevs are not live-mutable in this tranche; SVD / collect-metadata VXLAN and VRF/L3VXLAN creation remain deferred. 
| ## `[[fib_tables]]` and FIB runtime diff --git a/proto/rustbgpd.proto b/proto/rustbgpd.proto index 270f76b2..3d990155 100644 --- a/proto/rustbgpd.proto +++ b/proto/rustbgpd.proto @@ -2276,6 +2276,8 @@ enum ManagedNetdevClass { MANAGED_NETDEV_CLASS_UNKNOWN = 0; MANAGED_NETDEV_CLASS_BRIDGE = 1; MANAGED_NETDEV_CLASS_VXLAN = 2; + MANAGED_NETDEV_CLASS_VRF = 3; + MANAGED_NETDEV_CLASS_L3VXLAN = 4; } enum ManagedNetdevLifecycleState { @@ -2305,6 +2307,10 @@ message ManagedNetdevState { optional bool observed_collect_metadata = 14; optional bool observed_vnifilter = 15; optional string observed_bridge = 16; + optional uint32 observed_table_id = 17; + optional bool observed_up = 18; + optional string observed_master = 19; + optional string observed_router_mac = 20; } message ListEvpnNexthopsRequest {} diff --git a/src/config/mod.rs b/src/config/mod.rs index 6f9548b8..6ed37845 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1047,6 +1047,8 @@ impl Config { let owner_token = self.managed_netdevs.owner_token.as_str(); if self.managed_netdevs.bridges.is_empty() && self.managed_netdevs.vxlans.is_empty() + && self.managed_netdevs.vrfs.is_empty() + && self.managed_netdevs.l3vxlans.is_empty() && owner_token.is_empty() { return Ok(rustbgpd_evpn::ManagedNetdevTable::new()); @@ -1079,10 +1081,50 @@ impl Config { ) }) .collect(); - Ok(rustbgpd_evpn::ManagedNetdevTable::from_maps( + let vrfs = self + .managed_netdevs + .vrfs + .iter() + .map(|vrf| { + ( + vrf.name.clone(), + rustbgpd_evpn::ManagedVrfNetdevSpec { + table_id: vrf.table_id, + }, + ) + }) + .collect(); + let l3vxlans = self + .managed_netdevs + .l3vxlans + .iter() + .map(|l3vxlan| { + let router_mac = parse_mac_address(&l3vxlan.router_mac).map_err(|e| { + ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: invalid router_mac {:?}: {e}", + l3vxlan.name, l3vxlan.router_mac + ), + } + })?; + Ok(( + l3vxlan.name.clone(), + rustbgpd_evpn::ManagedL3VxlanNetdevSpec { + vni: l3vxlan.vni, + 
local_ip: l3vxlan.local, + dstport: l3vxlan.dstport, + vrf: l3vxlan.vrf.clone(), + router_mac, + }, + )) + }) + .collect::>()?; + Ok(rustbgpd_evpn::ManagedNetdevTable::from_all_maps( owner_token.to_string(), bridges, vxlans, + vrfs, + l3vxlans, )) } diff --git a/src/config/schema.rs b/src/config/schema.rs index 88772766..75d090ec 100644 --- a/src/config/schema.rs +++ b/src/config/schema.rs @@ -1216,8 +1216,9 @@ pub struct EvpnIpVrfConfig { /// ADR-0091 opt-in block for rustbgpd-managed EVPN Linux netdevs. /// -/// v1 accepts `[[managed_netdevs.bridges]]` and fixed-VNI -/// `[[managed_netdevs.vxlans]]` rows. The `owner_token` is required +/// v1 accepts `[[managed_netdevs.bridges]]`, fixed-VNI +/// `[[managed_netdevs.vxlans]]`, `[[managed_netdevs.vrfs]]`, and +/// `[[managed_netdevs.l3vxlans]]` rows. The `owner_token` is required /// when at least one row is configured and is used only to derive /// durable `IFLA_ALT_IFNAME` ownership stamps: /// `rustbgpd:::`. @@ -1232,9 +1233,17 @@ pub struct ManagedNetdevsConfig { #[serde(default)] pub bridges: Vec, /// Managed fixed-VNI VXLAN rows. SVD / collect-metadata VXLANs and - /// VRFs are intentionally not accepted in this release. + /// shared-device VXLANs are intentionally not accepted in this release. #[serde(default)] pub vxlans: Vec, + /// Managed VRF rows. LAN-94 accepts schema/status substrate only; Linux + /// lifecycle create/delete lands in the next ADR-0091 slice. + #[serde(default)] + pub vrfs: Vec, + /// Managed L3 VXLAN rows. LAN-94 accepts schema/status substrate only; + /// Linux lifecycle create/delete lands in the next ADR-0091 slice. + #[serde(default)] + pub l3vxlans: Vec, } /// One managed Linux bridge row (ADR-0091 bridge-first tranche). @@ -1271,6 +1280,41 @@ pub struct ManagedVxlanNetdevConfig { pub learning: bool, } +/// One managed Linux VRF row (ADR-0091 VRF/L3VXLAN substrate tranche). 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ManagedVrfNetdevConfig { + /// Linux VRF interface name. + pub name: String, + /// Desired VRF table id (`IFLA_VRF_TABLE`). + pub table_id: u32, +} + +/// One managed L3 VXLAN row (ADR-0091 VRF/L3VXLAN substrate tranche). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ManagedL3VxlanNetdevConfig { + /// Linux L3 VXLAN interface name. + pub name: String, + /// Fixed VXLAN VNI (`1..=16_777_215`). + pub vni: u32, + /// Local source IP for VXLAN encapsulation. + pub local: std::net::IpAddr, + /// UDP destination port. Defaults to the IANA VXLAN port. + #[serde(default = "default_vxlan_dstport")] + pub dstport: u16, + /// VRF device this L3 VXLAN must be enslaved to. + pub vrf: String, + /// Router MAC this L3 VXLAN must carry. + pub router_mac: String, + /// Linux VXLAN learning mode. ADR-0091 L3VXLAN lifecycle requires + /// `false` (`nolearning`); the field exists so typos or intentional + /// deviations fail validation explicitly instead of silently using the + /// default. 
+ #[serde(default)] + pub learning: bool, +} + const fn default_vxlan_dstport() -> u16 { 4789 } diff --git a/src/config/tests.rs b/src/config/tests.rs index 6503abe4..fc841e98 100644 --- a/src/config/tests.rs +++ b/src/config/tests.rs @@ -10291,7 +10291,7 @@ fn reload_matrix_documents_every_peer_group_field() { #[test] fn managed_netdevs_default_empty_and_resolve_stamps() { let config = parse(&format!( - "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.bridges]]\nname = \"br100\"\nvlan_filtering = true\n\n[[managed_netdevs.vxlans]]\nname = \"vxlan100\"\nvni = 100\nlocal = \"10.0.0.1\"\nbridge = \"br100\"\n", + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.bridges]]\nname = \"br100\"\nvlan_filtering = true\n\n[[managed_netdevs.vxlans]]\nname = \"vxlan100\"\nvni = 100\nlocal = \"10.0.0.1\"\nbridge = \"br100\"\n\n[[managed_netdevs.vrfs]]\nname = \"vrf100\"\ntable_id = 5000\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan100\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf100\"\nrouter_mac = \"02:00:00:00:00:01\"\n", valid_toml() )) .unwrap(); @@ -10312,6 +10312,27 @@ fn managed_netdevs_default_empty_and_resolve_stamps() { assert_eq!(vxlan.spec.dstport, 4789); assert_eq!(vxlan.spec.bridge, "br100"); assert_eq!(vxlan.ownership_stamp, "rustbgpd:vxlan:leaf-1:vxlan100"); + let vrf = table.vrf("vrf100").unwrap(); + assert_eq!(vrf.name, "vrf100"); + assert_eq!(vrf.spec.table_id, 5000); + assert_eq!(vrf.ownership_stamp, "rustbgpd:vrf:leaf-1:vrf100"); + let l3vxlan = table.l3vxlan("l3vxlan100").unwrap(); + assert_eq!(l3vxlan.name, "l3vxlan100"); + assert_eq!(l3vxlan.spec.vni, 5000); + assert_eq!( + l3vxlan.spec.local_ip, + "10.0.0.1".parse::().unwrap() + ); + assert_eq!(l3vxlan.spec.dstport, 4789); + assert_eq!(l3vxlan.spec.vrf, "vrf100"); + assert_eq!( + l3vxlan.spec.router_mac, + rustbgpd_wire::MacAddress::new([0x02, 0, 0, 0, 0, 1]) + ); + assert_eq!( + l3vxlan.ownership_stamp, + "rustbgpd:l3vxlan:leaf-1:l3vxlan100" + ); let 
owner_only = parse(&format!( "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n", @@ -10387,6 +10408,18 @@ fn managed_netdevs_reject_unknown_fields() { valid_toml() )); assert!(matches!(unknown_vxlan_field, Err(ConfigError::Parse(_)))); + + let unknown_vrf_field = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.vrfs]]\nname = \"vrf100\"\ntable_id = 5000\nmtu = 9000\n", + valid_toml() + )); + assert!(matches!(unknown_vrf_field, Err(ConfigError::Parse(_)))); + + let unknown_l3vxlan_field = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan100\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf100\"\nrouter_mac = \"02:00:00:00:00:01\"\nexternal = true\n", + valid_toml() + )); + assert!(matches!(unknown_l3vxlan_field, Err(ConfigError::Parse(_)))); } #[test] @@ -10443,6 +10476,54 @@ fn managed_netdevs_reject_duplicate_vxlan_vni() { assert!(distinct_vnis.is_ok(), "got {distinct_vnis:?}"); } +#[test] +fn managed_netdevs_reject_invalid_vrf_and_l3vxlan_fields() { + let zero_table = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.vrfs]]\nname = \"vrf100\"\ntable_id = 0\n", + valid_toml() + )); + assert!(matches!( + zero_table, + Err(ConfigError::InvalidManagedNetdev { .. }) + )); + + let duplicate_table = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.vrfs]]\nname = \"vrf100\"\ntable_id = 5000\n\n[[managed_netdevs.vrfs]]\nname = \"vrf200\"\ntable_id = 5000\n", + valid_toml() + )); + assert!(matches!( + duplicate_table, + Err(ConfigError::InvalidManagedNetdev { .. 
}) + )); + + let duplicate_l3_vni = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan100\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf100\"\nrouter_mac = \"02:00:00:00:00:01\"\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan200\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf200\"\nrouter_mac = \"02:00:00:00:00:02\"\n", + valid_toml() + )); + assert!(matches!( + duplicate_l3_vni, + Err(ConfigError::InvalidManagedNetdev { .. }) + )); + + let learning_enabled = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan100\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf100\"\nrouter_mac = \"02:00:00:00:00:01\"\nlearning = true\n", + valid_toml() + )); + assert!(matches!( + learning_enabled, + Err(ConfigError::InvalidManagedNetdev { .. }) + )); + + let multicast_router_mac = parse(&format!( + "{}\n[managed_netdevs]\nowner_token = \"leaf-1\"\n\n[[managed_netdevs.l3vxlans]]\nname = \"l3vxlan100\"\nvni = 5000\nlocal = \"10.0.0.1\"\nvrf = \"vrf100\"\nrouter_mac = \"01:00:5e:00:00:01\"\n", + valid_toml() + )); + assert!(matches!( + multicast_router_mac, + Err(ConfigError::InvalidManagedNetdev { .. 
}) + )); +} + #[test] fn managed_netdevs_diff_marks_restart_required() { let old = parse(valid_toml()).unwrap(); diff --git a/src/config/validation.rs b/src/config/validation.rs index a9ef7681..c63e6313 100644 --- a/src/config/validation.rs +++ b/src/config/validation.rs @@ -1,12 +1,17 @@ +use std::collections::HashSet; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr}; use std::path::Path; use super::parse::{ parse_families, parse_named_policy, parse_neighbor_set, parse_policy, resolve_chain, }; +use super::schema::{ + ManagedBridgeNetdevConfig, ManagedL3VxlanNetdevConfig, ManagedNetdevsConfig, + ManagedVrfNetdevConfig, ManagedVxlanNetdevConfig, +}; use super::{ Config, ConfigError, DEFAULT_HOLD_TIME, EventHistoryConfig, GrpcEnforcementConfig, - PeerGroupConfig, SecurityConfig, TcpAoConfig, + PeerGroupConfig, SecurityConfig, TcpAoConfig, is_unicast_nonzero_mac, parse_mac_address, }; /// Canonical key for a dynamic-neighbor prefix: the network address with all @@ -801,7 +806,7 @@ fn validate_managed_netdevs(config: &Config) -> Result<(), ConfigError> { if !owner_token.is_empty() { validate_managed_token(owner_token, "managed_netdevs.owner_token")?; } - if (!managed.bridges.is_empty() || !managed.vxlans.is_empty()) && owner_token.is_empty() { + if managed_netdevs_has_rows(managed) && owner_token.is_empty() { return Err(ConfigError::InvalidManagedNetdev { reason: "managed_netdevs.owner_token is required when managed netdev rows are configured" @@ -809,29 +814,49 @@ fn validate_managed_netdevs(config: &Config) -> Result<(), ConfigError> { }); } - let mut names = std::collections::HashSet::new(); - for bridge in &managed.bridges { + let mut names = HashSet::new(); + validate_managed_bridges(&managed.bridges, owner_token, &mut names)?; + validate_managed_vxlans(&managed.vxlans, owner_token, &mut names)?; + validate_managed_vrfs(&managed.vrfs, owner_token, &mut names)?; + validate_managed_l3vxlans(&managed.l3vxlans, owner_token, &mut names)?; + Ok(()) +} + +fn 
managed_netdevs_has_rows(managed: &ManagedNetdevsConfig) -> bool { + !managed.bridges.is_empty() + || !managed.vxlans.is_empty() + || !managed.vrfs.is_empty() + || !managed.l3vxlans.is_empty() +} + +fn validate_managed_bridges( + bridges: &[ManagedBridgeNetdevConfig], + owner_token: &str, + names: &mut HashSet, +) -> Result<(), ConfigError> { + for bridge in bridges { validate_managed_link_name(&bridge.name)?; if !names.insert(bridge.name.clone()) { return Err(ConfigError::InvalidManagedNetdev { reason: format!("duplicate managed bridge name {:?}", bridge.name), }); } - let stamp = rustbgpd_evpn::bridge_ownership_stamp(owner_token, &bridge.name); - if stamp.len() > rustbgpd_evpn::MAX_ALT_IFNAME_LEN { - return Err(ConfigError::InvalidManagedNetdev { - reason: format!( - "managed bridge {:?}: derived ownership altname {:?} is {} bytes; maximum is {}", - bridge.name, - stamp, - stamp.len(), - rustbgpd_evpn::MAX_ALT_IFNAME_LEN - ), - }); - } + validate_managed_stamp_len( + "managed bridge", + &bridge.name, + &rustbgpd_evpn::bridge_ownership_stamp(owner_token, &bridge.name), + )?; } - let mut seen_vnis = std::collections::HashSet::new(); - for vxlan in &managed.vxlans { + Ok(()) +} + +fn validate_managed_vxlans( + vxlans: &[ManagedVxlanNetdevConfig], + owner_token: &str, + names: &mut HashSet, +) -> Result<(), ConfigError> { + let mut seen_vnis = HashSet::new(); + for vxlan in vxlans { validate_managed_link_name(&vxlan.name)?; validate_managed_link_name(&vxlan.bridge)?; if !names.insert(vxlan.name.clone()) { @@ -871,18 +896,130 @@ fn validate_managed_netdevs(config: &Config) -> Result<(), ConfigError> { ), }); } - let stamp = rustbgpd_evpn::vxlan_ownership_stamp(owner_token, &vxlan.name); - if stamp.len() > rustbgpd_evpn::MAX_ALT_IFNAME_LEN { + validate_managed_stamp_len( + "managed VXLAN", + &vxlan.name, + &rustbgpd_evpn::vxlan_ownership_stamp(owner_token, &vxlan.name), + )?; + } + Ok(()) +} + +fn validate_managed_vrfs( + vrfs: &[ManagedVrfNetdevConfig], + owner_token: 
&str, + names: &mut HashSet, +) -> Result<(), ConfigError> { + let mut seen_table_ids = HashSet::new(); + for vrf in vrfs { + validate_managed_link_name(&vrf.name)?; + if !names.insert(vrf.name.clone()) { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!("duplicate managed netdev name {:?}", vrf.name), + }); + } + if vrf.table_id == 0 { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!("managed VRF {:?}: table_id must be > 0", vrf.name), + }); + } + if !seen_table_ids.insert(vrf.table_id) { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!( + "managed VRF {:?}: duplicate table_id {}", + vrf.name, vrf.table_id + ), + }); + } + validate_managed_stamp_len( + "managed VRF", + &vrf.name, + &rustbgpd_evpn::vrf_ownership_stamp(owner_token, &vrf.name), + )?; + } + Ok(()) +} + +fn validate_managed_l3vxlans( + l3vxlans: &[ManagedL3VxlanNetdevConfig], + owner_token: &str, + names: &mut HashSet, +) -> Result<(), ConfigError> { + let mut seen_vnis = HashSet::new(); + for l3vxlan in l3vxlans { + validate_managed_link_name(&l3vxlan.name)?; + validate_managed_link_name(&l3vxlan.vrf)?; + if !names.insert(l3vxlan.name.clone()) { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!("duplicate managed netdev name {:?}", l3vxlan.name), + }); + } + rustbgpd_evpn::EvpnInstanceId::new(l3vxlan.vni).map_err(|e| { + ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: invalid vni {}: {e}", + l3vxlan.name, l3vxlan.vni + ), + } + })?; + if !seen_vnis.insert(l3vxlan.vni) { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: duplicate vni {}", + l3vxlan.name, l3vxlan.vni + ), + }); + } + if l3vxlan.dstport == 0 { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: dstport must be in 1..=65535", + l3vxlan.name + ), + }); + } + if l3vxlan.learning { return Err(ConfigError::InvalidManagedNetdev { reason: format!( - "managed 
VXLAN {:?}: derived ownership altname {:?} is {} bytes; maximum is {}", - vxlan.name, - stamp, - stamp.len(), - rustbgpd_evpn::MAX_ALT_IFNAME_LEN + "managed L3VXLAN {:?}: learning=true is unsupported; use learning=false (`nolearning`)", + l3vxlan.name ), }); } + let router_mac = parse_mac_address(&l3vxlan.router_mac).map_err(|e| { + ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: invalid router_mac {:?}: {e}", + l3vxlan.name, l3vxlan.router_mac + ), + } + })?; + if !is_unicast_nonzero_mac(router_mac) { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!( + "managed L3VXLAN {:?}: router_mac {:?} must be a non-zero unicast MAC", + l3vxlan.name, l3vxlan.router_mac + ), + }); + } + validate_managed_stamp_len( + "managed L3VXLAN", + &l3vxlan.name, + &rustbgpd_evpn::l3vxlan_ownership_stamp(owner_token, &l3vxlan.name), + )?; + } + Ok(()) +} + +fn validate_managed_stamp_len(label: &str, name: &str, stamp: &str) -> Result<(), ConfigError> { + if stamp.len() > rustbgpd_evpn::MAX_ALT_IFNAME_LEN { + return Err(ConfigError::InvalidManagedNetdev { + reason: format!( + "{label} {name:?}: derived ownership altname {stamp:?} is {} bytes; maximum is {}", + stamp.len(), + rustbgpd_evpn::MAX_ALT_IFNAME_LEN + ), + }); } Ok(()) }