diff --git a/csil/v1/components/k3s.csil b/csil/v1/components/k3s.csil index 870d0f9..882ac0f 100644 --- a/csil/v1/components/k3s.csil +++ b/csil/v1/components/k3s.csil @@ -22,6 +22,7 @@ Config = { server_url: text @go_name("ServerURL"), dns_servers: [* text] @go_name("DNSServers"), ? additional_registries: [* AdditionalRegistry] @go_name("AdditionalRegistries"), +? etcd_args: [* text] @go_name("EtcdArgs"), ? allow_cgnat_vip: bool @go_name("AllowCGNATVIP"), } diff --git a/docs/tailscale-integration.md b/docs/tailscale-integration.md index 75a4059..1eba56f 100644 --- a/docs/tailscale-integration.md +++ b/docs/tailscale-integration.md @@ -53,6 +53,7 @@ Your Tailscale ACL must allow: ## Configuration +<<<<<<< HEAD ### VIP Requirements **IMPORTANT:** The VIP must always be a separate, dedicated IP address that is not assigned to any host. This is required because kube-vip manages the VIP through ARP advertisements, and having the VIP match a host's actual IP can cause network conflicts and packet loss. @@ -69,17 +70,30 @@ For both single and multi-control-plane setups, the process is the same: #### Step 1: Configure Foundry with a Dedicated VIP Choose a CGNAT IP for your VIP that is NOT assigned to any node: +======= +### Single Control Plane Setup + +For single control plane deployments, use a dedicated VIP address that is routable via Tailscale: +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) ```yaml cluster: name: my-cluster primary_domain: example.local +<<<<<<< HEAD vip: 100.81.89.100 # Dedicated VIP (not a node IP!) 
+======= + vip: 100.81.89.100 # Dedicated VIP (not assigned to any host) +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) allow_cgnat_vip: true hosts: - hostname: control-plane +<<<<<<< HEAD address: 100.81.89.62 # Different from VIP +======= + address: 100.81.89.62 # Control plane's Tailscale IP +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) user: root - hostname: worker-1 address: 100.70.90.12 @@ -89,6 +103,7 @@ hosts: user: root ``` +<<<<<<< HEAD #### Step 2: Deploy the Cluster Run Foundry to deploy K3s and kube-vip: @@ -146,6 +161,50 @@ kubectl apply -f proxyclass.yaml ``` This will automatically advertise your VIP subnet route to Tailscale, making it reachable from all nodes. +======= +**Important:** The VIP must be different from any host's IP address. You must advertise the VIP as a subnet route from the control plane: + +```bash +# On the control plane node +tailscale up --advertise-routes=100.81.89.100/32 +``` + +Then approve the route in the Tailscale admin console. + +### High Availability (Multi-Control-Plane) Setup + +For HA setups with multiple control planes, you need to make the VIP routable via Tailscale: + +#### Option 1: Tailscale Subnet Routes + +Advertise the VIP as a subnet route from the active control plane: + +```bash +# On the control plane node +tailscale up --advertise-routes=100.81.89.100/32 +``` + +Then approve the route in the Tailscale admin console. + +```yaml +cluster: + name: my-cluster + primary_domain: example.local + vip: 100.81.89.100 # Dedicated VIP + allow_cgnat_vip: true +``` + +**Note:** kube-vip will manage the VIP assignment, but you need to ensure the route is advertised from whichever node currently holds the VIP. + +#### Option 2: Tailscale Operator (Recommended for HA) + +The Tailscale Operator integration will be available in a future Foundry release. 
This will provide: +- Automatic operator installation on control planes +- Automated VIP subnet route management +- Support for cross-pod network policies via Tailscale ACLs + +For now, use Option 1 (Subnet Routes) for HA setups. +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) ## Network Routing Considerations @@ -161,11 +220,21 @@ Traditional kube-vip assumes Layer 2 networking where the VIP can "float" betwee For worker nodes to reach the VIP: +<<<<<<< HEAD **All deployments (single or multi-control-plane):** - VIP must be a dedicated IP, separate from any node's IP - VIP must be advertised as a subnet route via Tailscale - Tailscale operator automates route management as VIP moves between nodes - kube-vip handles VIP assignment and failover via ARP (local to each node) +======= +**Single control plane:** +- VIP = control plane IP → Always routable (it's the node's primary IP) + +**Multiple control planes:** +- VIP = dedicated IP → Must be advertised as subnet route +- Route must be updated when VIP moves between control planes +- Tailscale operator can automate this +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) ## Troubleshooting @@ -188,9 +257,14 @@ curl -k https://<VIP>:6443/version --max-time 5 ``` **Solution:** +<<<<<<< HEAD - Ensure VIP is advertised as a subnet route via Tailscale operator - Verify ProxyClass is configured correctly with the VIP route - Check that the route is approved in Tailscale admin console +======= +- Single control plane: Advertise VIP as subnet route from control plane +- Multi control plane: Advertise VIP as subnet route from active control plane +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) ### SSH Connection Refused Between Nodes @@ -216,6 +290,7 @@ VIP is assigned to the local interface but not advertised to Tailscale. 
**Solution:** ```bash +<<<<<<< HEAD # Check if Tailscale operator is running kubectl get pods -n tailscale @@ -226,6 +301,12 @@ kubectl get proxyclass kubectl logs -n tailscale -l tailscale-vip=true # If operator is not installed, install it following Step 3 above +======= +# On control plane +tailscale up --advertise-routes=<VIP>/32 + +# Then approve in Tailscale admin console +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) ``` ## Validation Checklist @@ -244,10 +325,17 @@ Before deploying: Future enhancements planned for Tailscale integration: +<<<<<<< HEAD 1. **Automated Tailscale Operator Installation** - Automatic operator installation during cluster setup - Auto-generated OAuth credentials integration - Automated ProxyClass configuration +======= +1. **Tailscale Operator Integration** + - Automatic operator installation on control planes + - Automated VIP subnet route management + - Support for cross-pod network policies via Tailscale ACLs +>>>>>>> 9edde40 (feat: add CGNAT VIP support and Tailscale integration) 2. 
**Multi-Cluster Mesh** - Connect multiple Foundry clusters via Tailscale diff --git a/v1/internal/component/k3s/types.go b/v1/internal/component/k3s/types.go index 1f2c7a8..caf0b5a 100644 --- a/v1/internal/component/k3s/types.go +++ b/v1/internal/component/k3s/types.go @@ -64,6 +64,11 @@ func ParseConfig(cfg component.ComponentConfig) (*Config, error) { config.VIP = vip } + // Allow CGNAT VIP + if allowCGNAT, ok := cfg.GetBool("allow_cgnat_vip"); ok { + config.AllowCGNATVIP = &allowCGNAT + } + // Interface if iface, ok := cfg.GetString("interface"); ok { config.Interface = iface diff --git a/v1/internal/component/k3s/vip.go b/v1/internal/component/k3s/vip.go index 2d25e00..f9bcc53 100644 --- a/v1/internal/component/k3s/vip.go +++ b/v1/internal/component/k3s/vip.go @@ -48,7 +48,7 @@ func ValidateVIP(vip string, allowCGNAT bool) error { if allowCGNAT { return fmt.Errorf("VIP should be a private IP address (RFC1918 or RFC6598): %s", vip) } - return fmt.Errorf("VIP should be a private IP address: %s (hint: set allow_cgnat_vip: true to use CGNAT IPs in the 100.64.0.0/10 range)", vip) + return fmt.Errorf("VIP should be a private IP address: %s (hint: set allow_cgnat_vip: true to use CGNAT IPs in the 100.64.0.0/10 range, e.g. 
Tailscale)", vip) } return nil @@ -57,9 +57,9 @@ func ValidateVIP(vip string, allowCGNAT bool) error { // isPrivateIP checks if an IP is in private ranges (RFC1918) or optionally shared address space (RFC6598) func isPrivateIP(ip net.IP, allowCGNAT bool) bool { private := []string{ - "10.0.0.0/8", // RFC1918 - Private-Use - "172.16.0.0/12", // RFC1918 - Private-Use - "192.168.0.0/16", // RFC1918 - Private-Use + "10.0.0.0/8", // RFC1918 - Private-Use + "172.16.0.0/12", // RFC1918 - Private-Use + "192.168.0.0/16", // RFC1918 - Private-Use } // Optionally include CGNAT range (RFC6598) used by Tailscale and similar overlay networks @@ -106,13 +106,14 @@ func DetermineVIPConfig(vip string, conn network.SSHExecutor, allowCGNAT bool) ( return nil, fmt.Errorf("interface detection failed: %w", err) } - // Convert bool to *bool for VIPConfig - allowCGNATPtr := &allowCGNAT - return &VIPConfig{ - VIP: vip, - Interface: iface, - AllowCGNATVIP: allowCGNATPtr, - }, nil + cfg := &VIPConfig{ + VIP: vip, + Interface: iface, + } + if allowCGNAT { + cfg.AllowCGNATVIP = &allowCGNAT + } + return cfg, nil } // GenerateKubeVIPManifest generates the kube-vip DaemonSet manifest YAML diff --git a/v1/internal/config/types.gen.go b/v1/internal/config/types.gen.go index 5def5de..322a8f7 100644 --- a/v1/internal/config/types.gen.go +++ b/v1/internal/config/types.gen.go @@ -4,46 +4,46 @@ package config import ( - "github.com/catalystcommunity/foundry/v1/internal/setup" "github.com/catalystcommunity/foundry/v1/internal/host" + "github.com/catalystcommunity/foundry/v1/internal/setup" ) // NetworkConfig represents a structured data type type NetworkConfig struct { - Gateway string `json:"gateway" yaml:"gateway"` - Netmask string `json:"netmask" yaml:"netmask"` + Gateway string `json:"gateway" yaml:"gateway"` + Netmask string `json:"netmask" yaml:"netmask"` DHCPRange *DHCPRange `json:"dhcp_range,omitempty" yaml:"dhcp_range,omitempty"` } // DHCPRange represents a structured data type type DHCPRange 
struct { Start string `json:"start" yaml:"start"` - End string `json:"end" yaml:"end"` + End string `json:"end" yaml:"end"` } // DNSConfig represents a structured data type type DNSConfig struct { InfrastructureZones []DNSZone `json:"infrastructure_zones" yaml:"infrastructure_zones"` - KubernetesZones []DNSZone `json:"kubernetes_zones" yaml:"kubernetes_zones"` - Forwarders []string `json:"forwarders" yaml:"forwarders"` - Backend string `json:"backend" yaml:"backend"` - APIKey string `json:"api_key" yaml:"api_key"` + KubernetesZones []DNSZone `json:"kubernetes_zones" yaml:"kubernetes_zones"` + Forwarders []string `json:"forwarders" yaml:"forwarders"` + Backend string `json:"backend" yaml:"backend"` + APIKey string `json:"api_key" yaml:"api_key"` } // DNSZone represents a structured data type type DNSZone struct { - Name string `json:"name" yaml:"name"` - Public bool `json:"public" yaml:"public"` + Name string `json:"name" yaml:"name"` + Public bool `json:"public" yaml:"public"` PublicCNAME *string `json:"public_cname,omitempty" yaml:"public_cname,omitempty"` } // ClusterConfig represents a structured data type type ClusterConfig struct { - Name string `json:"name" yaml:"name"` - Domain *string `json:"domain,omitempty" yaml:"domain,omitempty"` - PrimaryDomain string `json:"primary_domain" yaml:"primary_domain"` - VIP string `json:"vip" yaml:"vip"` - AllowCGNATVIP *bool `json:"allow_cgnat_vip,omitempty" yaml:"allow_cgnat_vip,omitempty"` + Name string `json:"name" yaml:"name"` + Domain *string `json:"domain,omitempty" yaml:"domain,omitempty"` + PrimaryDomain string `json:"primary_domain" yaml:"primary_domain"` + VIP string `json:"vip" yaml:"vip"` + AllowCGNATVIP *bool `json:"allow_cgnat_vip,omitempty" yaml:"allow_cgnat_vip,omitempty"` } // ComponentMap is a type alias @@ -51,16 +51,16 @@ type ComponentMap map[string]ComponentConfig // ComponentConfig represents a structured data type type ComponentConfig struct { - Version *string `json:"version,omitempty" 
yaml:"version,omitempty"` - Hosts []string `json:"hosts,omitempty" yaml:"hosts,omitempty"` - Config map[string]any `json:"config" yaml:",inline"` + Version *string `json:"version,omitempty" yaml:"version,omitempty"` + Hosts []string `json:"hosts,omitempty" yaml:"hosts,omitempty"` + Config map[string]any `json:"config" yaml:",inline"` } // ObsConfig represents a structured data type type ObsConfig struct { Prometheus *PrometheusConfig `json:"prometheus,omitempty" yaml:"prometheus,omitempty"` - Loki *LokiConfig `json:"loki,omitempty" yaml:"loki,omitempty"` - Grafana *GrafanaConfig `json:"grafana,omitempty" yaml:"grafana,omitempty"` + Loki *LokiConfig `json:"loki,omitempty" yaml:"loki,omitempty"` + Grafana *GrafanaConfig `json:"grafana,omitempty" yaml:"grafana,omitempty"` } // PrometheusConfig represents a structured data type @@ -80,7 +80,7 @@ type GrafanaConfig struct { // StorageConfig represents a structured data type type StorageConfig struct { - Backend string `json:"backend" yaml:"backend"` + Backend string `json:"backend" yaml:"backend"` TrueNAS *TrueNASConfig `json:"truenas,omitempty" yaml:"truenas,omitempty"` } @@ -92,13 +92,12 @@ type TrueNASConfig struct { // Config represents a structured data type type Config struct { - Network *NetworkConfig `json:"network,omitempty" yaml:"network,omitempty"` - DNS *DNSConfig `json:"dns,omitempty" yaml:"dns,omitempty"` - Cluster ClusterConfig `json:"cluster" yaml:"cluster"` - Components ComponentMap `json:"components" yaml:"components"` - Observability *ObsConfig `json:"observability,omitempty" yaml:"observability,omitempty"` - Storage *StorageConfig `json:"storage,omitempty" yaml:"storage,omitempty"` - Hosts []*host.Host `json:"hosts" yaml:"hosts"` - SetupState *setup.SetupState `json:"setup_state" yaml:"setup_state"` + Network *NetworkConfig `json:"network,omitempty" yaml:"network,omitempty"` + DNS *DNSConfig `json:"dns,omitempty" yaml:"dns,omitempty"` + Cluster ClusterConfig `json:"cluster" yaml:"cluster"` + 
Components ComponentMap `json:"components" yaml:"components"` + Observability *ObsConfig `json:"observability,omitempty" yaml:"observability,omitempty"` + Storage *StorageConfig `json:"storage,omitempty" yaml:"storage,omitempty"` + Hosts []*host.Host `json:"hosts" yaml:"hosts"` + SetupState *setup.SetupState `json:"setup_state" yaml:"setup_state"` } -