diff --git a/.ansible-lint b/.ansible-lint index a8d7896..bb97762 100644 --- a/.ansible-lint +++ b/.ansible-lint @@ -9,5 +9,7 @@ exclude_paths: warn_list: - yaml[line-length] - var-naming[no-role-prefix] + - jinja[spacing] + - command-instead-of-module - meta-runtime[unsupported-version] - run-once[task] diff --git a/docs/certcheck_README.md b/docs/certcheck_README.md new file mode 100644 index 0000000..a95d31a --- /dev/null +++ b/docs/certcheck_README.md @@ -0,0 +1,506 @@ +# certcheck — IAG5 TLS Certificate Verification + +An Ansible playbook suite that verifies TLS certificate configuration across all IAG5 node types after deployment. Runs against live nodes, reads actual `gateway.conf` files, and performs live TLS handshakes to confirm that mTLS is working end-to-end — not just that files exist. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Directory Structure](#directory-structure) +- [Inventory](#inventory) +- [Usage](#usage) +- [Check Reference](#check-reference) + - [cluster\_server\_to\_runner](#cluster_server_to_runner) + - [cluster\_client\_to\_server](#cluster_client_to_server) + - [connect\_server\_to\_gwm](#connect_server_to_gwm) +- [Output and Status Codes](#output-and-status-codes) +- [Variables Reference](#variables-reference) +- [Integration with itential.iag5 Deployer](#integration-with-itentialiag5-deployer) + +--- + +## Overview + +IAG5 uses three distinct TLS connection paths, each with different certificate requirements: + +| Path | Protocol | TLS Type | Cert requirement | Run condition | +|------|----------|----------|-----------------|---------------| +| Server ↔ Runner | gRPC over TCP | Mutual TLS (mTLS) | Both `serverAuth` + `clientAuth` in EKU | `gateway_server_use_tls: true` | +| Client ↔ Server | gRPC over TCP | Mutual TLS (mTLS) | Both `serverAuth` + `clientAuth` in EKU | `gateway_client_use_tls: true` | +| Server → Gateway Manager | WebSocket (wss://) | One-way TLS | `clientAuth` in EKU; GWM cert publicly trusted | `gateway_server_use_tls: true` | + +certcheck covers all three paths. For each path it runs on both sides of the connection independently, so a misconfiguration on either end is caught. + +--- + +## Architecture + +certcheck is structured as three Ansible roles, one per connection path: + +``` +certcheck/ +├── site.yml Main playbook — runs all three roles +├── inventory.yml Standalone inventory (not used with deployer) +├── vars/ +│ ├── common.yml Shared variables (conf paths, service name) +│ ├── cluster_server_to_runner.yml Port and IP vars for server↔runner checks +│ ├── cluster_client_to_server.yml Port and IP vars for client↔server checks +│ └── connect_server_to_gwm.yml GWM host/port and OS CA bundle path +└── roles/ + ├── cluster_server_to_runner/ 27 checks — server and runner nodes (runs when gateway_server_use_tls: true) + ├── cluster_client_to_server/ 26 checks — client and server nodes (runs when gateway_client_use_tls: true) + ├── connect_server_to_gwm/ 19 checks — server node only (runs when gateway_server_use_tls: true) + └── common/ + └── tasks/summary.yml Shared summary printer +``` + +Each role runs on two host groups in separate plays so that each node produces its own independent summary. This matters because a problem on the runner side is not the same as a problem on the server side, and they need to be diagnosed separately. + +### How node identity is determined + +The roles do not rely on separate play-level inventory groups to determine what config section to read. Each role determines its own identity at runtime: + +```yaml +# cluster_server_to_runner +node_section: "{{ 'server' if inventory_hostname in groups['server'] else 'runner' }}" + +# cluster_client_to_server +node_section: "{{ 'client' if inventory_hostname in groups['client'] else 'server' }}" +``` + +This means the same role file runs on both sides of a connection — the `when:` conditions on individual tasks control what runs where. + +### EKU gating + +The Extended Key Usage check (CHECK 19 in `cluster_server_to_runner`, CHECK 16 in `cluster_client_to_server`) acts as a hard gate. If a cert is missing `clientAuth` or `serverAuth`, the live mTLS handshake checks are skipped rather than run and fail with a misleading error. The gate is set via the `eku_valid` fact: + +```yaml +eku_valid: "{{ tls_enabled and + 'Server Authentication' in (node_eku.stdout | default('')) and + 'Client Authentication' in (node_eku.stdout | default('')) }}" +``` + +Any check that requires a working mTLS connection carries `when: eku_valid | default(false)` on its shell task. + +--- + +## Prerequisites + +- Ansible installed on the control node +- SSH access to all IAG5 nodes +- `openssl` available on all target nodes (standard on RHEL/Rocky) +- `curl` available on the server node (for WebSocket check) +- The IAG5 service (`iagctl`) must be running on all nodes + +--- + +## Directory Structure + +``` +certcheck/ +├── site.yml +├── inventory.yml +├── vars/ +│ ├── common.yml +│ ├── cluster_server_to_runner.yml +│ ├── cluster_client_to_server.yml +│ └── connect_server_to_gwm.yml +└── roles/ + ├── cluster_server_to_runner/ + │ └── tasks/main.yml + ├── cluster_client_to_server/ + │ └── tasks/main.yml + ├── connect_server_to_gwm/ + │ └── tasks/main.yml + └── common/ + └── tasks/summary.yml +``` + +--- + +## Inventory + +The standalone inventory (`inventory.yml`) defines four host groups: + +```yaml +all: + children: + client: + hosts: + gateway_client: + ansible_host: + private_ip: + ansible_user: ec2-user + ansible_ssh_private_key_file: ~/.ssh/your-key.pem + + server: + hosts: + gateway_server: + ansible_host: + private_ip: + ansible_user: ec2-user + ansible_ssh_private_key_file: ~/.ssh/your-key.pem + + runner: + hosts: + gateway_runner: + ansible_host: + private_ip: + ansible_user: ec2-user + ansible_ssh_private_key_file: ~/.ssh/your-key.pem + + gateway_manager: + hosts: + gwm: + ansible_host: + private_ip: + ansible_user: rocky + ansible_ssh_private_key_file: ~/.ssh/your-key.pem +``` + +### `private_ip` vs `ansible_host` + +`ansible_host` is the address Ansible uses to SSH into a node. `private_ip` is the IP that the node advertises to other IAG5 nodes for gRPC connections, and is what must appear in the certificate's Subject Alternative Name. + +In most deployments these are the same. If they differ — for example when using a bastion host — set `private_ip` explicitly per host. certcheck uses `private_ip` for: + +- SAN validation (CHECK 8a/8b, CHECK 17b, CHECK 12c) +- `no_proxy` validation (CHECK 23, CHECK 24) +- Live connection targets (CHECK 25, CHECK 26, CHECK 27) + +--- + +## Usage + +### Run all checks + +```bash +ansible-playbook site.yml -i inventory.yml +``` + +### Run a specific connection path only + +```bash +# Server ↔ Runner mTLS checks +ansible-playbook site.yml -i inventory.yml --tags cluster_server_to_runner + +# Client ↔ Server mTLS checks +ansible-playbook site.yml -i inventory.yml --tags cluster_client_to_server + +# Server → Gateway Manager WebSocket TLS checks +ansible-playbook site.yml -i inventory.yml --tags connect_server_to_gwm +``` + +### Run on a single node + +```bash +ansible-playbook site.yml -i inventory.yml --limit gateway_runner +``` + +### Increase verbosity to see raw openssl output + +```bash +ansible-playbook site.yml -i inventory.yml -v +``` + +--- + +## Check Reference + +### cluster\_server\_to\_runner + +Runs on: **server node** and **runner node** independently. + +TLS type: **Mutual TLS (mTLS)** over gRPC/TCP. + +Config file read: `/etc/gateway/gateway.conf` (both server and runner use the same path). + +| Check | Description | Node | Hard fail? | +|-------|-------------|------|-----------| +| CHECK 1 | `ca_certificate_file` set in `[application]` | Both | Yes | +| CHECK 2 | CA cert file exists on disk | Both | Yes | +| CHECK 3 | CA bundle contains at least 1 cert; PASS if ≥ 2 (root + intermediate), WARN if exactly 1 (root only — valid but no intermediate), FAIL if 0 | Both | Warn if 1, fail if 0 | +| CHECK 4 | CA cert has `CA:TRUE` basic constraint | Both | Yes | +| CHECK 5 | Last cert in CA bundle is self-signed root (subject hash == issuer hash) | Both | Yes | +| CHECK 6 | `use_tls = true` in `[server]` or `[runner]` section | Both | Yes — gates all subsequent cert checks | +| CHECK 7 | `distributed_execution = true` in `[server]` (server); `listen_address` set in `[runner]` (runner) | Split | Yes | +| CHECK 8a | Inventory `private_ip` present on runner's actual network interface | Runner | Yes | +| CHECK 9 | `certificate_file` set in `[server]`/`[runner]` section | Both | Yes | +| CHECK 10 | Certificate file exists on disk | Both | Yes | +| CHECK 10b | Runner cert SAN contains inventory `private_ip` as `IP:` entry | Runner | Yes | +| CHECK 11 | `private_key_file` set in `[server]`/`[runner]` section | Both | Yes | +| CHECK 12 | Private key file exists on disk | Both | Yes | +| CHECK 13 | Cert and key are a matched pair (modulus MD5 comparison) | Both | Yes | +| CHECK 14 | Certificate is not expired | Both | Yes | +| CHECK 15 | Certificate has more than 30 days remaining | Both | Warn if < 30 days | +| CHECK 16 | Certificate is not a self-signed leaf (subject ≠ issuer) | Both | Yes | +| CHECK 17a | Subject Alternative Name extension is present | Both | Yes | +| CHECK 17b | Server cert SAN contains `private_ip` as `IP:` entry | Server | Yes | +| CHECK 17c | Server cert SAN contains `ansible_host` as `DNS:` entry | Server | Warn | +| CHECK 18 | Certificate is signed by the CA (`openssl verify`) | Both | Yes | +| CHECK 19 | EKU contains both `serverAuth` and `clientAuth` | Both | Yes — **gates CHECKs 20 and 27** | +| CHECK 20 | Runner enforces mTLS — rejects connection without client cert | Server | Yes | +| CHECK 21 | `iagctl` service running; GATEWAY env vars visible in process | Both | Warn | +| CHECK 22 | `no_proxy`/`NO_PROXY` set in systemd service | Server | Warn | +| CHECK 23 | Each runner `private_ip` present in `no_proxy` | Server | Yes | +| CHECK 24 | Each runner `ansible_host` present in `no_proxy` | Server | Yes | +| CHECK 25 | Runner IPs resolve via DNS from server | Server | Yes | +| CHECK 26 | TCP connectivity from server to each runner on `runner_port` | Server | Yes | +| CHECK 27 | Live mTLS handshake with IP verification server → each runner | Server | Yes | + +--- + +### cluster\_client\_to\_server + +Runs on: **client node** and **server node** independently. + +TLS type: **Mutual TLS (mTLS)** over gRPC/TCP. + +Config files read: +- Client node: `/home/itential/.gateway.d/gateway.conf` → reads `[client]` section +- Server node: `/etc/gateway/gateway.conf` → reads `[server]` section + +| Check | Description | Node | Hard fail? | +|-------|-------------|------|-----------| +| CHECK 1 | `ca_certificate_file` set in `[application]` | Both | Yes | +| CHECK 2 | CA cert file exists on disk | Both | Yes | +| CHECK 3 | CA bundle contains at least 1 cert; PASS if ≥ 2 (root + intermediate), WARN if exactly 1 (root only — valid but no intermediate), FAIL if 0 | Both | Warn if 1, fail if 0 | +| CHECK 4 | CA cert has `CA:TRUE` basic constraint | Both | Yes | +| CHECK 5 | Last cert in CA bundle is self-signed root | Both | Yes | +| CHECK 6 | `use_tls = true` in `[client]` or `[server]` section | Both | Yes — gates all cert checks | +| CHECK 7 | `certificate_file` set in `[client]`/`[server]` section | Both | Yes | +| CHECK 8 | Certificate file exists on disk | Both | Yes | +| CHECK 9 | `private_key_file` set in `[client]`/`[server]` section | Both | Yes | +| CHECK 10 | Private key file exists on disk | Both | Yes | +| CHECK 11 | Cert and key are a matched pair | Both | Yes | +| CHECK 12 | Certificate is not expired | Both | Yes | +| CHECK 13 | Certificate has more than 30 days remaining | Both | Warn if < 30 days | +| CHECK 14 | Certificate is not a self-signed leaf | Both | Yes | +| CHECK 15 | Certificate is signed by the CA (`openssl verify`) | Both | Yes | +| CHECK 16 | EKU contains both `serverAuth` and `clientAuth` | Both | Yes — **gates CHECKs 22 and 26** | +| CHECK 17a | Subject Alternative Name extension is present | Server | Yes | +| CHECK 17b | Server cert SAN contains `private_ip` as `IP:` entry | Server | Yes | +| CHECK 17c | Server cert SAN contains `ansible_host` as `DNS:` entry | Server | Warn | +| CHECK 19 | `no_proxy`/`NO_PROXY` set in systemd service | Client | Warn | +| CHECK 20 | Server `private_ip` present in `no_proxy` | Client | Yes | +| CHECK 21 | Server `ansible_host` present in `no_proxy` | Client | Yes | +| CHECK 22 | Server enforces mTLS — rejects connection without client cert | Client | Yes | +| CHECK 23 | `iagctl` service running; GATEWAY env vars visible in process | Both | Warn | +| CHECK 24 | Server IP resolves via DNS from client | Client | Yes | +| CHECK 25 | TCP connectivity from client to server on `server_port` | Client | Yes | +| CHECK 26 | Live mTLS handshake with IP verification client → server | Client | Yes | + +--- + +### connect\_server\_to\_gwm + +Runs on: **server node** only. + +TLS type: **One-way TLS** (server authenticates Gateway Manager's cert; GWM does not validate the server cert at the TLS layer — authentication is handled at the application layer). + +Config file read: `/etc/gateway/gateway.conf` → reads `[connect]` section. + +| Check | Description | Hard fail? | +|-------|-------------|-----------| +| CHECK 1 | `[connect] enabled = true` | Yes — gates all checks | +| CHECK 2 | `cluster_id` set and not the default value `cluster_1` | Yes | +| CHECK 3 | `[connect] hosts` set (GWM IP:port) | Yes | +| CHECK 4 | `certificate_file` set in `[connect]` section | Yes | +| CHECK 5 | Certificate file exists on disk | Yes | +| CHECK 6 | `private_key_file` set in `[connect]` section | Yes | +| CHECK 7 | Private key file exists on disk | Yes | +| CHECK 8 | Cert and key are a matched pair | Yes | +| CHECK 9 | Certificate is not expired | Yes | +| CHECK 10 | Certificate has more than 30 days remaining | Warn if < 30 days | +| CHECK 11 | Cert type identified (self-signed leaf is valid for connect) | Info | +| CHECK 12 | EKU contains `clientAuth` (required for GWM app-layer auth) | Warn if missing | +| CHECK 12b | Subject Alternative Name extension present | Warn | +| CHECK 12c | Cert SAN contains server `private_ip` as `IP:` entry | Warn | +| CHECK 13 | `no_proxy`/`NO_PROXY` set in systemd service | Warn | +| CHECK 14 | GWM host present in `no_proxy` | Warn | +| CHECK 15 | GWM hostname resolves from server | Yes | +| CHECK 16 | TCP connectivity from server to GWM | Yes | +| CHECK 17 | GWM server cert trusted by OS CA pool (`openssl s_client` against OS bundle) | Yes | +| CHECK 18 | WebSocket handshake to GWM returns HTTP 101 | Warn | +| CHECK 19 | `iagctl` service running; GATEWAY_CONNECT env vars visible in process | Warn | + +#### Why CHECK 2 (`cluster_id`) matters + +The default `cluster_id` value is `cluster_1`. If multiple IAG5 deployments connect to the same Gateway Manager with the default cluster ID, they collide and only one cluster's services are visible to GWM. This is a configuration error that does not produce an obvious error message, which is why certcheck catches it explicitly. + +#### Why `clientAuth` is only a WARN for connect + +For the server ↔ runner and client ↔ server paths, missing `clientAuth` in EKU causes a hard TLS handshake rejection. For the connect path, the TLS layer is one-way — GWM validates its own cert but does not require a client cert. The `clientAuth` EKU is needed for GWM's **application-layer** authentication, not the TLS layer, so the absence produces a softer error rather than a hard connection failure. + +--- + +## Output and Status Codes + +Each check produces one of five statuses: + +| Status | Meaning | +|--------|---------| +| `✅ PASS` | Check passed | +| `❌ FAIL` | Check failed — connection or deployment will not work | +| `⚠️ WARN` | Non-fatal issue — deployment may work but attention is needed | +| `⏭ SKIPPED` | Check was intentionally skipped because a prerequisite failed (e.g., TLS disabled, EKU invalid) | +| `ℹ️ INFO` | Informational — not a pass or fail | + +At the end of each play, the summary task prints a table of all check results for that node: + +``` +============================================================ +CLUSTER TLS — SERVER ↔ RUNNER (gRPC mTLS) — SERVER NODE — gateway_server +============================================================ +✅ PASS | CHECK 1 — [application] ca_certificate_file is set + Expected : ca_certificate_file = /path/to/ca-bundle.crt + Actual : ca_certificate_file = '/etc/gateway/ssl/ca.crt' +------------------------------------------------------------ +✅ PASS | CHECK 19 — [server] cert has both serverAuth and clientAuth in extendedKeyUsage + Expected : TLS Web Server Authentication, TLS Web Client Authentication + Actual : TLS Web Server Authentication, TLS Web Client Authentication +------------------------------------------------------------ +❌ FAIL | CHECK 27 — TLS handshake with IP verification SERVER → RUNNER (10.222.1.76) + Expected : Verify return code: 0 (ok) + Actual : Verify return code: 21 (unable to verify the first certificate) +------------------------------------------------------------ +``` + +All plays use `ignore_errors: true` so a failure in one check does not abort the remaining checks. The full picture is always shown. + +--- + +## Variables Reference + +### `vars/common.yml` + +| Variable | Default | Description | +|----------|---------|-------------| +| `server_gateway_conf` | `/etc/gateway/gateway.conf` | Path to gateway.conf on server and runner nodes | +| `client_gateway_conf` | `/home/itential/.gateway.d/gateway.conf` | Path to gateway.conf on client nodes | +| `service_name` | `iagctl` | systemd service name — used for PID lookup and env var checks | + +### `vars/cluster_server_to_runner.yml` + +| Variable | Default | Description | +|----------|---------|-------------| +| `runner_port` | `50051` | Port the runner listens on for gRPC from the server | + +### `vars/cluster_client_to_server.yml` + +| Variable | Default | Description | +|----------|---------|-------------| +| `server_port` | `50051` | Port the server listens on for gRPC from clients | + +### `vars/connect_server_to_gwm.yml` + +| Variable | Default | Description | +|----------|---------|-------------| +| `gwm_host` | Derived from `groups['gateway_manager'][0]['ansible_host']` | Gateway Manager hostname or IP | +| `gwm_port` | `8080` | Gateway Manager WebSocket port | +| `os_ca_bundle` | `/etc/pki/tls/certs/ca-bundle.crt` | OS CA bundle used to verify GWM's publicly-signed certificate | + +### Inventory host variables + +| Variable | Required | Description | +|----------|----------|-------------| +| `ansible_host` | Yes | Address Ansible uses to SSH into the node | +| `private_ip` | Yes | IP address the node advertises to other IAG5 nodes; must appear in the cert's SAN | +| `ansible_user` | Yes | SSH user | +| `ansible_ssh_private_key_file` | Yes (or equivalent auth) | SSH key path | + +--- + +## Integration with itential.iag5 Deployer + +certcheck can run integrated into the `itential.iag5` deployer so that TLS verification happens automatically after every deployment. + +### What changes in the deployer + +``` +itential.iag5/ +├── playbooks/ +│ ├── site.yml ← Add certcheck import at end +│ └── certcheck.yml ← New playbook +└── roles/ + ├── certcheck_cluster_server_to_runner/ ← Runs when gateway_server_use_tls: true + │ ├── defaults/main.yml + │ └── tasks/main.yml + ├── certcheck_cluster_client_to_server/ ← Runs when gateway_client_use_tls: true + │ ├── defaults/main.yml + │ └── tasks/main.yml + ├── certcheck_connect_server_to_gwm/ ← Runs when gateway_server_use_tls: true + │ ├── defaults/main.yml + │ └── tasks/main.yml + └── certcheck_common/ + └── tasks/summary.yml +``` + +> **Note:** `gateway_server_use_tls` and `gateway_client_use_tls` both default to `true` in the deployer. certcheck roles are skipped automatically when TLS is disabled — no inventory changes are needed to control this. + +### Run conditions + +Each certcheck role is gated on the deployer's TLS enable variables: + +| Role | Deployer variable | Default | +|------|------------------|---------| +| `certcheck_cluster_server_to_runner` | `gateway_server_use_tls` | `true` | +| `certcheck_cluster_client_to_server` | `gateway_client_use_tls` | `true` | +| `certcheck_connect_server_to_gwm` | `gateway_server_use_tls` | `true` | + +Because both variables default to `true` in the deployer, certcheck runs by default on every deployment that includes the relevant node types. If TLS is explicitly disabled in your inventory, the corresponding certcheck role skips all cert-specific checks and records them as `⏭ SKIPPED — TLS disabled` in the summary. + +To disable certcheck for a specific connection path without disabling TLS, use tags: + +```bash +# Skip server↔runner checks only +ansible-playbook itential.iag5.site -i inventories/dev/hosts --skip-tags cluster_server_to_runner + +# Skip all certcheck entirely +ansible-playbook itential.iag5.site -i inventories/dev/hosts --skip-tags certcheck +``` + +### Deployer group name mapping + +| Standalone certcheck group | Deployer group | +|---------------------------|----------------| +| `server` | `iag5_servers` | +| `runner` | `iag5_runners` | +| `client` | `iag5_clients` | +| `gateway_manager` | `gateway_manager` (add to inventory if using connect checks) | + +### How deployer variables map to certcheck variables + +| certcheck variable | Derived from deployer variable | +|--------------------|-------------------------------| +| `server_gateway_conf` | `{{ gateway_server_config_dir }}/gateway.conf` | +| `client_gateway_conf` | `{{ gateway_client_working_dir }}/gateway.conf` | +| `runner_port` / `server_port` | `{{ gateway_server_port }}` | +| `gwm_host` / `gwm_port` | Split from `{{ gateway_server_connect_hosts }}` | +| `service_name` | `iagctl` (hardcoded — matches deployer systemd unit) | +| `private_ip` | `{{ ansible_host }}` (deployer does not define `private_ip` separately) | + +### Running certcheck standalone against the deployer inventory + +```bash +ansible-playbook certcheck.yml -i inventories/dev/hosts +``` + +### Running a specific check suite + +```bash +ansible-playbook certcheck.yml -i inventories/dev/hosts --tags cluster_server_to_runner +ansible-playbook certcheck.yml -i inventories/dev/hosts --tags cluster_client_to_server +ansible-playbook certcheck.yml -i inventories/dev/hosts --tags connect_server_to_gwm +``` + +### Running the full deployer with certcheck + +When `certcheck.yml` is imported at the end of `site.yml`, certcheck runs automatically after every full deployment: + +```bash +ansible-playbook itential.iag5.site -i inventories/dev/hosts +``` diff --git a/playbooks/certcheck.yml b/playbooks/certcheck.yml new file mode 100644 index 0000000..6e7c29e --- /dev/null +++ b/playbooks/certcheck.yml @@ -0,0 +1,82 @@ +# Copyright (c) 2025, Itential, Inc +# GNU General Public License v3.0+ (see LICENSE or https://www.gnu.org/licenses/gpl-3.0.txt) +--- +# IAG5 TLS Certificate Verification +# +# Runs after deployment to verify that all TLS certificates are correctly +# configured across all IAG5 node types. All variables are defined in each +# role's defaults/main.yml and derived from the deployer's existing variables. +# +# Runs automatically as the final step of site.yml. +# Can also be run standalone: +# +# ansible-playbook itential.iag5.certcheck -i +# +# Or for a specific check suite only: +# ansible-playbook itential.iag5.certcheck -i --tags cluster_server_to_runner +# ansible-playbook itential.iag5.certcheck -i --tags cluster_client_to_server +# ansible-playbook itential.iag5.certcheck -i --tags connect_server_to_gwm + +# ----------------------------------------------------------------------- +# CLUSTER TLS — SERVER ↔ RUNNER (gRPC mTLS) +# ----------------------------------------------------------------------- +- name: "CERTCHECK | Cluster TLS — SERVER to RUNNER — SERVER node" + hosts: iag5_servers + become: true + tags: [certcheck, cluster_server_to_runner] + roles: + - role: itential.iag5.gateway + tags: always + - role: itential.iag5.certcheck_cluster_server_to_runner + when: gateway_server_use_tls | bool + +- name: "CERTCHECK | Cluster TLS — SERVER to RUNNER — RUNNER node" + hosts: iag5_runners + become: true + tags: [certcheck, cluster_server_to_runner] + roles: + - role: itential.iag5.gateway + tags: always + - role: itential.iag5.certcheck_cluster_server_to_runner + when: gateway_server_use_tls | bool + +# ----------------------------------------------------------------------- +# CLUSTER TLS — CLIENT ↔ SERVER (gRPC mTLS) +# ----------------------------------------------------------------------- +- name: "CERTCHECK | Cluster TLS — CLIENT to SERVER — CLIENT node" + hosts: iag5_clients + become: true + tags: [certcheck, cluster_client_to_server] + roles: + - role: itential.iag5.gateway + tags: always + - role: itential.iag5.certcheck_cluster_client_to_server + when: gateway_client_use_tls | bool + +- name: "CERTCHECK | Cluster TLS — CLIENT to SERVER — SERVER node" + hosts: iag5_servers + become: true + tags: [certcheck, cluster_client_to_server] + roles: + - role: itential.iag5.gateway + tags: always + - role: itential.iag5.certcheck_cluster_client_to_server + when: gateway_client_use_tls | bool + +# ----------------------------------------------------------------------- +# CONNECT TLS — SERVER → Gateway Manager (WebSocket) +# Only runs when gateway_manager group is defined in inventory. +# ----------------------------------------------------------------------- +- name: "CERTCHECK | Connect TLS — SERVER to Gateway Manager" + hosts: iag5_servers + become: true + tags: [certcheck, connect_server_to_gwm] + pre_tasks: + - name: "CERTCHECK | Skip connect checks if gateway_manager group not in inventory" + ansible.builtin.meta: end_host + when: "'gateway_manager' not in groups or groups['gateway_manager'] | length == 0" + roles: + - role: itential.iag5.gateway + tags: always + - role: itential.iag5.certcheck_connect_server_to_gwm + when: gateway_server_use_tls | bool diff --git a/playbooks/site.yml b/playbooks/site.yml index 7b2b8d1..2b13253 100644 --- a/playbooks/site.yml +++ b/playbooks/site.yml @@ -9,3 +9,6 @@ - name: Install and configure Gateway5 clients import_playbook: itential.iag5.clients + +- name: Verify IAG5 TLS certificates post-deployment + import_playbook: itential.iag5.certcheck diff --git a/roles/certcheck_cluster_client_to_server/defaults/main.yml b/roles/certcheck_cluster_client_to_server/defaults/main.yml new file mode 100644 index 0000000..3195006 --- /dev/null +++ b/roles/certcheck_cluster_client_to_server/defaults/main.yml @@ -0,0 +1,24 @@ +# Copyright (c) 2025, Itential, Inc +# GNU General Public License v3.0+ (see LICENSE or https://www.gnu.org/licenses/gpl-3.0.txt) +--- +######################################################## +# certcheck_cluster_client_to_server default variables +######################################################## + +# Inventory group names — must match the deployer's group names +iag5_server_group: "iag5_servers" +iag5_client_group: "iag5_clients" + +# Gateway config file paths — derived from deployer role variables +server_gateway_conf: "{{ gateway_server_config_dir }}/gateway.conf" +client_gateway_conf: "{{ gateway_client_working_dir }}/gateway.conf" + +# Service name — matches the systemd unit installed by the deployer +service_name: iagctl + +# Port the server listens on — derived from deployer default +server_port: "{{ gateway_server_port }}" + +# private_ip — used for SAN validation and no_proxy checks. +# Defaults to ansible_host since the deployer does not define private_ip. +private_ip: "{{ hostvars[inventory_hostname]['private_ip'] | default(ansible_host) }}" diff --git a/roles/certcheck_cluster_client_to_server/tasks/main.yml b/roles/certcheck_cluster_client_to_server/tasks/main.yml new file mode 100644 index 0000000..99762d4 --- /dev/null +++ b/roles/certcheck_cluster_client_to_server/tasks/main.yml @@ -0,0 +1,628 @@ +--- +# ROLE: cluster_client_to_server +# TLS Type: Mutual TLS (mTLS) — gRPC over TCP +# node_section is set automatically based on which host group the node belongs to +# Client node uses: ~/.gateway.d/gateway.conf → [client] section +# Server node uses: /etc/gateway/gateway.conf → [server] section + +- name: "INIT | Determine node section and gateway conf" + ansible.builtin.set_fact: + node_section: "{{ 'client' if inventory_hostname in groups[iag5_client_group] else 'server' }}" + gateway_conf: "{{ client_gateway_conf if inventory_hostname in groups[iag5_client_group] else server_gateway_conf }}" + +- name: "INIT | Init results tracker" + ansible.builtin.set_fact: + check_results: [] + +# ----------------------------------------------------------------------- +# [application] section — CA cert checks (same for both client and server) +# ----------------------------------------------------------------------- + +- name: "CHECK 1 | Read ca_certificate_file from [application] section" + ansible.builtin.shell: + cmd: > + awk '/^\[application\]/{f=1} f && /^\[/{if(!/^\[application\]/) f=0} f && /ca_certificate_file/' {{ gateway_conf }} + register: ca_conf_line + changed_when: false + ignore_errors: true + +- name: "CHECK 1 | Extract ca_certificate_file path" + ansible.builtin.set_fact: + ca_cert_path: "{{ ca_conf_line.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: ca_conf_line.stdout != '' + +- name: "CHECK 1 | Record result — ca_certificate_file set in [application]" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 1 — [application] ca_certificate_file is set', + 'expected': 'ca_certificate_file = /path/to/ca-bundle.crt', + 'actual': ca_conf_line.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if ca_conf_line.stdout != '' else '❌ FAIL' + } + ] }}" + +- name: "CHECK 2 | Verify ca_certificate_file exists on disk" + ansible.builtin.stat: + path: "{{ ca_cert_path }}" + register: ca_file_stat + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 2 | Record result — ca_certificate_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 2 — [application] ca_certificate_file exists on disk', + 'expected': 'File exists at ' + (ca_cert_path | default('N/A')), + 'actual': 'EXISTS' if (ca_file_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (ca_file_stat.stat.exists | default(false)) else '❌ FAIL' + } + ] }}" + +- name: "CHECK 3 | Verify CA bundle cert count" + ansible.builtin.command: + cmd: grep -c "BEGIN CERTIFICATE" {{ ca_cert_path }} + register: ca_count + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 3 | Record result — CA bundle cert count" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 3 — CA bundle cert count', + 'expected': 'At least 1 cert (root only is valid; 2 = root + intermediate)', + 'actual': ca_count.stdout | default('ERROR') + ' cert(s) found', + 'status': '✅ PASS' if ca_count.stdout | int >= 2 + else ('⚠️ WARN — only root CA present, no intermediate (valid but less secure)' + if ca_count.stdout | int == 1 + else '❌ FAIL — no certificates found in CA bundle') + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 4 | Verify CA cert has CA:TRUE" + ansible.builtin.shell: + cmd: openssl x509 -in {{ ca_cert_path }} -noout -text | grep -A1 "CA:" + register: ca_basic_constraints + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 4 | Record result — CA cert has CA:TRUE" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 4 — CA cert has CA:TRUE (can sign other certs)', + 'expected': 'CA:TRUE', + 'actual': ca_basic_constraints.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if 'TRUE' in (ca_basic_constraints.stdout | default('')) else '❌ FAIL' + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 5 | Verify last cert in CA bundle is self-signed root (subject hash == issuer hash)" + ansible.builtin.shell: + cmd: > + openssl x509 -in <(awk '/-----BEGIN CERTIFICATE-----/{c++} c==2{print}' {{ ca_cert_path }}) + -noout -subject_hash && + openssl x509 -in <(awk '/-----BEGIN CERTIFICATE-----/{c++} c==2{print}' {{ ca_cert_path }}) + -noout -issuer_hash + args: + executable: /bin/bash + register: ca_hash_check + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 5 | Record result — last cert in CA bundle is self-signed root" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 5 — Last cert in CA bundle is self-signed root (subject hash == issuer hash)', + 'expected': 'Both hashes match', + 'actual': ca_hash_check.stdout | default('ERROR'), + 'status': '✅ PASS' if (ca_hash_check.stdout_lines | default([]) | length == 2 and ca_hash_check.stdout_lines[0] == ca_hash_check.stdout_lines[1]) else '❌ FAIL' + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +# ----------------------------------------------------------------------- +# [client] or [server] section — read use_tls first, gate cert checks on it +# ----------------------------------------------------------------------- + +- name: "CHECK 6 | Read use_tls — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /use_tls/' {{ gateway_conf }} + register: node_usetls + changed_when: false + ignore_errors: true + +- name: "CHECK 6 | Set tls_enabled fact" + ansible.builtin.set_fact: + tls_enabled: "{{ 'true' in (node_usetls.stdout | default('')) }}" + +- name: "CHECK 6 | Record result — use_tls — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 6 — [' + node_section + '] use_tls', + 'expected': 'use_tls = true', + 'actual': node_usetls.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if tls_enabled else '❌ FAIL — TLS disabled, skipping cert checks' + } + ] }}" + +# ----------------------------------------------------------------------- +# CHECK 7 — server specific: SANs required +# client specific: SANs not required per doc (no extra check) +# ----------------------------------------------------------------------- + +- name: "CHECK 7 | Read certificate_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /certificate_file/' {{ gateway_conf }} + register: node_cert_conf + changed_when: false + ignore_errors: true + when: tls_enabled + +- name: "CHECK 7 | Extract certificate_file path" + ansible.builtin.set_fact: + node_cert_path: "{{ node_cert_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: tls_enabled and node_cert_conf.stdout != '' + +- name: "CHECK 7 | Record result — certificate_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 7 — [' + node_section + '] certificate_file is set', + 'expected': 'certificate_file = /path/to/cert', + 'actual': node_cert_conf.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and (node_cert_conf.stdout | default('')) != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 8 | Verify certificate_file exists on disk" + ansible.builtin.stat: + path: "{{ node_cert_path }}" + register: node_cert_stat + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 8 | Record result — certificate_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 8 — [' + node_section + '] certificate_file exists on disk', + 'expected': 'File exists at ' + (node_cert_path | default('N/A')), + 'actual': 'EXISTS' if (node_cert_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_cert_stat.stat.exists | default(false)) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 9 | Read private_key_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /private_key_file/' {{ gateway_conf }} + register: node_key_conf + changed_when: false + ignore_errors: true + when: tls_enabled + +- name: "CHECK 9 | Extract private_key_file path" + ansible.builtin.set_fact: + node_key_path: "{{ node_key_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: tls_enabled and node_key_conf.stdout != '' + +- name: "CHECK 9 | Record result — private_key_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 9 — [' + node_section + '] private_key_file is set', + 'expected': 'private_key_file = /path/to/key', + 'actual': node_key_conf.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and (node_key_conf.stdout | default('')) != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 10 | Verify private_key_file exists on disk" + ansible.builtin.stat: + path: "{{ node_key_path }}" + register: node_key_stat + when: tls_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 10 | Record result — private_key_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 10 — [' + node_section + '] private_key_file exists on disk', + 'expected': 'File exists at ' + (node_key_path | default('N/A')), + 'actual': 'EXISTS' if (node_key_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_key_stat.stat.exists | default(false)) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 11 | Get cert modulus" + ansible.builtin.shell: + cmd: openssl x509 -noout -modulus -in {{ node_cert_path }} | md5sum + register: node_cert_mod + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 11 | Get key modulus" + ansible.builtin.shell: + cmd: openssl rsa -noout -modulus -in {{ node_key_path }} | md5sum + register: node_key_mod + changed_when: false + ignore_errors: true + when: tls_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 11 | Record result — cert and key matched pair" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 11 — [' + node_section + '] cert and key are a matched pair', + 'expected': 'Matching md5 hashes', + 'actual': 'cert=' + (node_cert_mod.stdout | default('ERROR')) + ' key=' + (node_key_mod.stdout | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_cert_mod.stdout == node_key_mod.stdout) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 12 | Verify cert not expired" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -checkend 0 && openssl x509 -in {{ node_cert_path }} -noout -dates + register: node_expiry + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 12 | Record result — cert not expired" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 12 — [' + node_section + '] cert is not expired', + 'expected': 'Certificate is valid', + 'actual': node_expiry.stdout | default(node_expiry.stderr | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_expiry.rc == 0) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 13 | Get days remaining on cert" + ansible.builtin.shell: + cmd: echo $(( ( $(date -d "$(openssl x509 -enddate -noout -in {{ node_cert_path }} | cut -d= -f2)" +%s) - $(date +%s) ) / 86400 )) + register: node_days_remaining + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 13 | Record result — cert days remaining" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 13 — [' + node_section + '] cert days remaining until expiry', + 'expected': 'More than 30 days remaining', + 'actual': (node_days_remaining.stdout | default('ERROR')) + ' days remaining', + 'status': '✅ PASS' if (node_days_remaining.stdout | default('0') | int > 30) + else ('⚠️ WARN — expiring within 30 days' if (node_days_remaining.stdout | default('0') | int > 0) + else '❌ FAIL — expired') + } + ] }}" + +- name: "CHECK 14 | Verify cert is not a self-signed leaf (subject != issuer)" + ansible.builtin.command: + cmd: openssl x509 -in {{ node_cert_path }} -noout -subject -issuer + register: node_self_signed + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 14 | Record result — cert is not self-signed leaf" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 14 — [' + node_section + '] cert is not a self-signed leaf (subject != issuer)', + 'expected': 'subject != issuer (CA-signed)', + 'actual': node_self_signed.stdout | default('ERROR') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and + node_self_signed.stdout_lines | default([]) | length == 2 and + node_self_signed.stdout_lines[0] != node_self_signed.stdout_lines[1]) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL — self-signed leaf rejected by cluster TLS') + } + ] }}" + +- name: "CHECK 15 | Verify cert signed by CA" + ansible.builtin.command: + cmd: openssl verify -CAfile {{ ca_cert_path }} {{ node_cert_path }} + register: node_verify + changed_when: false + ignore_errors: true + when: tls_enabled and ca_cert_path is defined and ca_cert_path != '' and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 15 | Record result — cert signed by CA" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 15 — [' + node_section + '] cert is signed by CA', + 'expected': 'OK', + 'actual': node_verify.stdout | default(node_verify.stderr | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and 'OK' in (node_verify.stdout | default(''))) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 16 | Verify extendedKeyUsage (needs serverAuth + clientAuth for mTLS)" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A3 "Extended Key Usage" + register: node_eku + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 16 | Record result — cert extendedKeyUsage" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 16 — [' + node_section + '] cert has both serverAuth and clientAuth in extendedKeyUsage', + 'expected': 'TLS Web Server Authentication, TLS Web Client Authentication', + 'actual': node_eku.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and + 'Server Authentication' in (node_eku.stdout | default('')) and + 'Client Authentication' in (node_eku.stdout | default(''))) + else ('⚠️ WARN — extendedKeyUsage not set, relying on defaults' if (tls_enabled and node_eku.stdout == '') + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '❌ FAIL — missing clientAuth or serverAuth, mTLS handshake will be rejected')) + } + ] }}" + +- name: "CHECK 16 | Set eku_valid gate — both serverAuth and clientAuth must be present" + ansible.builtin.set_fact: + eku_valid: "{{ tls_enabled and + 'Server Authentication' in (node_eku.stdout | default('')) and + 'Client Authentication' in (node_eku.stdout | default('')) }}" + +# ----------------------------------------------------------------------- +# Server specific — SANs (required on server cert, not on client per doc) +# ----------------------------------------------------------------------- + +- name: "CHECK 17 | Get SANs from server cert" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A2 "Subject Alternative Name" + register: node_sans + changed_when: false + ignore_errors: true + when: tls_enabled and node_section == 'server' and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 17 | Record result — server cert SANs present" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17a — [server] cert SANs present', + 'expected': 'Subject Alternative Name extension present', + 'actual': node_sans.stdout | default('NO SANs FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_sans.stdout != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL — no SAN extension found') + } + ] }}" + when: node_section == 'server' + +- name: "CHECK 17 | Record result — server cert SAN contains private_ip" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17b — [server] cert SAN contains server private_ip', + 'expected': 'IP:' + hostvars[inventory_hostname]['private_ip'] + ' in Subject Alternative Name', + 'actual': node_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if (tls_enabled and ('IP:' + hostvars[inventory_hostname]['private_ip']) in (node_sans.stdout | default(''))) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '❌ FAIL — server private_ip missing from cert SANs, client TLS verify will fail') + } + ] }}" + when: tls_enabled and node_section == 'server' + +- name: "CHECK 17 | Record result — server cert SAN contains ansible_host (DNS/hostname)" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17c — [server] cert SAN contains ansible_host', + 'expected': 'DNS:' + hostvars[inventory_hostname]['ansible_host'] + ' in Subject Alternative Name', + 'actual': node_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if (tls_enabled and ('DNS:' + hostvars[inventory_hostname]['ansible_host']) in (node_sans.stdout | default(''))) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '⚠️ WARN — ansible_host not in cert SANs (only an issue if connecting by hostname)') + } + ] }}" + when: tls_enabled and node_section == 'server' + +# ----------------------------------------------------------------------- +# Proxy checks — verify server IP and hostname are in no_proxy +# ----------------------------------------------------------------------- + +- name: "CHECK 19 | Read no_proxy and NO_PROXY from systemd service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} | grep Environ | grep -oE "(no_proxy|NO_PROXY)=[^ ]*" + register: proxy_env + changed_when: false + ignore_errors: true + when: node_section == 'client' + +- name: "CHECK 19 | Record result — no_proxy is set" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 19 — no_proxy/NO_PROXY is set in systemd service', + 'expected': 'no_proxy and NO_PROXY env vars present', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if proxy_env.stdout != '' else '⚠️ WARN — no proxy exclusions set, all traffic may go through proxy' + } + ] }}" + +- name: "CHECK 20 | Verify server private IP is in no_proxy (client node only)" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 20 — Server private IP ' + hostvars[groups[iag5_server_group][0]]['private_ip'] + ' is in no_proxy', + 'expected': hostvars[groups[iag5_server_group][0]]['private_ip'] + ' present in no_proxy', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if hostvars[groups[iag5_server_group][0]]['private_ip'] in (proxy_env.stdout | default('')) else '❌ FAIL — server IP not in no_proxy, gRPC will route through proxy' + } + ] }}" + when: node_section == 'client' + +- name: "CHECK 21 | Verify server hostname is in no_proxy (client node only)" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 21 — Server hostname ' + hostvars[groups[iag5_server_group][0]]['ansible_host'] + ' is in no_proxy', + 'expected': hostvars[groups[iag5_server_group][0]]['ansible_host'] + ' present in no_proxy', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if hostvars[groups[iag5_server_group][0]]['ansible_host'] in (proxy_env.stdout | default('')) else '❌ FAIL — server hostname not in no_proxy, gRPC will route through proxy' + } + ] }}" + when: node_section == 'client' + +# ----------------------------------------------------------------------- +# mTLS enforcement — server enforces client cert (test from client only) +# ----------------------------------------------------------------------- + +- name: "CHECK 22 | Verify server enforces mTLS (reject connection without client cert)" + ansible.builtin.shell: + cmd: > + echo Q | openssl s_client + -connect {{ hostvars[groups[iag5_server_group][0]]['private_ip'] }}:{{ server_port }} + -CAfile {{ ca_cert_path }} + &1 | grep -E "alert|handshake failure|error" + register: sv_mtls_enforce + changed_when: false + ignore_errors: true + when: node_section == 'client' and tls_enabled and eku_valid | default(false) and ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 22 | Record result — server enforces mTLS" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 22 — Server enforces mTLS (rejects connection without client cert)', + 'expected': 'Connection rejected — alert or handshake failure', + 'actual': sv_mtls_enforce.stdout | default('NO OUTPUT') if (eku_valid | default(false)) else 'N/A — skipped, EKU check failed', + 'status': '✅ PASS' if (eku_valid | default(false) and ('alert' in (sv_mtls_enforce.stdout | default('')) or 'handshake failure' in (sv_mtls_enforce.stdout | default('')))) + else ('⏭ SKIPPED — EKU invalid, mTLS will fail at handshake' if not (eku_valid | default(false)) + else '❌ FAIL — server accepted connection without client cert') + } + ] }}" + when: node_section == 'client' + +# ----------------------------------------------------------------------- +# Process environment check +# ----------------------------------------------------------------------- + +- name: "CHECK 23 | Get PID of running service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} --property=MainPID | cut -d= -f2 + register: svc_pid + changed_when: false + ignore_errors: true + +- name: "CHECK 23 | Verify GATEWAY vars in running process environment" + ansible.builtin.shell: + cmd: > + cat /proc/{{ svc_pid.stdout }}/environ | tr '\0' '\n' | + grep -E "GATEWAY_APPLICATION_CA|GATEWAY_{{ node_section | upper }}" || true + register: proc_env + changed_when: false + ignore_errors: true + +- name: "CHECK 23 | Record result — process running and has GATEWAY vars" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 23 — Service is running and GATEWAY vars present in process environment', + 'expected': 'GATEWAY vars visible in /proc/PID/environ', + 'actual': proc_env.stdout_lines | default(['NOT FOUND']) | join(', ') if proc_env.stdout != '' else 'NOT FOUND — service may be using gateway.conf only (expected)', + 'status': '✅ PASS' if proc_env.stdout != '' else '⚠️ INFO — vars not in env, confirm gateway.conf is used' + } + ] }}" + +# ----------------------------------------------------------------------- +# Live mTLS connection CLIENT → SERVER (from client node only) +# ----------------------------------------------------------------------- + +- name: "CHECK 24 | Resolve server hostname/IP from client" + ansible.builtin.command: + cmd: "getent ahosts {{ hostvars[groups[iag5_server_group][0]]['private_ip'] }}" + register: sv_resolution + changed_when: false + ignore_errors: true + when: node_section == 'client' and tls_enabled + +- name: "CHECK 24 | Record result — server host resolution" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 24 — Server host resolves from client (' + hostvars[groups[iag5_server_group][0]]['private_ip'] + ')', + 'expected': 'Host resolves successfully', + 'actual': sv_resolution.stdout | default('FAILED TO RESOLVE'), + 'status': '✅ PASS' if sv_resolution.rc == 0 else '❌ FAIL' + } + ] }}" + when: node_section == 'client' + +- name: "CHECK 25 | Test TCP connectivity CLIENT → SERVER" + ansible.builtin.wait_for: + host: "{{ hostvars[groups[iag5_server_group][0]]['private_ip'] }}" + port: "{{ server_port }}" + timeout: 5 + register: tcp_check + ignore_errors: true + when: node_section == 'client' and tls_enabled + +- name: "CHECK 25 | Record result — TCP connectivity to server" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 25 — TCP connectivity CLIENT → SERVER (' + hostvars[groups[iag5_server_group][0]]['private_ip'] + ':' + server_port | string + ')', + 'expected': 'Port reachable within 5 seconds', + 'actual': 'REACHABLE' if not tcp_check.failed else 'UNREACHABLE — check security groups', + 'status': '✅ PASS' if not tcp_check.failed else '❌ FAIL' + } + ] }}" + when: node_section == 'client' + +- name: "CHECK 26 | Run TLS handshake with IP verification CLIENT → SERVER" + ansible.builtin.shell: | + set -o pipefail + openssl s_client \ + -connect {{ hostvars[groups[iag5_server_group][0]]['private_ip'] }}:{{ server_port }} \ + -verify_ip {{ hostvars[groups[iag5_server_group][0]]['private_ip'] }} \ + -verify_return_error \ + -cert {{ node_cert_path }} \ + -key {{ node_key_path }} \ + -CAfile {{ ca_cert_path }} \ + -showcerts + awk '/^\[application\]/{f=1} f && /^\[/{if(!/^\[application\]/) f=0} f && /ca_certificate_file/' {{ gateway_conf }} + register: ca_conf_line + changed_when: false + ignore_errors: true + +- name: "CHECK 1 | Extract ca_certificate_file path" + ansible.builtin.set_fact: + ca_cert_path: "{{ ca_conf_line.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + +- name: "CHECK 1 | Record result — ca_certificate_file set in [application]" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 1 — [application] ca_certificate_file is set', + 'expected': 'ca_certificate_file = /path/to/ca-bundle.crt', + 'actual': ca_conf_line.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if ca_conf_line.stdout != '' else '❌ FAIL' + } + ] }}" + +- name: "CHECK 2 | Verify ca_certificate_file exists on disk" + ansible.builtin.stat: + path: "{{ ca_cert_path }}" + register: ca_file_stat + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 2 | Record result — ca_certificate_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 2 — [application] ca_certificate_file exists on disk', + 'expected': 'File exists at ' + ca_cert_path, + 'actual': 'EXISTS' if (ca_file_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (ca_file_stat.stat.exists | default(false)) else '❌ FAIL' + } + ] }}" + +- name: "CHECK 3 | Verify CA bundle cert count" + ansible.builtin.command: + cmd: grep -c "BEGIN CERTIFICATE" {{ ca_cert_path }} + register: ca_count + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 3 | Record result — CA bundle cert count" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 3 — CA bundle cert count', + 'expected': 'At least 1 cert (root only is valid; 2 = root + intermediate)', + 'actual': ca_count.stdout | default('ERROR') + ' cert(s) found', + 'status': '✅ PASS' if ca_count.stdout | int >= 2 + else ('⚠️ WARN — only root CA present, no intermediate (valid but less secure)' + if ca_count.stdout | int == 1 + else '❌ FAIL — no certificates found in CA bundle') + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 4 | Verify CA cert has CA:TRUE" + ansible.builtin.shell: + cmd: openssl x509 -in {{ ca_cert_path }} -noout -text | grep -A1 "CA:" + register: ca_basic_constraints + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 4 | Record result — CA cert has CA:TRUE" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 4 — CA cert has CA:TRUE (can sign other certs)', + 'expected': 'CA:TRUE', + 'actual': ca_basic_constraints.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if 'TRUE' in (ca_basic_constraints.stdout | default('')) else '❌ FAIL' + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 5 | Verify last cert in CA bundle is self-signed root (subject hash == issuer hash)" + ansible.builtin.shell: + cmd: > + openssl x509 -in <(awk '/-----BEGIN CERTIFICATE-----/{c++} c==2{print}' {{ ca_cert_path }}) + -noout -subject_hash && + openssl x509 -in <(awk '/-----BEGIN CERTIFICATE-----/{c++} c==2{print}' {{ ca_cert_path }}) + -noout -issuer_hash + args: + executable: /bin/bash + register: ca_hash_check + changed_when: false + ignore_errors: true + when: ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 5 | Record result — last cert in CA bundle is self-signed root" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 5 — Last cert in CA bundle is self-signed root (subject hash == issuer hash)', + 'expected': 'Both hashes match', + 'actual': ca_hash_check.stdout | default('ERROR'), + 'status': '✅ PASS' if (ca_hash_check.stdout_lines | default([]) | length == 2 and ca_hash_check.stdout_lines[0] == ca_hash_check.stdout_lines[1]) else '❌ FAIL' + } + ] }}" + when: ca_cert_path is defined and ca_cert_path != '' + +# ----------------------------------------------------------------------- +# [server] or [runner] section — read use_tls first, gate cert checks on it +# ----------------------------------------------------------------------- + +- name: "CHECK 6 | Read use_tls — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /use_tls/' {{ gateway_conf }} + register: node_usetls + changed_when: false + ignore_errors: true + +- name: "CHECK 6 | Set tls_enabled fact" + ansible.builtin.set_fact: + tls_enabled: "{{ 'true' in (node_usetls.stdout | default('')) }}" + +- name: "CHECK 6 | Record result — use_tls — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 6 — [' + node_section + '] use_tls', + 'expected': 'use_tls = true', + 'actual': node_usetls.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if tls_enabled else '❌ FAIL — TLS disabled, skipping cert checks' + } + ] }}" + +# ----------------------------------------------------------------------- +# CHECK 7 — server specific: distributed_execution +# runner specific: listen_address +# ----------------------------------------------------------------------- + +- name: "CHECK 7 | Read distributed_execution from [server] section" + ansible.builtin.shell: + cmd: > + awk '/^\[server\]/{f=1} f && /^\[/{if(!/^\[server\]/) f=0} f && /distributed_execution/' {{ gateway_conf }} + register: dist_exec + changed_when: false + ignore_errors: true + when: node_section == 'server' + +- name: "CHECK 7 | Record result — [server] distributed_execution" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 7 — [server] distributed_execution = true', + 'expected': 'distributed_execution = true', + 'actual': dist_exec.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if 'true' in (dist_exec.stdout | default('')) else '❌ FAIL' + } + ] }}" + when: node_section == 'server' + +- name: "CHECK 7 | Read listen_address from [runner] section" + ansible.builtin.shell: + cmd: > + awk '/^\[runner\]/{f=1} f && /^\[/{if(!/^\[runner\]/) f=0} f && /listen_address/' {{ gateway_conf }} + register: runner_listen + changed_when: false + ignore_errors: true + when: node_section == 'runner' + +- name: "CHECK 7 | Record result — [runner] listen_address" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 7 — [runner] listen_address is set', + 'expected': 'listen_address = ', + 'actual': runner_listen.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if runner_listen.stdout != '' else '❌ FAIL' + } + ] }}" + when: node_section == 'runner' + +# ----------------------------------------------------------------------- +# Runner SAN IP validation (runner only) +# Three-leg check: inventory private_ip → cert SAN → actual interface IP +# ----------------------------------------------------------------------- + +- name: "CHECK 8 | Get actual interface IPs on runner" + ansible.builtin.shell: + cmd: ip addr show | grep "inet " | awk '{print $2}' | cut -d/ -f1 + register: runner_interface_ips + changed_when: false + ignore_errors: true + when: node_section == 'runner' + +- name: "CHECK 8 | Record result — inventory private_ip present on runner interface" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 8a — [runner] inventory private_ip present on actual interface', + 'expected': 'IP ' + hostvars[inventory_hostname]['private_ip'] + ' found on interface', + 'actual': runner_interface_ips.stdout_lines | default([]) | join(', '), + 'status': '✅ PASS' if hostvars[inventory_hostname]['private_ip'] in (runner_interface_ips.stdout_lines | default([])) else '❌ FAIL — private_ip not found on any interface' + } + ] }}" + when: node_section == 'runner' + + +# ----------------------------------------------------------------------- +# CHECK 9-16 — cert checks, same logic for both nodes +# ----------------------------------------------------------------------- + +- name: "CHECK 9 | Read certificate_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /certificate_file/' {{ gateway_conf }} + register: node_cert_conf + changed_when: false + ignore_errors: true + when: tls_enabled + +- name: "CHECK 9 | Extract certificate_file path" + ansible.builtin.set_fact: + node_cert_path: "{{ node_cert_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: tls_enabled + +- name: "CHECK 9 | Record result — certificate_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 9 — [' + node_section + '] certificate_file is set', + 'expected': 'certificate_file = /path/to/cert', + 'actual': node_cert_conf.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and (node_cert_conf.stdout | default('')) != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 10 | Verify certificate_file exists on disk" + ansible.builtin.stat: + path: "{{ node_cert_path }}" + register: node_cert_stat + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 10 | Record result — certificate_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 10 — [' + node_section + '] certificate_file exists on disk', + 'expected': 'File exists at ' + (node_cert_path | default('N/A')), + 'actual': 'EXISTS' if (node_cert_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_cert_stat.stat.exists | default(false)) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 10b | Get SANs from runner cert" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A2 "Subject Alternative Name" + register: runner_cert_sans + changed_when: false + ignore_errors: true + when: node_section == 'runner' and tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 10b | Record result — runner inventory private_ip present in cert SANs" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 10b — [runner] inventory private_ip present in cert SANs', + 'expected': 'IP:' + hostvars[inventory_hostname]['private_ip'] + ' in Subject Alternative Name', + 'actual': runner_cert_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if ('IP:' + hostvars[inventory_hostname]['private_ip']) in (runner_cert_sans.stdout | default('')) + else '❌ FAIL — runner private_ip missing from cert SANs, TLS will reject connection' + } + ] }}" + when: node_section == 'runner' and tls_enabled + +- name: "CHECK 11 | Read private_key_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /private_key_file/' {{ gateway_conf }} + register: node_key_conf + changed_when: false + ignore_errors: true + when: tls_enabled + +- name: "CHECK 11 | Extract private_key_file path" + ansible.builtin.set_fact: + node_key_path: "{{ node_key_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: tls_enabled + +- name: "CHECK 11 | Record result — private_key_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 11 — [' + node_section + '] private_key_file is set', + 'expected': 'private_key_file = /path/to/key', + 'actual': node_key_conf.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and (node_key_conf.stdout | default('')) != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 12 | Verify private_key_file exists on disk" + ansible.builtin.stat: + path: "{{ node_key_path }}" + register: node_key_stat + when: tls_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 12 | Record result — private_key_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 12 — [' + node_section + '] private_key_file exists on disk', + 'expected': 'File exists at ' + (node_key_path | default('N/A')), + 'actual': 'EXISTS' if (node_key_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_key_stat.stat.exists | default(false)) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 13 | Get cert modulus" + ansible.builtin.shell: + cmd: openssl x509 -noout -modulus -in {{ node_cert_path }} | md5sum + register: node_cert_mod + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 13 | Get key modulus" + ansible.builtin.shell: + cmd: openssl rsa -noout -modulus -in {{ node_key_path }} | md5sum + register: node_key_mod + changed_when: false + ignore_errors: true + when: tls_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 13 | Record result — cert and key matched pair" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 13 — [' + node_section + '] cert and key are a matched pair', + 'expected': 'Matching md5 hashes', + 'actual': 'cert=' + (node_cert_mod.stdout | default('ERROR')) + ' key=' + (node_key_mod.stdout | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_cert_mod.stdout == node_key_mod.stdout) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 14 | Verify cert not expired" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -checkend 0 && openssl x509 -in {{ node_cert_path }} -noout -dates + register: node_expiry + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 14 | Record result — cert not expired" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 14 — [' + node_section + '] cert is not expired', + 'expected': 'Certificate is valid', + 'actual': node_expiry.stdout | default(node_expiry.stderr | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_expiry.rc == 0) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 15 | Get days remaining on cert" + ansible.builtin.shell: + cmd: > + echo $(( ( $(date -d "$(openssl x509 -enddate -noout + -in {{ node_cert_path }} + | cut -d= -f2)" +%s) - $(date +%s) ) / 86400 )) + register: node_days_remaining + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 15 | Record result — cert days remaining" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 15 — [' + node_section + '] cert days remaining until expiry', + 'expected': 'More than 30 days remaining', + 'actual': (node_days_remaining.stdout | default('ERROR')) + ' days remaining', + 'status': '✅ PASS' if (node_days_remaining.stdout | default('0') | int > 30) + else ('⚠️ WARN — expiring within 30 days' if (node_days_remaining.stdout | default('0') | int > 0) + else '❌ FAIL — expired') + } + ] }}" + +- name: "CHECK 16 | Verify cert is not a self-signed leaf (subject != issuer)" + ansible.builtin.command: + cmd: openssl x509 -in {{ node_cert_path }} -noout -subject -issuer + register: node_self_signed + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 16 | Record result — cert is not self-signed leaf" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 16 — [' + node_section + '] cert is not a self-signed leaf (subject != issuer)', + 'expected': 'subject != issuer (CA-signed)', + 'actual': node_self_signed.stdout | default('ERROR') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and + node_self_signed.stdout_lines | default([]) | length == 2 and + node_self_signed.stdout_lines[0] != node_self_signed.stdout_lines[1]) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL — self-signed leaf rejected by cluster TLS') + } + ] }}" + +- name: "CHECK 17 | Get SANs from cert" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A2 "Subject Alternative Name" + register: node_sans + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 17 | Record result — cert SANs present" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17a — [' + node_section + '] cert SANs present', + 'expected': 'Subject Alternative Name extension present', + 'actual': node_sans.stdout | default('NO SANs FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and node_sans.stdout != '') else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL — no SAN extension found') + } + ] }}" + +- name: "CHECK 17 | Record result — server cert SAN contains private_ip" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17b — [server] cert SAN contains server private_ip', + 'expected': 'IP:' + hostvars[inventory_hostname]['private_ip'] + ' in Subject Alternative Name', + 'actual': node_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if (tls_enabled and ('IP:' + hostvars[inventory_hostname]['private_ip']) in (node_sans.stdout | default(''))) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '❌ FAIL — server private_ip missing from cert SANs') + } + ] }}" + when: tls_enabled and node_section == 'server' + +- name: "CHECK 17 | Record result — server cert SAN contains ansible_host (DNS/hostname)" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 17c — [server] cert SAN contains ansible_host', + 'expected': 'DNS:' + hostvars[inventory_hostname]['ansible_host'] + ' in Subject Alternative Name', + 'actual': node_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if (tls_enabled and ('DNS:' + hostvars[inventory_hostname]['ansible_host']) in (node_sans.stdout | default(''))) + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '⚠️ WARN — ansible_host not in cert SANs (only an issue if connecting by hostname)') + } + ] }}" + when: tls_enabled and node_section == 'server'" + +- name: "CHECK 18 | Verify cert signed by CA" + ansible.builtin.command: + cmd: openssl verify -CAfile {{ ca_cert_path }} {{ node_cert_path }} + register: node_verify + changed_when: false + ignore_errors: true + when: tls_enabled and ca_cert_path is defined and ca_cert_path != '' and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 18 | Record result — cert signed by CA" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 18 — [' + node_section + '] cert is signed by CA', + 'expected': 'OK', + 'actual': node_verify.stdout | default(node_verify.stderr | default('ERROR')) if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and 'OK' in (node_verify.stdout | default(''))) else ('⏭ SKIPPED — TLS disabled' if not tls_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 19 | Verify extendedKeyUsage on cert (needs serverAuth + clientAuth for mTLS)" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A3 "Extended Key Usage" + register: node_eku + changed_when: false + ignore_errors: true + when: tls_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 19 | Record result — cert extendedKeyUsage" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 19 — [' + node_section + '] cert has both serverAuth and clientAuth in extendedKeyUsage', + 'expected': 'TLS Web Server Authentication, TLS Web Client Authentication', + 'actual': node_eku.stdout | default('NOT FOUND') if tls_enabled else 'N/A', + 'status': '✅ PASS' if (tls_enabled and + 'Server Authentication' in (node_eku.stdout | default('')) and + 'Client Authentication' in (node_eku.stdout | default(''))) + else ('⚠️ WARN — extendedKeyUsage not set, relying on defaults' if (tls_enabled and node_eku.stdout == '') + else ('⏭ SKIPPED — TLS disabled' if not tls_enabled + else '❌ FAIL — missing clientAuth or serverAuth, mTLS handshake will be rejected')) + } + ] }}" + +- name: "CHECK 19 | Set eku_valid gate — both serverAuth and clientAuth must be present" + ansible.builtin.set_fact: + eku_valid: "{{ tls_enabled and + 'Server Authentication' in (node_eku.stdout | default('')) and + 'Client Authentication' in (node_eku.stdout | default('')) }}" + +# ----------------------------------------------------------------------- +# Server enforces mTLS — server node only +# (CHECK 20b — separate from EKU check above) +# ----------------------------------------------------------------------- + +- name: "CHECK 20 | Verify runner enforces mTLS (reject connection without client cert)" + ansible.builtin.shell: | + set -o pipefail + echo Q | openssl s_client \ + -connect {{ hostvars[groups[iag5_runner_group][0]]['private_ip'] }}:{{ runner_port }} \ + -CAfile {{ ca_cert_path }} \ + &1 | grep -E "alert|handshake failure|error" + args: + executable: /bin/bash + register: sv_mtls_enforce + changed_when: false + ignore_errors: true + when: node_section == 'server' and tls_enabled and eku_valid | default(false) and ca_cert_path is defined and ca_cert_path != '' + +- name: "CHECK 20 | Record result — runner enforces mTLS" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 20 — Runner enforces mTLS (rejects connection without client cert)', + 'expected': 'Connection rejected — alert or handshake failure', + 'actual': sv_mtls_enforce.stdout | default('NO OUTPUT') if (eku_valid | default(false)) else 'N/A — skipped, EKU check failed', + 'status': '✅ PASS' if (eku_valid | default(false) and ('alert' in (sv_mtls_enforce.stdout | default('')) or 'handshake failure' in (sv_mtls_enforce.stdout | default('')))) + else ('⏭ SKIPPED — EKU invalid, mTLS will fail at handshake' if not (eku_valid | default(false)) + else '❌ FAIL — runner accepted connection without client cert') + } + ] }}" + when: node_section == 'server' + +# ----------------------------------------------------------------------- +# Process environment check +# ----------------------------------------------------------------------- + +- name: "CHECK 21 | Get PID of running service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} --property=MainPID | cut -d= -f2 + register: svc_pid + changed_when: false + ignore_errors: true + +- name: "CHECK 21 | Verify GATEWAY vars in running process environment" + ansible.builtin.shell: + cmd: > + cat /proc/{{ svc_pid.stdout }}/environ | tr '\0' '\n' | + grep -E "GATEWAY_APPLICATION_CA|GATEWAY_{{ node_section | upper }}" || true + register: proc_env + changed_when: false + ignore_errors: true + +- name: "CHECK 21 | Record result — process running and has GATEWAY vars" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 21 — Service is running and GATEWAY vars present in process environment', + 'expected': 'GATEWAY vars visible in /proc/PID/environ', + 'actual': proc_env.stdout_lines | default(['NOT FOUND']) | join(', ') if proc_env.stdout != '' else 'NOT FOUND — service may be using gateway.conf only (expected)', + 'status': '✅ PASS' if proc_env.stdout != '' else '⚠️ INFO — vars not in env, confirm gateway.conf is used' + } + ] }}" + +# ----------------------------------------------------------------------- +# Proxy checks — SERVER only +# Verify runner hostnames and IPs are in no_proxy +# Go reads proxy env vars for all outbound connections including gRPC +# openssl s_client bypasses proxy so this must be checked separately +# ----------------------------------------------------------------------- + +- name: "CHECK 22 | Read no_proxy and NO_PROXY from systemd service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} | grep Environ | grep -oE "(no_proxy|NO_PROXY)=[^ ]*" + register: proxy_env + changed_when: false + ignore_errors: true + when: node_section == 'server' + +- name: "CHECK 22 | Record result — no_proxy is set" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 22 — no_proxy/NO_PROXY is set in systemd service', + 'expected': 'no_proxy and NO_PROXY env vars present', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if proxy_env.stdout != '' else '⚠️ WARN — no proxy exclusions set, all traffic may go through proxy' + } + ] }}" + when: node_section == 'server' + +- name: "CHECK 23 | Verify each runner private IP is in no_proxy" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 23 — Runner private IP ' + hostvars[item]['private_ip'] + ' is in no_proxy', + 'expected': hostvars[item]['private_ip'] + ' present in no_proxy', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if hostvars[item]['private_ip'] in (proxy_env.stdout | default('')) else '❌ FAIL — runner IP not in no_proxy, gRPC will route through proxy' + } + ] }}" + loop: "{{ groups[iag5_runner_group] }}" + loop_control: + label: "{{ hostvars[item]['private_ip'] }}" + when: node_section == 'server' + +- name: "CHECK 24 | Verify each runner hostname is in no_proxy" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 24 — Runner hostname ' + hostvars[item]['ansible_host'] + ' is in no_proxy', + 'expected': hostvars[item]['ansible_host'] + ' present in no_proxy', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if hostvars[item]['ansible_host'] in (proxy_env.stdout | default('')) else '❌ FAIL — runner hostname not in no_proxy, gRPC will route through proxy' + } + ] }}" + loop: "{{ groups[iag5_runner_group] }}" + loop_control: + label: "{{ hostvars[item]['ansible_host'] }}" + when: node_section == 'server' + +# ----------------------------------------------------------------------- +# Live mTLS connection — SERVER only, loops over all runners +# ----------------------------------------------------------------------- + +- name: "CHECK 25 | Resolve all runner IPs from server" + ansible.builtin.command: + cmd: "getent ahosts {{ hostvars[item]['private_ip'] }}" + register: runner_resolutions + changed_when: false + ignore_errors: true + loop: "{{ groups[iag5_runner_group] }}" + loop_control: + label: "{{ hostvars[item]['private_ip'] }}" + when: node_section == 'server' and tls_enabled + +- name: "CHECK 25 | Record result — runner host resolution" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 25 — Runner host resolves from server (' + hostvars[item.item]['private_ip'] + ')', + 'expected': 'Host resolves successfully', + 'actual': item.stdout | default('FAILED TO RESOLVE'), + 'status': '✅ PASS' if item.rc == 0 else '❌ FAIL' + } + ] }}" + loop: "{{ runner_resolutions.results }}" + loop_control: + label: "{{ hostvars[item.item]['private_ip'] }}" + when: node_section == 'server' and tls_enabled + +- name: "CHECK 26 | Test TCP connectivity SERVER → all runners" + ansible.builtin.wait_for: + host: "{{ hostvars[item]['private_ip'] }}" + port: "{{ runner_port }}" + timeout: 5 + register: tcp_checks + ignore_errors: true + loop: "{{ groups[iag5_runner_group] }}" + loop_control: + label: "{{ hostvars[item]['private_ip'] }}" + when: node_section == 'server' and tls_enabled + +- name: "CHECK 26 | Record result — TCP connectivity to all runners" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 26 — TCP connectivity SERVER → RUNNER (' + hostvars[item.item]['private_ip'] + ':' + runner_port | string + ')', + 'expected': 'Port reachable within 5 seconds', + 'actual': 'REACHABLE' if not item.failed else 'UNREACHABLE — check security groups', + 'status': '✅ PASS' if not item.failed else '❌ FAIL' + } + ] }}" + loop: "{{ tcp_checks.results }}" + loop_control: + label: "{{ hostvars[item.item]['private_ip'] }}" + when: node_section == 'server' and tls_enabled + +- name: "CHECK 27 | Run TLS handshake with IP verification SERVER → all runners" + ansible.builtin.shell: | + set -o pipefail + openssl s_client \ + -connect {{ hostvars[item]['private_ip'] }}:{{ runner_port }} \ + -verify_ip {{ hostvars[item]['private_ip'] }} \ + -verify_return_error \ + -cert {{ node_cert_path }} \ + -key {{ node_key_path }} \ + -CAfile {{ ca_cert_path }} \ + -showcerts + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /^\s*enabled/' {{ gateway_conf }} + register: connect_enabled + changed_when: false + ignore_errors: true + +- name: "CHECK 1 | Set connect_is_enabled fact" + ansible.builtin.set_fact: + connect_is_enabled: "{{ 'true' in (connect_enabled.stdout | default('')) }}" + +- name: "CHECK 1 | Record result — enabled — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 1 — [' + node_section + '] enabled', + 'expected': 'enabled = true', + 'actual': connect_enabled.stdout | default('NOT FOUND'), + 'status': '✅ PASS' if connect_is_enabled else '❌ FAIL — Connect disabled, skipping all connect cert checks' + } + ] }}" + +# ----------------------------------------------------------------------- +# [application] section — cluster_id +# ----------------------------------------------------------------------- + +- name: "CHECK 2 | Read cluster_id from [application] section" + ansible.builtin.shell: + cmd: > + awk '/^\[application\]/{f=1} f && /^\[/{if(!/^\[application\]/) f=0} f && /cluster_id/' {{ gateway_conf }} + register: cluster_id_conf + changed_when: false + ignore_errors: true + +- name: "CHECK 2 | Record result — cluster_id set and not default" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 2 — [application] cluster_id set and not default (cluster_1 causes collisions in GWM)', + 'expected': 'cluster_id = (not cluster_1)', + 'actual': cluster_id_conf.stdout | default('NOT SET — defaults to cluster_1'), + 'status': '✅ PASS' if (cluster_id_conf.stdout != '' and 'cluster_1' not in cluster_id_conf.stdout) else '❌ FAIL' + } + ] }}" + +# ----------------------------------------------------------------------- +# [connect] section — hosts, cert, key (gated on connect_is_enabled) +# ----------------------------------------------------------------------- + +- name: "CHECK 3 | Read hosts — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /^\s*hosts/' {{ gateway_conf }} + register: connect_hosts + changed_when: false + ignore_errors: true + when: connect_is_enabled + +- name: "CHECK 3 | Extract GWM endpoint from hosts" + ansible.builtin.set_fact: + gwm_endpoint: "{{ connect_hosts.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: connect_is_enabled and connect_hosts.stdout != '' + +- name: "CHECK 3 | Record result — hosts set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 3 — [' + node_section + '] hosts is set (IP:port of Gateway Manager)', + 'expected': 'hosts = :', + 'actual': connect_hosts.stdout | default('NOT FOUND') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and connect_hosts.stdout != '') else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 4 | Read certificate_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /certificate_file/' {{ gateway_conf }} + register: node_cert_conf + changed_when: false + ignore_errors: true + when: connect_is_enabled + +- name: "CHECK 4 | Extract certificate_file path" + ansible.builtin.set_fact: + node_cert_path: "{{ node_cert_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: connect_is_enabled and node_cert_conf.stdout != '' + +- name: "CHECK 4 | Record result — certificate_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 4 — [' + node_section + '] certificate_file is set', + 'expected': 'certificate_file = /path/to/cert', + 'actual': node_cert_conf.stdout | default('NOT FOUND') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_cert_conf.stdout != '') else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 5 | Verify certificate_file exists on disk" + ansible.builtin.stat: + path: "{{ node_cert_path }}" + register: node_cert_stat + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 5 | Record result — certificate_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 5 — [' + node_section + '] certificate_file exists on disk', + 'expected': 'File exists at ' + (node_cert_path | default('N/A')), + 'actual': 'EXISTS' if (node_cert_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_cert_stat.stat.exists | default(false)) else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 6 | Read private_key_file — node section {{ node_section }}" + ansible.builtin.shell: + cmd: > + awk '/^\[{{ node_section }}\]/{f=1} f && /^\[/{if(!/^\[{{ node_section }}\]/) f=0} f && /private_key_file/' {{ gateway_conf }} + register: node_key_conf + changed_when: false + ignore_errors: true + when: connect_is_enabled + +- name: "CHECK 6 | Extract private_key_file path" + ansible.builtin.set_fact: + node_key_path: "{{ node_key_conf.stdout.split('=')[1] | trim | replace(\"'\", '') | replace('\"', '') }}" + when: connect_is_enabled and node_key_conf.stdout != '' + +- name: "CHECK 6 | Record result — private_key_file set — node section {{ node_section }}" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 6 — [' + node_section + '] private_key_file is set', + 'expected': 'private_key_file = /path/to/key', + 'actual': node_key_conf.stdout | default('NOT FOUND') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_key_conf.stdout != '') else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 7 | Verify private_key_file exists on disk" + ansible.builtin.stat: + path: "{{ node_key_path }}" + register: node_key_stat + when: connect_is_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 7 | Record result — private_key_file exists on disk" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 7 — [' + node_section + '] private_key_file exists on disk', + 'expected': 'File exists at ' + (node_key_path | default('N/A')), + 'actual': 'EXISTS' if (node_key_stat.stat.exists | default(false)) else 'NOT FOUND on disk', + 'status': '✅ PASS' if (node_key_stat.stat.exists | default(false)) else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 8 | Get cert modulus" + ansible.builtin.shell: + cmd: openssl x509 -noout -modulus -in {{ node_cert_path }} | md5sum + register: node_cert_mod + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 8 | Get key modulus" + ansible.builtin.shell: + cmd: openssl rsa -noout -modulus -in {{ node_key_path }} | md5sum + register: node_key_mod + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_key_path is defined and node_key_path != '' + +- name: "CHECK 8 | Record result — cert and key matched pair" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 8 — [' + node_section + '] cert and key are a matched pair', + 'expected': 'Matching md5 hashes', + 'actual': 'cert=' + (node_cert_mod.stdout | default('ERROR')) + ' key=' + (node_key_mod.stdout | default('ERROR')) if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_cert_mod.stdout == node_key_mod.stdout) else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 9 | Verify cert not expired" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -checkend 0 && openssl x509 -in {{ node_cert_path }} -noout -dates + register: node_expiry + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 9 | Record result — cert not expired" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 9 — [' + node_section + '] cert is not expired', + 'expected': 'Certificate is valid', + 'actual': node_expiry.stdout | default(node_expiry.stderr | default('ERROR')) if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_expiry.rc == 0) else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + +- name: "CHECK 10 | Get days remaining on cert" + ansible.builtin.shell: + cmd: echo $(( ( $(date -d "$(openssl x509 -enddate -noout -in {{ node_cert_path }} | cut -d= -f2)" +%s) - $(date +%s) ) / 86400 )) + register: node_days_remaining + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 10 | Record result — cert days remaining" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 10 — [' + node_section + '] cert days remaining until expiry', + 'expected': 'More than 30 days remaining', + 'actual': (node_days_remaining.stdout | default('ERROR')) + ' days remaining', + 'status': '✅ PASS' if (node_days_remaining.stdout | default('0') | int > 30) + else ('⚠️ WARN — expiring within 30 days' if (node_days_remaining.stdout | default('0') | int > 0) + else '❌ FAIL — expired') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 11 | Identify cert type (self-signed leaf vs CA-signed — both valid for connect)" + ansible.builtin.command: + cmd: openssl x509 -in {{ node_cert_path }} -noout -subject -issuer + register: node_self_signed + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 11 | Record result — cert type" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 11 — [' + node_section + '] cert type (self-signed leaf is OK for connect per doc)', + 'expected': 'Self-signed (subject == issuer) or CA-signed (subject != issuer) — both valid', + 'actual': node_self_signed.stdout | default('ERROR') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_self_signed.stdout != '') else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 12 | Verify extendedKeyUsage on cert (clientAuth needed for GWM auth)" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A3 "Extended Key Usage" + register: node_eku + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 12 | Record result — cert extendedKeyUsage" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 12 — [' + node_section + '] cert extendedKeyUsage (clientAuth needed for GWM auth)', + 'expected': 'TLS Web Client Authentication (at minimum)', + 'actual': node_eku.stdout | default('NOT FOUND — may be self-signed leaf with no EKU') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and 'Client Authentication' in (node_eku.stdout | default(''))) + else ('⚠️ INFO — no extendedKeyUsage set, self-signed leaf without EKU is valid for connect per doc' if (connect_is_enabled and node_eku.stdout == '') + else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled + else '⚠️ WARN — EKU present but clientAuth missing')) + } + ] }}" + when: connect_is_enabled + +# ----------------------------------------------------------------------- +# SAN check — verify server cert SANs contain private_ip and ansible_host +# ----------------------------------------------------------------------- + +- name: "CHECK 12b | Get SANs from server cert" + ansible.builtin.shell: + cmd: openssl x509 -in {{ node_cert_path }} -noout -text | grep -A2 "Subject Alternative Name" + register: node_sans + changed_when: false + ignore_errors: true + when: connect_is_enabled and node_cert_path is defined and node_cert_path != '' + +- name: "CHECK 12b | Record result — cert SANs present" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 12b — [' + node_section + '] cert SANs present', + 'expected': 'Subject Alternative Name extension present', + 'actual': node_sans.stdout | default('NO SANs FOUND') if connect_is_enabled else 'N/A', + 'status': '✅ PASS' if (connect_is_enabled and node_sans.stdout != '') + else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled + else '⚠️ WARN — no SAN extension (self-signed leaf without SANs may still work for connect)') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 12c | Record result — cert SAN contains server private_ip" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 12c — [' + node_section + '] cert SAN contains server private_ip', + 'expected': 'IP:' + hostvars[inventory_hostname]['private_ip'] + ' in Subject Alternative Name', + 'actual': node_sans.stdout | default('NO SANs FOUND'), + 'status': '✅ PASS' if (connect_is_enabled and ('IP:' + hostvars[inventory_hostname]['private_ip']) in (node_sans.stdout | default(''))) + else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled + else '⚠️ WARN — private_ip not in cert SANs (required if GWM validates client cert by IP)') + } + ] }}" + when: connect_is_enabled and node_sans is defined + +# ----------------------------------------------------------------------- +# Proxy checks — verify GWM host is in no_proxy (if internal) +# ----------------------------------------------------------------------- + +- name: "CHECK 13 | Read no_proxy and NO_PROXY from systemd service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} | grep Environ | grep -oE "(no_proxy|NO_PROXY)=[^ ]*" + register: proxy_env + changed_when: false + ignore_errors: true + +- name: "CHECK 13 | Record result — no_proxy is set" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 13 — no_proxy/NO_PROXY is set in systemd service', + 'expected': 'no_proxy and NO_PROXY env vars present', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if proxy_env.stdout != '' else '⚠️ WARN — no proxy exclusions set' + } + ] }}" + +- name: "CHECK 14 | Verify GWM host exclusion in no_proxy" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 14 — GWM host ' + (gwm_endpoint.split(':')[0] | default('N/A')) + ' exclusion in no_proxy', + 'expected': 'GWM host in no_proxy OR proxy intentionally used for GWM', + 'actual': proxy_env.stdout | default('NOT SET'), + 'status': '✅ PASS' if (gwm_endpoint.split(':')[0] | default('')) in (proxy_env.stdout | default('')) else '⚠️ WARN — GWM host not in no_proxy (may be intentional if GWM is external)' + } + ] }}" + when: connect_is_enabled and gwm_endpoint is defined and gwm_endpoint != '' + +# ----------------------------------------------------------------------- +# GWM connectivity checks +# ----------------------------------------------------------------------- + +- name: "CHECK 15 | Set GWM host and port" + ansible.builtin.set_fact: + gwm_host: "{{ gwm_endpoint.split(':')[0] }}" + gwm_port: "{{ gwm_endpoint.split(':')[1] }}" + when: connect_is_enabled and gwm_endpoint is defined and gwm_endpoint != '' + +- name: "CHECK 15 | Resolve GWM host from server" + ansible.builtin.command: + cmd: "getent ahosts {{ gwm_host }}" + register: gwm_resolution + changed_when: false + ignore_errors: true + when: connect_is_enabled and gwm_host is defined and gwm_host != '' + +- name: "CHECK 15 | Record result — GWM host resolution" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 15 — GWM host resolves from server (' + (gwm_host | default('N/A')) + ')', + 'expected': 'Host resolves successfully', + 'actual': gwm_resolution.stdout | default('FAILED TO RESOLVE'), + 'status': '✅ PASS' if (gwm_resolution.rc | default(1)) == 0 else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 16 | Test TCP connectivity SERVER → GWM" + ansible.builtin.wait_for: + host: "{{ gwm_host }}" + port: "{{ gwm_port }}" + timeout: 5 + register: tcp_check + ignore_errors: true + when: connect_is_enabled and gwm_host is defined and gwm_host != '' + +- name: "CHECK 16 | Record result — TCP connectivity to GWM" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 16 — TCP connectivity SERVER → GWM (' + (gwm_host | default('N/A')) + ':' + (gwm_port | default('N/A') | string) + ')', + 'expected': 'Port reachable within 5 seconds', + 'actual': 'REACHABLE' if not tcp_check.failed else 'UNREACHABLE — check security groups', + 'status': '✅ PASS' if not tcp_check.failed else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '❌ FAIL') + } + ] }}" + when: connect_is_enabled + +- name: "CHECK 17 | Verify GWM server cert trusted by OS CA pool (Layer 1 one-way TLS)" + ansible.builtin.shell: | + set -o pipefail + openssl s_client \ + -connect {{ gwm_host }}:{{ gwm_port }} \ + -verify_ip {{ gwm_host }} \ + -verify_return_error \ + -CAfile {{ os_ca_bundle }} \ + -showcerts &1 + register: gwm_ws_test + changed_when: false + ignore_errors: true + when: connect_is_enabled and gwm_host is defined and gwm_host != '' + +- name: "CHECK 18 | Record result — WebSocket connectivity to GWM" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 18 — WebSocket connectivity to GWM (wss://' + (gwm_host | default('N/A')) + ':' + (gwm_port | default('N/A') | string) + '/ws)', + 'expected': 'HTTP 101 Switching Protocols', + 'actual': 'HTTP ' + (gwm_ws_test.stdout | default('NO RESPONSE')), + 'status': '✅ PASS' if '101' in (gwm_ws_test.stdout | default('')) else ('⏭ SKIPPED — Connect disabled' if not connect_is_enabled else '⚠️ WARN — got HTTP ' + (gwm_ws_test.stdout | default('NO RESPONSE')) + ' (101 expected)') + } + ] }}" + when: connect_is_enabled + +# ----------------------------------------------------------------------- +# Process environment check +# ----------------------------------------------------------------------- + +- name: "CHECK 19 | Get PID of running service" + ansible.builtin.shell: + cmd: systemctl show {{ service_name }} --property=MainPID | cut -d= -f2 + register: svc_pid + changed_when: false + ignore_errors: true + +- name: "CHECK 19 | Verify GATEWAY_CONNECT vars in running process environment" + ansible.builtin.shell: + cmd: cat /proc/{{ svc_pid.stdout }}/environ | tr '\0' '\n' | grep -E "GATEWAY_CONNECT|GATEWAY_APPLICATION_CLUSTER" || true + register: proc_env + changed_when: false + ignore_errors: true + +- name: "CHECK 19 | Record result — process running and has GATEWAY_CONNECT vars" + ansible.builtin.set_fact: + check_results: "{{ check_results + [ + { + 'check': 'CHECK 19 — Service is running and GATEWAY_CONNECT vars present in process environment', + 'expected': 'GATEWAY_CONNECT vars visible in /proc/PID/environ', + 'actual': proc_env.stdout_lines | default(['NOT FOUND']) | join(', ') if proc_env.stdout != '' else 'NOT FOUND — service may be using gateway.conf only (expected)', + 'status': '✅ PASS' if proc_env.stdout != '' else '⚠️ INFO — vars not in env, confirm gateway.conf is used' + } + ] }}" + +# ----------------------------------------------------------------------- +# SUMMARY +# ----------------------------------------------------------------------- +- name: "SUMMARY | Print results" + ansible.builtin.include_tasks: ../../certcheck_common/tasks/summary.yml + vars: + role_title: "CONNECT TLS — SERVER → GATEWAY MANAGER (one-way TLS + app-layer auth) — {{ node_section | upper }} NODE"