From cebd8cc82805c47f576697777ca4f95068d17aaa Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 14:37:21 +0100 Subject: [PATCH 1/6] Add conftest policy guardrails for GitOps manifests --- .github/workflows/app-gitops-guardrails.yml | 18 ++++++- Makefile | 7 ++- README.md | 2 + applications/gitops/base/sample-service.yaml | 3 +- applications/policy/README.md | 17 +++++++ applications/policy/deployment-security.rego | 50 ++++++++++++++++++++ docs/platform-product-progress.md | 6 +-- 7 files changed, 96 insertions(+), 7 deletions(-) create mode 100644 applications/policy/README.md create mode 100644 applications/policy/deployment-security.rego diff --git a/.github/workflows/app-gitops-guardrails.yml b/.github/workflows/app-gitops-guardrails.yml index c079b09..2894309 100644 --- a/.github/workflows/app-gitops-guardrails.yml +++ b/.github/workflows/app-gitops-guardrails.yml @@ -35,6 +35,20 @@ jobs: ./kubeconform -strict -summary "${manifest_files[@]}" - - name: Policy test placeholder (OPA/Kyverno) + - name: Policy checks with Conftest + shell: bash run: | - echo "Run conftest / kyverno CLI checks here" + set -euo pipefail + + curl -sSL -o conftest.tar.gz \ + https://github.com/open-policy-agent/conftest/releases/latest/download/conftest_$(uname -s)_$(uname -m).tar.gz + tar -xzf conftest.tar.gz conftest + + mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) + + if [ "${#manifest_files[@]}" -eq 0 ]; then + echo "No Kubernetes manifests found in applications/gitops/base" + exit 1 + fi + + ./conftest test "${manifest_files[@]}" -p applications/policy diff --git a/Makefile b/Makefile index d25651f..36d06ec 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ ENV ?= dev SERVICE ?= sample-service TAG ?= latest -.PHONY: help build test synth platform-check platform-plan platform-apply app-bootstrap app-deploy platform-progress +.PHONY: help build test synth platform-check platform-plan 
platform-apply app-bootstrap app-deploy app-policy-test platform-progress help: @echo "make build # Build TypeScript" @@ -14,6 +14,7 @@ help: @echo "make platform-apply ENV=dev # Apply platform changes" @echo "make app-bootstrap SERVICE=name # Bootstrap app from template" @echo "make app-deploy ENV=dev SERVICE=name TAG=v1.0.0" + @echo "make app-policy-test # Run local policy bundle checks" @echo "make platform-progress # Show platform-as-product progress tracker" build: @@ -44,5 +45,9 @@ app-deploy: @echo "[app-deploy] ENV=$(ENV) SERVICE=$(SERVICE) TAG=$(TAG)" @echo "Update GitOps manifest tag and let Argo CD reconcile" +app-policy-test: + @echo "[app-policy-test] run conftest against applications/gitops/base with applications/policy" + @echo "conftest test applications/gitops/base/*.yaml -p applications/policy" + platform-progress: @cat docs/platform-product-progress.md diff --git a/README.md b/README.md index 721be44..20fad73 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ It now provides opinionated architecture, repository layout, templates, and deli - Backstage software template example for self-service service creation - CI pipeline for platform IaC quality gates (fmt/validate/lint/security) - GitOps-oriented app delivery guardrails +- OPA/Conftest policy bundle for Kubernetes deployment security checks - Day-2 DX helpers via `Makefile` ## Repository structure @@ -89,6 +90,7 @@ make platform-plan ENV=dev make platform-apply ENV=dev make app-bootstrap SERVICE=my-api make app-deploy ENV=dev SERVICE=my-api TAG=v1.2.3 +make app-policy-test ``` ## Notes diff --git a/applications/gitops/base/sample-service.yaml b/applications/gitops/base/sample-service.yaml index 5e2c97d..dc45598 100644 --- a/applications/gitops/base/sample-service.yaml +++ b/applications/gitops/base/sample-service.yaml @@ -24,7 +24,7 @@ spec: spec: containers: - name: app - image: nginx:1.27 + image: nginx:1.27.0 ports: - containerPort: 80 resources: @@ -37,6 +37,7 @@ spec: 
securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true + runAsNonRoot: true --- apiVersion: v1 kind: Service diff --git a/applications/policy/README.md b/applications/policy/README.md new file mode 100644 index 0000000..7fef53d --- /dev/null +++ b/applications/policy/README.md @@ -0,0 +1,17 @@ +# Application GitOps Policy Bundle + +This directory contains OPA/Rego policies evaluated in CI with `conftest`. + +## Scope + +Policies currently validate Kubernetes manifests in `applications/gitops/base` and enforce: + +- non-`latest` immutable image tags +- CPU/memory requests and limits +- secure container defaults (`runAsNonRoot`, `allowPrivilegeEscalation: false`) + +## Local validation + +```bash +conftest test applications/gitops/base/*.yaml -p applications/policy +``` diff --git a/applications/policy/deployment-security.rego b/applications/policy/deployment-security.rego new file mode 100644 index 0000000..1d98209 --- /dev/null +++ b/applications/policy/deployment-security.rego @@ -0,0 +1,50 @@ +package main + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.securityContext.runAsNonRoot + msg := sprintf("deployment %q container %q must set securityContext.runAsNonRoot=true", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + container.securityContext.allowPrivilegeEscalation != false + msg := sprintf("deployment %q container %q must set securityContext.allowPrivilegeEscalation=false", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.requests.cpu + msg := sprintf("deployment %q container %q must define resources.requests.cpu", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not 
container.resources.requests.memory + msg := sprintf("deployment %q container %q must define resources.requests.memory", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.limits.cpu + msg := sprintf("deployment %q container %q must define resources.limits.cpu", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + not container.resources.limits.memory + msg := sprintf("deployment %q container %q must define resources.limits.memory", [input.metadata.name, container.name]) +} + +deny[msg] { + input.kind == "Deployment" + container := input.spec.template.spec.containers[_] + endswith(container.image, ":latest") + msg := sprintf("deployment %q container %q must not use mutable image tags like :latest", [input.metadata.name, container.name]) +} diff --git a/docs/platform-product-progress.md b/docs/platform-product-progress.md index 5d8c304..5e3ea34 100644 --- a/docs/platform-product-progress.md +++ b/docs/platform-product-progress.md @@ -1,6 +1,6 @@ # Platform as a Product Progress Tracker -_Last updated: 2026-03-24_ +_Last updated: 2026-03-26_ ## Delivery status snapshot @@ -12,7 +12,7 @@ _Last updated: 2026-03-24_ | App GitOps guardrails | ✅ Complete | 100% | kubeconform validation enabled and fail-fast behavior enforced. | | Secure-by-default CDK sample hardening | ✅ Complete | 100% | KMS, VPC, DLQ, IAM auth, caching, encrypted logs implemented. | | Environment overlays (dev/stage/prod) | 🟡 In Progress | 40% | Structure exists; env-specific manifests and policy sets pending. | -| Policy-as-code enforcement (OPA/Kyverno) | 🟡 In Progress | 30% | Placeholder step exists; enforceable policy bundles pending. 
| +| Policy-as-code enforcement (OPA/Kyverno) | 🟡 In Progress | 60% | Conftest policy bundle and CI enforcement added for deployment security/image/resource guardrails. | | Observability productization | 🟡 In Progress | 35% | Architecture defined; Prometheus/Grafana/Loki/OTel deployments pending. | | EKS + Argo CD platform runtime | ⏳ Planned | 20% | Target model documented; implementation modules still to be added. | | Backstage portal deployment | ⏳ Planned | 15% | Template exists; portal deployment and catalog automation pending. | @@ -29,7 +29,7 @@ _Last updated: 2026-03-24_ 1. Implement EKS runtime module under `platform/modules/eks` and bootstrap cluster add-ons. 2. Stand up Argo CD in `platform/services/argocd` with app-of-apps model. -3. Add policy bundles and CI checks (`conftest` and/or `kyverno apply`) in `app-gitops-guardrails`. +3. Expand policy bundle coverage beyond Deployment controls (Ingress, NetworkPolicy, PodDisruptionBudget). 4. Add observability baseline (Prometheus, Grafana, Loki, OpenTelemetry Collector). 5. Expand service repo structure with CI, Dockerfile, Helm chart, and SLO/runbook assets. 
From c86ba7fcfa06a67f053a722584b309298dbb28e7 Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 14:43:39 +0100 Subject: [PATCH 2/6] Fix Conftest installation in GitOps guardrails workflow --- .github/workflows/app-gitops-guardrails.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/app-gitops-guardrails.yml b/.github/workflows/app-gitops-guardrails.yml index 2894309..f927fa4 100644 --- a/.github/workflows/app-gitops-guardrails.yml +++ b/.github/workflows/app-gitops-guardrails.yml @@ -35,14 +35,18 @@ jobs: ./kubeconform -strict -summary "${manifest_files[@]}" + - name: Setup Go for Conftest install + uses: actions/setup-go@v5 + with: + go-version: '1.22' + - name: Policy checks with Conftest shell: bash run: | set -euo pipefail - curl -sSL -o conftest.tar.gz \ - https://github.com/open-policy-agent/conftest/releases/latest/download/conftest_$(uname -s)_$(uname -m).tar.gz - tar -xzf conftest.tar.gz conftest + go install github.com/open-policy-agent/conftest@latest + export PATH="$PATH:$(go env GOPATH)/bin" mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) @@ -51,4 +55,4 @@ jobs: exit 1 fi - ./conftest test "${manifest_files[@]}" -p applications/policy + conftest test "${manifest_files[@]}" -p applications/policy From cd319e845a240946e90c18aabd6b142dc8245135 Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 14:50:10 +0100 Subject: [PATCH 3/6] Stabilize GitOps CI by installing kubeconform and conftest via Go --- .github/workflows/app-gitops-guardrails.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/app-gitops-guardrails.yml b/.github/workflows/app-gitops-guardrails.yml index f927fa4..53381c3 100644 --- a/.github/workflows/app-gitops-guardrails.yml +++ b/.github/workflows/app-gitops-guardrails.yml @@ -17,14 +17,18 @@ jobs: - name: Checkout uses: 
actions/checkout@v4 + - name: Setup Go for policy toolchain + uses: actions/setup-go@v5 + with: + go-version: '1.22' + - name: Validate Kubernetes manifests with kubeconform shell: bash run: | set -euo pipefail - curl -sSL -o kubeconform.tar.gz \ - https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz - tar -xzf kubeconform.tar.gz kubeconform + go install github.com/yannh/kubeconform/cmd/kubeconform@latest + export PATH="$PATH:$(go env GOPATH)/bin" mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) @@ -33,12 +37,7 @@ jobs: exit 1 fi - ./kubeconform -strict -summary "${manifest_files[@]}" - - - name: Setup Go for Conftest install - uses: actions/setup-go@v5 - with: - go-version: '1.22' + kubeconform -strict -summary "${manifest_files[@]}" - name: Policy checks with Conftest shell: bash From e2e529e0ee755d7a02e82659c5712058b35d2835 Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 14:56:09 +0100 Subject: [PATCH 4/6] Update Rego policies for OPA v1 syntax compatibility --- applications/policy/deployment-security.rego | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/applications/policy/deployment-security.rego b/applications/policy/deployment-security.rego index 1d98209..409c1f0 100644 --- a/applications/policy/deployment-security.rego +++ b/applications/policy/deployment-security.rego @@ -1,48 +1,50 @@ package main -deny[msg] { +import rego.v1 + +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] not container.securityContext.runAsNonRoot msg := sprintf("deployment %q container %q must set securityContext.runAsNonRoot=true", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] container.securityContext.allowPrivilegeEscalation != false msg := 
sprintf("deployment %q container %q must set securityContext.allowPrivilegeEscalation=false", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] not container.resources.requests.cpu msg := sprintf("deployment %q container %q must define resources.requests.cpu", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] not container.resources.requests.memory msg := sprintf("deployment %q container %q must define resources.requests.memory", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] not container.resources.limits.cpu msg := sprintf("deployment %q container %q must define resources.limits.cpu", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] not container.resources.limits.memory msg := sprintf("deployment %q container %q must define resources.limits.memory", [input.metadata.name, container.name]) } -deny[msg] { +deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] endswith(container.image, ":latest") From 10f79194611f99d9cdfbb4da1100e5def794ec3b Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 15:04:43 +0100 Subject: [PATCH 5/6] Address CI policy review issues: pin versions and fix checks --- .github/workflows/app-gitops-guardrails.yml | 4 ++-- Makefile | 2 +- applications/policy/deployment-security.rego | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/app-gitops-guardrails.yml b/.github/workflows/app-gitops-guardrails.yml index 53381c3..c7ed6fc 100644 --- a/.github/workflows/app-gitops-guardrails.yml +++ 
b/.github/workflows/app-gitops-guardrails.yml @@ -27,7 +27,7 @@ jobs: run: | set -euo pipefail - go install github.com/yannh/kubeconform/cmd/kubeconform@latest + go install github.com/yannh/kubeconform/cmd/kubeconform@v0.6.7 export PATH="$PATH:$(go env GOPATH)/bin" mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) @@ -44,7 +44,7 @@ jobs: run: | set -euo pipefail - go install github.com/open-policy-agent/conftest@latest + go install github.com/open-policy-agent/conftest@v0.57.0 export PATH="$PATH:$(go env GOPATH)/bin" mapfile -t manifest_files < <(find applications/gitops/base -type f \( -name '*.yaml' -o -name '*.yml' \) | sort) diff --git a/Makefile b/Makefile index 36d06ec..f3d5e11 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ app-deploy: app-policy-test: @echo "[app-policy-test] run conftest against applications/gitops/base with applications/policy" - @echo "conftest test applications/gitops/base/*.yaml -p applications/policy" + conftest test applications/gitops/base/*.yaml -p applications/policy platform-progress: @cat docs/platform-product-progress.md diff --git a/applications/policy/deployment-security.rego b/applications/policy/deployment-security.rego index 409c1f0..6457604 100644 --- a/applications/policy/deployment-security.rego +++ b/applications/policy/deployment-security.rego @@ -12,7 +12,7 @@ deny contains msg if { deny contains msg if { input.kind == "Deployment" container := input.spec.template.spec.containers[_] - container.securityContext.allowPrivilegeEscalation != false + not container.securityContext.allowPrivilegeEscalation == false msg := sprintf("deployment %q container %q must set securityContext.allowPrivilegeEscalation=false", [input.metadata.name, container.name]) } From bf89955de3c5de3018dc4639fd927799ef96fdfe Mon Sep 17 00:00:00 2001 From: Tukue Gebregergis Date: Thu, 26 Mar 2026 15:13:23 +0100 Subject: [PATCH 6/6] Add platform-as-product operating model 
documentation --- README.md | 1 + docs/platform-product-operating-model.md | 95 ++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 docs/platform-product-operating-model.md diff --git a/README.md b/README.md index 20fad73..ab2fe6d 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ Review feedback and implemented fixes are tracked in: Track implementation maturity and next milestones in: - `docs/platform-product-progress.md` +- `docs/platform-product-operating-model.md` ## Quick commands diff --git a/docs/platform-product-operating-model.md b/docs/platform-product-operating-model.md new file mode 100644 index 0000000..fc3a41d --- /dev/null +++ b/docs/platform-product-operating-model.md @@ -0,0 +1,95 @@ +# Platform as a Product Operating Model + +_Last updated: 2026-03-26_ + +This guide defines how the platform is managed as an internal product, not only as infrastructure. + +## Product mission + +Enable product teams to ship secure, observable services to production quickly by providing paved roads with self-service workflows and built-in guardrails. + +## Product users and ownership + +| Area | Owner | Responsibility | +|---|---|---| +| Platform product strategy | Platform Product Manager | Roadmap, prioritization, adoption, stakeholder communication | +| Runtime and infrastructure | Platform Engineering | EKS, networking, compute, shared services | +| Security guardrails | Security + Platform | Policy bundles, IAM patterns, vulnerability controls | +| Developer portal and templates | Platform Developer Experience | Backstage catalog, templates, golden paths | +| Service onboarding consumers | Product Engineering Teams | Build services using templates and follow platform contracts | + +## Product capabilities + +The platform product is organized into capabilities with explicit contracts: + +1. **Service scaffolding** + - Backstage template-driven repository bootstrap + - Standardized service metadata and ownership tags +2. 
**Delivery orchestration** + - CI checks for manifest validation and policy enforcement + - GitOps reconciliation through Argo CD +3. **Runtime baseline** + - EKS runtime and namespace conventions + - Network, compute, and secret management patterns +4. **Security and policy** + - OPA/Conftest policy checks in PR workflow + - Secure defaults for workload manifests +5. **Observability and reliability** + - Metrics, logs, traces and SLO conventions + - Alerting integration and runbook expectations + +## Platform contracts (golden path) + +Every onboarded service is expected to provide: + +- a catalog entry with service owner and tier +- deployable GitOps manifests for `dev`, `stage`, and `prod` +- CPU/memory requests and limits on workload containers +- non-root runtime and no privilege escalation +- immutable image references (no `:latest`) +- minimum observability signals (health, metrics, logs) + +## Intake and prioritization workflow + +1. Teams submit platform requests through backlog intake. +2. Requests are triaged weekly by Platform PM + lead engineer. +3. Prioritization uses impact, adoption, risk reduction, and effort. +4. Decisions are published in roadmap updates. +5. Completed features include migration docs and rollout notes. + +## Release and change management + +- **Cadence**: bi-weekly platform release train. 
+- **Change types**: + - additive (non-breaking): immediate release + - behavioral (potentially breaking): release note + deprecation window +- **Versioning approach**: + - templates and policy bundles use semantic version tags + - breaking policy changes require staged enforcement (warn -> block) + +## Adoption metrics + +Track platform outcomes as product KPIs: + +- lead time to first deployment +- percentage of services onboarded via template +- PR policy compliance pass rate +- failed deployment rollback rate +- developer satisfaction (quarterly pulse) + +## Operating rituals + +- Weekly platform triage and incident review +- Bi-weekly roadmap/demo for stakeholders +- Monthly policy and compliance review with security +- Quarterly platform maturity review against success metrics + +## Documentation standards + +For every new platform capability, include: + +- capability description and user story +- onboarding instructions +- operational runbook and escalation path +- rollback/deprecation guidance +- success metric and owner