diff --git a/adapter/s3_admin.go b/adapter/s3_admin.go index 8cef130d3..aad03e102 100644 --- a/adapter/s3_admin.go +++ b/adapter/s3_admin.go @@ -4,8 +4,10 @@ import ( "bytes" "context" "sort" + "strings" "github.com/bootjp/elastickv/internal/s3keys" + "github.com/bootjp/elastickv/kv" "github.com/bootjp/elastickv/store" "github.com/cockroachdb/errors" ) @@ -175,3 +177,265 @@ func summaryFromBucketMeta(name string, meta *s3BucketMeta) AdminBucketSummary { Owner: meta.Owner, } } + +// Sentinel errors the admin write methods return so the bridge in +// main_admin.go can translate them into admin-package vocabulary +// without sniffing strings. Named separately from +// ErrAdminTableAlreadyExists / ErrAdminTableNotFound on the Dynamo +// side so a future per-resource role / status divergence does not +// require renaming both packages' callers. +var ( + // ErrAdminBucketAlreadyExists signals that AdminCreateBucket + // targeted a name already in use. Maps to 409 Conflict. + ErrAdminBucketAlreadyExists = errors.New("s3 admin: bucket already exists") + // ErrAdminBucketNotFound signals that AdminDeleteBucket / + // AdminPutBucketAcl targeted a missing bucket. Maps to 404. + ErrAdminBucketNotFound = errors.New("s3 admin: bucket not found") + // ErrAdminBucketNotEmpty signals that AdminDeleteBucket targeted + // a bucket that still has objects. Maps to 409 Conflict to match + // the SigV4 path's BucketNotEmpty response (the dashboard cannot + // force a recursive delete; the operator must clean up first). + ErrAdminBucketNotEmpty = errors.New("s3 admin: bucket is not empty") + // ErrAdminInvalidBucketName signals that AdminCreateBucket got + // a name that does not satisfy validateS3BucketName. Maps to 400. + ErrAdminInvalidBucketName = errors.New("s3 admin: invalid bucket name") + // ErrAdminInvalidACL signals that the ACL string did not pass + // validateS3CannedAcl. 
Maps to 400 (the SigV4 path returns 501 + // NotImplemented for unsupported canned ACLs, but the admin API + // is documented as private/public-read only and rejecting other + // values as invalid input is a more useful contract for the + // dashboard). + ErrAdminInvalidACL = errors.New("s3 admin: invalid ACL") +) + +// AdminCreateBucket creates a bucket on behalf of the admin +// dashboard. The principal MUST be re-validated by the caller (the +// admin HTTP handler does this against the live RoleStore); this +// method enforces the authorisation invariant a second time so a +// follower-forwarded call cannot smuggle a read-only principal past +// the check on the leader side (Section 3.2 "認可の真実は常に +// adapter 側"). +// +// The transaction is atomic: bucket meta + generation + ACL all land +// in a single OperationGroup, mirroring the SigV4 createBucket path. +// On success returns the freshly-stored summary; on conflict returns +// ErrAdminBucketAlreadyExists; on a non-leader / non-full-role / bad +// input returns the corresponding sentinel. +func (s *S3Server) AdminCreateBucket(ctx context.Context, principal AdminPrincipal, name, acl string) (*AdminBucketSummary, error) { + if !principal.Role.canWrite() { + return nil, ErrAdminForbidden + } + if !s.isVerifiedS3Leader() { + return nil, ErrAdminNotLeader + } + if err := validateS3BucketName(name); err != nil { + return nil, errors.Wrapf(ErrAdminInvalidBucketName, "%s", err.Error()) + } + acl = adminCanonicalACL(acl) + if err := validateS3CannedAcl(acl); err != nil { + return nil, errors.Wrapf(ErrAdminInvalidACL, "%s", err.Error()) + } + + var summary *AdminBucketSummary + err := s.retryS3Mutation(ctx, func() error { + out, err := s.adminCreateBucketTxn(ctx, principal, name, acl) + if err != nil { + return err + } + summary = out + return nil + }) + if err != nil { + return nil, err //nolint:wrapcheck // sentinel errors propagate as-is; structured errors are already wrapped above. 
+ } + return summary, nil +} + +// adminCreateBucketTxn is the per-attempt body retryS3Mutation +// invokes. Pulled out so AdminCreateBucket stays under the +// cyclomatic ceiling without hiding the bucket-existence / +// generation / commit-ts dance — every step has a meaningful +// error path that the wrapping retry harness needs to see. +func (s *S3Server) adminCreateBucketTxn(ctx context.Context, principal AdminPrincipal, name, acl string) (*AdminBucketSummary, error) { + readTS := s.readTS() + startTS := s.txnStartTS(readTS) + readPin := s.pinReadTS(readTS) + defer readPin.Release() + + existing, exists, err := s.loadBucketMetaAt(ctx, name, readTS) + if err != nil { + return nil, errors.WithStack(err) + } + if exists && existing != nil { + return nil, ErrAdminBucketAlreadyExists + } + nextGeneration, err := s.nextBucketGenerationAt(ctx, name, readTS) + if err != nil { + return nil, errors.WithStack(err) + } + commitTS, err := s.nextTxnCommitTS(startTS) + if err != nil { + return nil, errors.WithStack(err) + } + meta := &s3BucketMeta{ + BucketName: name, + Generation: nextGeneration, + CreatedAtHLC: commitTS, + Region: s.effectiveRegion(), + Owner: principal.AccessKey, + Acl: acl, + } + body, err := encodeS3BucketMeta(meta) + if err != nil { + return nil, errors.WithStack(err) + } + _, err = s.coordinator.Dispatch(ctx, &kv.OperationGroup[kv.OP]{ + IsTxn: true, + StartTS: startTS, + CommitTS: commitTS, + Elems: []*kv.Elem[kv.OP]{ + {Op: kv.Put, Key: s3keys.BucketMetaKey(name), Value: body}, + {Op: kv.Put, Key: s3keys.BucketGenerationKey(name), Value: encodeS3Generation(nextGeneration)}, + }, + }) + if err != nil { + return nil, errors.WithStack(err) + } + out := summaryFromBucketMeta(name, meta) + return &out, nil +} + +// AdminPutBucketAcl swaps the canned ACL on an existing bucket. +// Same authorisation contract as AdminCreateBucket. Mutates only +// the meta.Acl field; generation is preserved so existing object +// references stay valid. 
+func (s *S3Server) AdminPutBucketAcl(ctx context.Context, principal AdminPrincipal, name, acl string) error { + if !principal.Role.canWrite() { + return ErrAdminForbidden + } + if !s.isVerifiedS3Leader() { + return ErrAdminNotLeader + } + acl = adminCanonicalACL(acl) + if err := validateS3CannedAcl(acl); err != nil { + return errors.Wrapf(ErrAdminInvalidACL, "%s", err.Error()) + } + + err := s.retryS3Mutation(ctx, func() error { + readTS := s.readTS() + startTS := s.txnStartTS(readTS) + readPin := s.pinReadTS(readTS) + defer readPin.Release() + + meta, exists, err := s.loadBucketMetaAt(ctx, name, readTS) + if err != nil { + return errors.WithStack(err) + } + if !exists || meta == nil { + return ErrAdminBucketNotFound + } + meta.Acl = acl + body, err := encodeS3BucketMeta(meta) + if err != nil { + return errors.WithStack(err) + } + _, err = s.coordinator.Dispatch(ctx, &kv.OperationGroup[kv.OP]{ + IsTxn: true, + StartTS: startTS, + Elems: []*kv.Elem[kv.OP]{ + {Op: kv.Put, Key: s3keys.BucketMetaKey(name), Value: body}, + }, + }) + return errors.WithStack(err) + }) + if err != nil { + return err //nolint:wrapcheck // sentinel errors propagate as-is. + } + return nil +} + +// AdminDeleteBucket removes a bucket if it is empty. Same +// authorisation contract as the other admin write methods. The +// bucket-must-be-empty rule mirrors the SigV4 deleteBucket path — +// the dashboard cannot force a recursive delete, by design. +// +// Known orphan-race limitation (coderabbitai 🔴 / 🟠 on PR #669): +// the empty-bucket probe (ScanAt with limit=1 on +// ObjectManifestPrefixForBucket) reads at readTS but the +// subsequent BucketMetaKey delete only carries that single point +// key in its ReadKeys set. A concurrent PutObject that inserts a +// manifest key in the scanned prefix between readTS and the +// delete's commitTS will not conflict — the OCC validator only +// inspects keys that appear in ReadKeys, and there is no +// ReadRanges mechanism today. 
The object's manifest key survives +// under a now-deleted bucket meta and becomes orphaned. +// +// This race exists pre-existing in the SigV4 path +// (adapter/s3.go:deleteBucket — same shape, same limitation), so +// AdminDeleteBucket inherits the contract; closing the gap +// requires either (a) bumping BucketGenerationKey on every +// PutObject so it can serve as an OCC token in this read set, or +// (b) extending OperationGroup with ReadRanges and teaching the +// FSM to validate range emptiness atomically with commit. Both +// are larger changes outside this PR's scope; tracked in +// docs/design/2026_04_24_partial_admin_dashboard.md under the +// Outstanding open items section. Operators concerned about the +// orphan window today should pause writes against the target +// bucket before issuing the admin delete. +func (s *S3Server) AdminDeleteBucket(ctx context.Context, principal AdminPrincipal, name string) error { + if !principal.Role.canWrite() { + return ErrAdminForbidden + } + if !s.isVerifiedS3Leader() { + return ErrAdminNotLeader + } + + err := s.retryS3Mutation(ctx, func() error { + readTS := s.readTS() + startTS := s.txnStartTS(readTS) + readPin := s.pinReadTS(readTS) + defer readPin.Release() + + meta, exists, err := s.loadBucketMetaAt(ctx, name, readTS) + if err != nil { + return errors.WithStack(err) + } + if !exists || meta == nil { + return ErrAdminBucketNotFound + } + start := s3keys.ObjectManifestPrefixForBucket(name, meta.Generation) + kvs, err := s.store.ScanAt(ctx, start, prefixScanEnd(start), 1, readTS) + if err != nil { + return errors.WithStack(err) + } + if len(kvs) > 0 { + return ErrAdminBucketNotEmpty + } + _, err = s.coordinator.Dispatch(ctx, &kv.OperationGroup[kv.OP]{ + IsTxn: true, + StartTS: startTS, + Elems: []*kv.Elem[kv.OP]{ + {Op: kv.Del, Key: s3keys.BucketMetaKey(name)}, + }, + }) + return errors.WithStack(err) + }) + if err != nil { + return err //nolint:wrapcheck // sentinel errors propagate as-is. 
+ } + return nil +} + +// adminCanonicalACL normalises an empty input to the canned +// "private" default. The SigV4 createBucket / putBucketAcl paths +// apply the same default after trimming the x-amz-acl header. +// Pulled out so the admin write methods do not silently accept a +// blank string and create / mutate with whatever validateS3CannedAcl +// happens to allow on its empty branch. +func adminCanonicalACL(acl string) string { + trimmed := strings.TrimSpace(acl) + if trimmed == "" { + return s3AclPrivate + } + return trimmed +} diff --git a/adapter/s3_admin_test.go b/adapter/s3_admin_test.go index 70760649d..6634a4f81 100644 --- a/adapter/s3_admin_test.go +++ b/adapter/s3_admin_test.go @@ -5,6 +5,7 @@ import ( "fmt" "net/http" "net/http/httptest" + "strings" "testing" "github.com/bootjp/elastickv/store" @@ -99,6 +100,215 @@ func TestS3Server_AdminDescribeBucket_Missing(t *testing.T) { require.Nil(t, got) } +// fullAdminPrincipal returns a Full-role principal so write-path +// adapter tests can dispatch without standing up the HTTP layer. +// Mirrors fullAdminPrincipal in dynamodb_admin_test.go (kept +// distinct so the S3 tests can diverge if the role model splits). +func fullAdminBucketsPrincipal() AdminPrincipal { + return AdminPrincipal{AccessKey: "AKIA_FULL", Role: AdminRoleFull} +} + +// readOnlyAdminBucketsPrincipal mirrors the above for the +// read-only role. 
+func readOnlyAdminBucketsPrincipal() AdminPrincipal { + return AdminPrincipal{AccessKey: "AKIA_RO", Role: AdminRoleReadOnly} +} + +func TestS3Server_AdminCreateBucket_HappyPath(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + got, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "public-assets", s3AclPublicRead) + require.NoError(t, err) + require.NotNil(t, got) + require.Equal(t, "public-assets", got.Name) + require.Equal(t, s3AclPublicRead, got.ACL) + require.NotZero(t, got.CreatedAtHLC) + require.NotZero(t, got.Generation) + require.Equal(t, "AKIA_FULL", got.Owner, + "AdminCreateBucket must persist the principal access key as the bucket owner") + + // Round-trip: AdminDescribeBucket should see what we just stored. + round, exists, err := server.AdminDescribeBucket(context.Background(), "public-assets") + require.NoError(t, err) + require.True(t, exists) + require.Equal(t, s3AclPublicRead, round.ACL) +} + +func TestS3Server_AdminCreateBucket_DefaultsACL(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + got, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "private-assets", "") + require.NoError(t, err) + require.Equal(t, s3AclPrivate, got.ACL, + "empty ACL must default to private (matches the SigV4 path)") +} + +func TestS3Server_AdminCreateBucket_RejectsReadOnly(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + got, err := server.AdminCreateBucket(context.Background(), + readOnlyAdminBucketsPrincipal(), "any", s3AclPrivate) + require.ErrorIs(t, err, ErrAdminForbidden) + require.Nil(t, got) +} + +func TestS3Server_AdminCreateBucket_RejectsInvalidACL(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := 
NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + got, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "any", "public-write") + require.ErrorIs(t, err, ErrAdminInvalidACL) + require.Nil(t, got) +} + +func TestS3Server_AdminCreateBucket_RejectsInvalidBucketName(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + got, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "BAD_NAME", s3AclPrivate) + require.ErrorIs(t, err, ErrAdminInvalidBucketName) + require.Nil(t, got) +} + +func TestS3Server_AdminCreateBucket_AlreadyExists(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + _, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "duplicate", s3AclPrivate) + require.NoError(t, err) + + _, err = server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "duplicate", s3AclPrivate) + require.ErrorIs(t, err, ErrAdminBucketAlreadyExists) +} + +func TestS3Server_AdminPutBucketAcl_RoundTrips(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + _, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "orders", s3AclPrivate) + require.NoError(t, err) + + err = server.AdminPutBucketAcl(context.Background(), + fullAdminBucketsPrincipal(), "orders", s3AclPublicRead) + require.NoError(t, err) + + got, _, err := server.AdminDescribeBucket(context.Background(), "orders") + require.NoError(t, err) + require.Equal(t, s3AclPublicRead, got.ACL) +} + +func TestS3Server_AdminPutBucketAcl_MissingBucket(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + err := 
server.AdminPutBucketAcl(context.Background(), + fullAdminBucketsPrincipal(), "missing", s3AclPublicRead) + require.ErrorIs(t, err, ErrAdminBucketNotFound) +} + +func TestS3Server_AdminPutBucketAcl_RejectsReadOnly(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + err := server.AdminPutBucketAcl(context.Background(), + readOnlyAdminBucketsPrincipal(), "any", s3AclPrivate) + require.ErrorIs(t, err, ErrAdminForbidden) +} + +func TestS3Server_AdminDeleteBucket_HappyPath(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + _, err := server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "to-delete", s3AclPrivate) + require.NoError(t, err) + + err = server.AdminDeleteBucket(context.Background(), + fullAdminBucketsPrincipal(), "to-delete") + require.NoError(t, err) + + _, exists, err := server.AdminDescribeBucket(context.Background(), "to-delete") + require.NoError(t, err) + require.False(t, exists) +} + +func TestS3Server_AdminDeleteBucket_MissingBucket(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + err := server.AdminDeleteBucket(context.Background(), + fullAdminBucketsPrincipal(), "no-such") + require.ErrorIs(t, err, ErrAdminBucketNotFound) +} + +func TestS3Server_AdminDeleteBucket_RejectsReadOnly(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + err := server.AdminDeleteBucket(context.Background(), + readOnlyAdminBucketsPrincipal(), "any") + require.ErrorIs(t, err, ErrAdminForbidden) +} + +func TestS3Server_AdminDeleteBucket_RejectsNonEmpty(t *testing.T) { + t.Parallel() + + st := store.NewMVCCStore() + server := NewS3Server(nil, "", st, newLocalAdapterCoordinator(st), nil) + + _, err := 
server.AdminCreateBucket(context.Background(), + fullAdminBucketsPrincipal(), "with-objects", s3AclPrivate) + require.NoError(t, err) + + // Place an object via the SigV4 path so the deletion path sees + // a non-empty bucket. Reusing the existing handler avoids + // reaching into the storage layer's encoding directly. + rec := httptest.NewRecorder() + req := newS3TestRequest(http.MethodPut, "/with-objects/file.txt", + strings.NewReader("hello")) + req.Header.Set("Content-Type", "text/plain") + server.handle(rec, req) + require.Equal(t, http.StatusOK, rec.Code) + + err = server.AdminDeleteBucket(context.Background(), + fullAdminBucketsPrincipal(), "with-objects") + require.ErrorIs(t, err, ErrAdminBucketNotEmpty) +} + // TestS3Server_AdminListBuckets_PaginatesPastSinglePage pins the // fix for the truncation bug Codex P1 / Claude Issue 1 / Gemini // flagged on PR #658: AdminListBuckets must walk the metadata diff --git a/docs/admin.md b/docs/admin.md new file mode 100644 index 000000000..28c771b7e --- /dev/null +++ b/docs/admin.md @@ -0,0 +1,341 @@ +# elastickv admin dashboard — operator guide + +This document covers configuration and day-2 operation of the admin +HTTP listener. Architecture and design rationale live in +[docs/design/2026_04_24_proposed_admin_dashboard.md](design/2026_04_24_proposed_admin_dashboard.md); +read that first if you're touching the code. + +## What the admin dashboard is + +A separate HTTP listener (default `127.0.0.1:8080`) that exposes a +React SPA + JSON API for inspecting the cluster and managing +DynamoDB tables / S3 buckets without having to construct SigV4 +requests. It is **disabled by default**: set `-adminEnabled` to turn +it on. + +The listener is independent of the data-plane DynamoDB +(`-dynamoAddress`) and S3 (`-s3Address`) endpoints — credentials, +TLS, and auth are configured separately. 
## Quick start (loopback dev) + +The minimum invocation that produces a working dashboard: + +```sh +./elastickv \ + -raftId=n1 -raftBootstrap \ + -dynamoAddress=127.0.0.1:8000 \ + -s3Address=127.0.0.1:9000 \ + -s3CredentialsFile=/path/to/creds.json \ + -adminEnabled \ + -adminSessionSigningKeyFile=/path/to/admin-hs256.b64 \ + -adminFullAccessKeys=AKIA_ADMIN +``` + +Then open `http://127.0.0.1:8080/admin/` in a browser and log in +with the access key + secret pair from the credentials file. + +## Configuration reference + +### Required when `-adminEnabled=true` + +| Flag | Description | +|---|---| +| `-adminEnabled` | Master on/off switch. Default `false`. | +| `-adminSessionSigningKey` *or* `-adminSessionSigningKeyFile` *or* `ELASTICKV_ADMIN_SESSION_SIGNING_KEY` | Cluster-shared base64-encoded HS256 key — **exactly 64 raw bytes** (88 base64 chars with standard padding, or 86 with `RawURLEncoding`). The validator rejects any other length at startup with a precise error message. **Must be the same on every node** — JWTs minted by node A are verified by node B during follower→leader forwarding, so a mismatch breaks the dashboard's read paths on follower nodes. The `*File` / env-var forms keep the secret out of `/proc/<pid>/cmdline`. | +| `-s3CredentialsFile` | JSON file with at least one access key + secret key pair. Same file the S3 adapter uses for SigV4; the admin dashboard reuses it for login authentication. | +| `-adminFullAccessKeys` *and/or* `-adminReadOnlyAccessKeys` | Comma-separated allow-lists. Only access keys listed here may log into the dashboard, even if their SigV4 secret validates against the credentials file. Keys must not appear in both lists. | + +### Optional + +| Flag | Description | +|---|---| +| `-adminListen` | host:port for the admin listener. Defaults to `127.0.0.1:8080`. | +| `-adminTLSCertFile` / `-adminTLSKeyFile` | PEM cert + key. Both must be set together; a partial config fails validation at startup.
| +| `-adminAllowPlaintextNonLoopback` | Explicit opt-out for the non-loopback-without-TLS startup hard-error. **Strongly discouraged** — lets the listener accept plaintext on a non-loopback bind. **Does not** affect the cookie `Secure` attribute (that is `-adminAllowInsecureDevCookie` below); a deployment that sets only this flag will mint `Secure` cookies that the browser refuses to send over the plaintext channel, breaking session lifetime end-to-end. Pair it with `-adminAllowInsecureDevCookie` if the goal is a working plaintext rig. | +| `-adminSessionSigningKeyPrevious` *or* `-adminSessionSigningKeyPreviousFile` *or* `ELASTICKV_ADMIN_SESSION_SIGNING_KEY_PREVIOUS` | Previous HS256 key used only for verification during a rotation window. New tokens always use the primary key; existing tokens minted under the previous key continue to verify until they expire. | +| `-adminAllowInsecureDevCookie` | Mints session cookies without `Secure` for local plaintext development. Do not set on any deployment that touches a network. | + +### Hard-error startup conditions + +The process fails to start (non-zero exit) when: + +- `-adminEnabled=true` but `-s3CredentialsFile` is empty or missing, or its parsed map has zero entries — without credentials every login is rejected, and "locked-down admin" is `-adminEnabled=false`. +- `-adminEnabled=true` but `-adminSessionSigningKey` (and the `*File` / env var) all decode to empty. +- `-adminEnabled=true` but `-adminListen` is empty or not a valid host:port. +- `-adminTLSCertFile` xor `-adminTLSKeyFile` is set (partial TLS config). +- `-adminListen` is bound to a non-loopback address, TLS is not configured, **and** `-adminAllowPlaintextNonLoopback` is not set. The error message names the flag combinations that resolve it. +- `-adminFullAccessKeys` and `-adminReadOnlyAccessKeys` overlap (the same access key listed in both). 
+ +These are deliberate — silent fallbacks to "auth disabled" or "TLS +off" would downgrade security guarantees the operator is unaware of. + +## TLS setup + +Two supported topologies: + +### A. Loopback only (`127.0.0.1` / `::1`) + +No TLS required. By default the dashboard mints cookies with +`Secure=true`, which most modern browsers accept on the loopback +origin even without TLS (the loopback-is-trusted policy). If a +specific browser refuses the cookie in this configuration, set +`-adminAllowInsecureDevCookie` to mint without `Secure` — the flag +is intentionally distinct from `-adminAllowPlaintextNonLoopback` +because the listener can be plaintext for entirely separate +reasons (loopback) than the cookie needing to drop `Secure`. + +### B. Reachable address with TLS + +Set `-adminListen` to the public bind, plus `-adminTLSCertFile` and +`-adminTLSKeyFile`. TLS 1.2+ is enforced. Cookies are issued with +`Secure; SameSite=Strict; HttpOnly`. + +Cert renewal: the listener picks up the cert files at startup only; +restart the process after rotating certs. Hot-reload is not +implemented (out of scope for the dashboard's maintenance model). + +### Discouraged: plaintext non-loopback + +`-adminAllowPlaintextNonLoopback` exists as an escape hatch for +short-lived test deployments. The session JWT and its bearer cookie +travel in clear text in this mode; anyone on the path can replay +the token until it expires. Do not enable on a long-running +deployment. + +A working plaintext rig also needs `-adminAllowInsecureDevCookie` — +otherwise the dashboard mints cookies with `Secure=true` and the +browser refuses to send them back over plaintext, so login appears +to succeed but every subsequent request 401s. The two flags are +deliberately separate so a misconfigured deployment fails closed +on either axis (TLS guard or cookie attribute) rather than +silently downgrading both at once. 
+ +## Roles + +Two roles, both checked against the live `-adminFullAccessKeys` / +`-adminReadOnlyAccessKeys` lists on **every** state-changing +request (not just at login): + +- **read-only** — may list / describe Dynamo tables and S3 buckets, view cluster status. Cannot create, mutate ACL, or delete. +- **full** — adds POST / PUT / DELETE on `/dynamo/tables` and `/s3/buckets`. + +A key revoked from `-adminFullAccessKeys` immediately loses +write access on the next request — the dashboard does not wait for +the token to expire. The token's role claim is treated as a hint; +the live role index is authoritative. + +## API surface + +All endpoints are under `/admin/api/v1/`. Authentication: cookie +session minted by `POST /auth/login`; CSRF: double-submit token in +`admin_csrf` cookie + `X-Admin-CSRF` header on every state-changing +method. + +| Method | Path | Role | Notes | +|---|---|---|---| +| `POST` | `/auth/login` | none | Body `{access_key, secret_key}`. Sets `admin_session` and `admin_csrf` cookies. | +| `POST` | `/auth/logout` | any | Invalidates the session cookie. | +| `GET` | `/cluster` | any | Node ID, Raft leader, version. | +| `GET` | `/dynamo/tables` | any | Paginated list. `?limit=` (default 100, max 1000). | +| `POST` | `/dynamo/tables` | full | Body schema in design 4.2. | +| `GET` | `/dynamo/tables/{name}` | any | Schema + GSI summary. | +| `DELETE` | `/dynamo/tables/{name}` | full | 204 on success. | +| `GET` | `/s3/buckets` | any | Paginated list with the same `?limit=` semantics. | +| `POST` | `/s3/buckets` | full | Body `{bucket_name, acl?}`. ACL omitted defaults to `private`. | +| `GET` | `/s3/buckets/{name}` | any | Bucket meta + ACL. | +| `PUT` | `/s3/buckets/{name}/acl` | full | Body `{acl}`. Only `private` and `public-read` are accepted. | +| `DELETE` | `/s3/buckets/{name}` | full | 204 on success. The bucket must be empty (no objects); a non-empty bucket returns 409 `bucket_not_empty`. 
| + +## Follower → leader forwarding + +Writes (`POST` / `PUT` / `DELETE`) require the local node to be the +Raft leader. When the SPA's request hits a follower, the dashboard +transparently forwards the call to the leader over an internal +gRPC service (`AdminForward`). The leader re-validates the +principal against its own `adminFullAccessKeys` list before +acting — a follower cannot smuggle a downgraded key past the +leader's view. + +This means there is **no need to point the SPA at a specific +node**: any node with `-adminEnabled` can serve the dashboard. +Operators that fan out behind a load balancer get the same +behaviour as a single-node cluster, with one caveat below. + +### Follower forwarding caveat: rolling configuration changes + +A configuration change (e.g. adding `AKIA_NEW` to +`-adminFullAccessKeys`) must propagate to **every node** before +the new key works against any follower's dashboard. During the +rollout window: + +- A login against a node that has not yet been restarted with the new flags fails with 403. +- A token minted by an updated node, replayed against a not-yet-updated node, will be re-validated against that node's stale role list. If the key is missing on the older node, the request fails with 403 even though the token is structurally valid. + +The dashboard does not have an automatic role-refresh path — restart +each node after editing the access-key flags. + +### Election-period 503 + +When the leader steps down mid-write (or has not yet been elected +after a fresh start), the forwarder cannot reach a leader and the +SPA receives `503 Service Unavailable` with a `Retry-After: 1` +header. The current SPA client (`web/admin/src/api/client.ts`) +makes a single `fetch` call with no automatic retry, so the user +sees the 503 surfaced directly and must re-issue the action. 
The +`Retry-After: 1` header is still emitted so a future client (or an +external operator script driving the JSON API) can implement the +one-second back-off the server is asking for. Operators +investigating "intermittent 503s" should look at Raft leader-churn +logs first. + +## Audit log + +Every state-changing admin request emits structured slog lines at +`INFO` level under the `admin_audit` key on the leader's stdout (or +wherever the process slog handler is wired). A protected-chain +mutation (Dynamo / S3 / cluster / keyviz writes) typically produces +**two** audit lines: one operation-specific line from the source +that performed the mutation, plus one generic HTTP-shaped line from +the `Audit` middleware. Auth endpoints (`/auth/login`, `/auth/logout`) +produce **one** line — the action-specific one from `AuthService` — +because the generic middleware is intentionally not wrapped around +them (see the per-shape section below for why). The shapes differ +by source — log parsers should treat the `admin_audit` key as a +union and dispatch on the fields present. + +**`Audit` middleware** — emitted for non-GET/HEAD/OPTIONS requests +on the **protected mux chain** (Dynamo, S3, cluster, keyviz) after +`SessionAuth` accepts the session, but **before** `CSRFDoubleSubmit` +runs. That ordering is deliberate: a CSRF-rejected protected +request still produces an audit line because the actor is already +known, but an unauthenticated request (no / invalid session) is +rejected at `SessionAuth` and never reaches the middleware. The +following endpoints are **not** wrapped by this middleware and rely +on their own `admin_audit` emission instead: + +- `/auth/login` — runs without a pre-existing session, so the + generic middleware cannot identify the actor; `AuthService` + emits `admin_audit action=login` (success and failure) directly. 
+- `/auth/logout` — runs through `protectNoAudit` so logout produces + exactly one `admin_audit action=logout` line from `AuthService` + rather than two (a generic line plus the action-specific one). + +For requests that *do* reach the middleware, the line is always +present on the node that received the HTTP request — which may be +a follower if the request was then forwarded: + +``` +admin_audit actor=AKIA_ADMIN role=full method=POST path=/admin/api/v1/s3/buckets status=201 remote=10.0.0.7:51234 duration=8.2ms +``` + +**`S3Handler` operation line** — emitted on the leader after a +successful bucket mutation. Only the S3 admin path emits these; the +DynamoDB admin path relies on the middleware line plus the forwarded +line below for its audit trail: + +``` +admin_audit actor=AKIA_ADMIN role=full operation=create_bucket bucket=my-bucket +admin_audit actor=AKIA_ADMIN role=full operation=put_bucket_acl bucket=my-bucket acl=public-read +admin_audit actor=AKIA_ADMIN role=full operation=delete_bucket bucket=my-bucket +``` + +**`ForwardServer` operation line** — emitted on the leader when a +follower forwarded the request via `AdminForward`. Carries the +originating follower's node ID in `forwarded_from`. Covers both +DynamoDB and S3 admin operations: + +``` +admin_audit actor=AKIA_ADMIN role=full forwarded_from=n2 operation=create_table table=orders +admin_audit actor=AKIA_ADMIN role=full forwarded_from=n2 operation=delete_table table=orders +admin_audit actor=AKIA_ADMIN role=full forwarded_from=n2 operation=put_bucket_acl bucket=my-bucket acl=public-read +``` + +CR and LF in `forwarded_from` are stripped at the entry point — a +hostile follower cannot split a single audit line into two by +smuggling control characters into its node ID. + +Login and logout emit their own `admin_audit` lines so the JWT's +lifetime can be correlated with the mutations it authorised. 
The +two shapes differ on a single field — login carries `claimed_actor` +because the access key the operator typed is distinct from the +authenticated `actor` (a successful login proves they match; a +failed login records what was claimed), while logout has no claim +to verify and omits the field: + +``` +admin_audit action=login actor=AKIA_ADMIN claimed_actor=AKIA_ADMIN remote=10.0.0.7:51234 status=200 +admin_audit action=logout actor=AKIA_ADMIN remote=10.0.0.7:51234 status=200 +``` + +Log parsers consuming this shape should treat `claimed_actor` as +present-only-on-login. + +## Troubleshooting + +### "admin listener is enabled but no static credentials are configured" + +Either `-s3CredentialsFile` is unset or the file parses to an empty +map. Check the file exists and contains at least one entry: +```json +{"credentials":[{"access_key_id":"AKIA_ADMIN","secret_access_key":"..."}]} +``` + +### "is not loopback but TLS is not configured" + +Default-deny safety net. Either set `-adminTLSCertFile` + +`-adminTLSKeyFile`, or pass `-adminAllowPlaintextNonLoopback` (and +read the TLS section above before doing so). + +### Login returns 401 invalid_credentials + +The access key + secret pair did not match an entry in +`-s3CredentialsFile`. Either the access key is unknown or the secret +is wrong. Verify the credentials file is the one the running process +loaded (it is read once at startup) and that the secret matches +exactly — secrets are compared with `subtle.ConstantTimeCompare`, so +trailing whitespace counts. + +### Login returns 403 forbidden + +The credentials matched, but the access key is not listed in either +`-adminFullAccessKeys` or `-adminReadOnlyAccessKeys`. This is a +distinct case from the 401 above: the operator has valid SigV4 +credentials for the data plane but no admin role assignment. Add the +key to one of the role flags and **restart every node** so each +node's live role index picks up the change. 
+
+### Write returns 403 forbidden
+
+The principal's role is read-only. Move the access key into
+`-adminFullAccessKeys` (and remove it from
+`-adminReadOnlyAccessKeys`), then **restart every node** so each
+node's live role index picks up the change.
+
+### Write returns 503 leader_unavailable
+
+The Raft cluster is mid-election. Re-issue the request after the
+`Retry-After: 1` header tells you to. If it persists past one or
+two seconds, check Raft leader status via the admin
+`/admin/api/v1/cluster` endpoint or `cmd/elastickv-admin`.
+
+### `bucket_not_empty` on DELETE
+
+The dashboard cannot force a recursive delete by design — the
+SPA's job is to surface the error and guide the operator to clean
+up first. Use the SigV4 S3 path (`aws s3 rm s3://<bucket> --recursive`)
+to drain the bucket, then retry the DELETE on the dashboard.
+
+### Stuck SPA / blank screen
+
+The dashboard ships a placeholder `internal/admin/dist/index.html`
+that renders a "bundle missing" page when `make` was run without
+the SPA build step. Run `cd web/admin && npm install && npm run build`
+to populate the embedded `dist` directory, then rebuild the binary.
+ +## Cross-references + +- Deployment runbook: [docs/admin_deployment.md](admin_deployment.md) (login flow, rollout via `scripts/rolling-update.sh`, key/TLS rotation, failure-mode runbooks) +- Design rationale: [docs/design/2026_04_24_proposed_admin_dashboard.md](design/2026_04_24_proposed_admin_dashboard.md) (renamed to `_partial_` in PR #675; this link will follow once that lands) +- Architecture overview: [docs/architecture_overview.md](architecture_overview.md) +- AdminForward RPC contract: `proto/admin_forward.proto` diff --git a/docs/admin_deployment.md b/docs/admin_deployment.md new file mode 100644 index 000000000..5160f2a12 --- /dev/null +++ b/docs/admin_deployment.md @@ -0,0 +1,374 @@ +# elastickv admin dashboard — deployment runbook + +This document covers **operational procedures** for the admin +dashboard: how to deploy with admin enabled, how operators log in, +how to roll out role / key / TLS changes safely, and which +runbooks fire when something breaks. + +For the configuration reference (per-flag semantics, hard-error +startup conditions, audit log shape, troubleshooting catalogue), +read [`docs/admin.md`](admin.md) first — this doc assumes you have +already skimmed it. + +For design rationale, see +[`docs/design/2026_04_24_partial_admin_dashboard.md`](design/2026_04_24_partial_admin_dashboard.md). + +--- + +## 1. Pre-deployment checklist + +Before turning the admin listener on, prepare the following on a +machine you control (not on a shared bastion): + +### 1.1 HS256 session signing key + +Exactly **64 raw bytes**. The validator rejects any other length at +startup with a precise error. + +```sh +# Generate 64 random bytes, base64-encode with standard padding (88 chars). +openssl rand 64 | base64 > admin-hs256.b64 + +# Or with RawURLEncoding (86 chars, no padding) — both are accepted. 
+openssl rand 64 | base64 | tr -d '=\n' | tr '+/' '-_' > admin-hs256.b64
+```
+
+Verify the length:
+
+```sh
+# Standard padding: 88 chars + trailing newline = 89 bytes total
+# (GNU coreutils base64 wraps at 76 columns — add -w0 there so the
+wc -c admin-hs256.b64
+# key stays on one line.) Decoded byte count:
+base64 -d admin-hs256.b64 | wc -c # → 64
+```
+
+Distribute the same file to every node — under `/etc/elastickv/`
+on each remote host with `0400` permissions owned by the runtime
+user. The dashboard read paths re-validate JWTs locally on each
+node, so a key mismatch breaks follower → leader forwarding the
+moment a follower is elected leader.
+
+### 1.2 SigV4 credentials file
+
+The same `-s3CredentialsFile` the data-plane S3 adapter consumes.
+Add an entry per admin operator (each operator gets a distinct
+access key):
+
+```json
+{
+  "credentials": [
+    {"access_key_id": "AKIA_ADMIN_ALICE", "secret_access_key": "..."},
+    {"access_key_id": "AKIA_OBSERVER_BOB", "secret_access_key": "..."}
+  ]
+}
+```
+
+The dashboard authenticates against this file using the same
+`subtle.ConstantTimeCompare` the SigV4 path uses; secrets must
+match exactly (trailing whitespace counts).
+
+### 1.3 Role allow-lists
+
+Decide who gets `full` (mutate) vs `read-only` (inspect) and write
+the access keys into two comma-separated lists. The same key must
+NOT appear in both — startup rejects an overlap with a hard error.
+
+```
+ADMIN_FULL_ACCESS_KEYS="AKIA_ADMIN_ALICE"
+ADMIN_READ_ONLY_ACCESS_KEYS="AKIA_OBSERVER_BOB"
+```
+
+The role index is checked **on every state-changing request**, not
+only at login. Promotions and demotions take effect on the next
+admin write — but the JWT freezes the role at login (the live store
+can only narrow the JWT's role, never widen it; see
+[`docs/admin.md`](admin.md) §Roles).
+
+### 1.4 Admin TLS material (only if non-loopback)
+
+If the listener will bind to anything other than `127.0.0.1` /
+`::1`, prepare a PEM cert + key pair on each remote host. Both
+files must be readable by the runtime user.
Hot-reload is **not** +supported — rotating certs requires a full container restart, so +prefer cert lifetimes that align with your existing rolling-update +cadence. + +If you genuinely need plaintext on a non-loopback bind for a +short-lived test rig, see §3.3 below for the two-flag escape hatch +(both `ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK=true` and +`ADMIN_ALLOW_INSECURE_DEV_COOKIE=true` are needed — the former +without the latter is a broken-end-to-end configuration). + +--- + +## 2. First-time deployment + +Use `scripts/rolling-update.sh` for both first-time deploys and +subsequent rollouts. The script bind-mounts the referenced files +into the container read-only, validates that each path exists on +the remote host before stopping the previous container, and passes +the corresponding flags to the daemon. + +### 2.1 Configure the deploy environment + +Copy `scripts/rolling-update.env.example` and fill in the +`ADMIN_*` block: + +```sh +# admin-cluster.env +NODES="n1=raft-1.example,n2=raft-2.example,n3=raft-3.example" +IMAGE="ghcr.io/bootjp/elastickv:v0.42.0" +SSH_USER="deploy" +S3_CREDENTIALS_FILE="/etc/elastickv/s3-credentials.json" + +ADMIN_ENABLED="true" +ADMIN_ADDRESS="0.0.0.0:8080" +ADMIN_FULL_ACCESS_KEYS="AKIA_ADMIN_ALICE" +ADMIN_READ_ONLY_ACCESS_KEYS="AKIA_OBSERVER_BOB" +ADMIN_SESSION_SIGNING_KEY_FILE="/etc/elastickv/admin-hs256.b64" +ADMIN_TLS_CERT_FILE="/etc/elastickv/admin-tls.crt" +ADMIN_TLS_KEY_FILE="/etc/elastickv/admin-tls.key" +``` + +Place the credentials file, signing key, and TLS pair on every +remote host at the configured paths *before* invoking the script — +the validation pass aborts the rollout on the first node where any +referenced path is missing, so you do not get partial-cluster +states. + +### 2.2 Run the rollout + +```sh +ROLLING_UPDATE_ENV_FILE=admin-cluster.env ./scripts/rolling-update.sh +``` + +The script rolls one node at a time: + +1. Transfers Raft leadership away from the node (if it is leader). +2. 
Stops the existing container. +3. Starts a new container with the admin flags + bind-mounts. +4. Waits for the gRPC health endpoint to come up. +5. Moves on to the next node after `ROLLING_DELAY_SECONDS`. + +If a node fails the health check, the script exits non-zero +**before** touching the next node — the cluster is left with at +most one partially-updated node, which preserves quorum on a +3-node deployment. + +### 2.3 Verify the listener + +After the rollout finishes, hit the listener from a host that can +reach the admin port: + +```sh +# Loopback (TLS optional) +curl -sf http://127.0.0.1:8080/admin/healthz + +# Non-loopback (TLS required by default) +curl -sf https://raft-1.example:8080/admin/healthz +``` + +The endpoint returns `204 No Content` when the listener is up. + +--- + +## 3. Operator login workflow + +### 3.1 Browser flow (the primary path) + +1. Open `https://:/admin/` in a browser. +2. Enter the access key + secret pair from + `-s3CredentialsFile`. The SPA POSTs to `/admin/api/v1/auth/login`, + the server validates the credentials with constant-time + comparison, and on success sets two cookies — the session JWT + and the CSRF double-submit token, both `Secure; HttpOnly; + SameSite=Strict; Path=/admin/`. +3. The dashboard becomes available. Every state-changing request + carries the CSRF token in a header so a cross-site page cannot + force an action against the session. + +### 3.2 Login failure modes (operator-visible) + +| HTTP code | Cause | Remediation | +|---|---|---| +| `401 invalid_credentials` | access key + secret pair did not match `-s3CredentialsFile` | Verify the credentials file is the one the running process loaded (it is read once at startup) and that the secret matches exactly. | +| `403 forbidden` (login) | credentials matched, but the access key is not in `-adminFullAccessKeys` / `-adminReadOnlyAccessKeys` | Add the key to one of the role flags and **restart every node**. 
| +| `403 forbidden` (write) | session is read-only | Move the access key into `-adminFullAccessKeys`, then restart every node. | +| `503 leader_unavailable` | Raft mid-election | Re-issue after a moment; the SPA does not auto-retry today (see `docs/admin.md` §Election-period 503). | + +The leader's audit log distinguishes 401 vs 403 with the precise +reason; the dashboard does not surface it for security. See +[`docs/admin.md`](admin.md) §Audit log for the slog shapes. + +### 3.3 Plaintext development setups + +**Loopback only** (the default `127.0.0.1:8080` bind): no TLS +required. Cookies are minted with `Secure=true`; modern browsers +accept them on the loopback origin without TLS via the +loopback-is-trusted policy. If a specific browser refuses the +cookie, set `ADMIN_ALLOW_INSECURE_DEV_COOKIE=true` to drop the +`Secure` flag. + +**Plaintext non-loopback** (test rig only): set both +`ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK=true` AND +`ADMIN_ALLOW_INSECURE_DEV_COOKIE=true`. The former bypasses the +startup TLS guard; the latter drops `Secure` from the cookie. Each +flag is independent on purpose so a misconfigured deployment fails +closed on either axis instead of silently downgrading both. + +--- + +## 4. Day-2 operations + +### 4.1 Adding or removing an admin key + +1. Edit `ADMIN_FULL_ACCESS_KEYS` / `ADMIN_READ_ONLY_ACCESS_KEYS` + in your env file. The key must NOT appear in both lists — + startup will reject the overlap. +2. If the key is being added, also add the access key + secret to + `S3_CREDENTIALS_FILE` on every node. (Removing a key only + requires the role-flag edit; the credentials file can be + trimmed later.) +3. Re-run `scripts/rolling-update.sh`. +4. The change takes effect on each node immediately at restart. 
+ During the rollout window, sessions whose JWT was minted on a + not-yet-restarted node will see role enforcement against the + stale list on those nodes — operators may briefly see 403s on + read-only operations replayed against a not-yet-updated node. + +If you cannot tolerate the rollout-window inconsistency, drain +admin traffic before starting the rollout (close all dashboard +tabs and wait for the JWT TTL to expire on every issued token). + +### 4.2 Rotating the HS256 signing key + +The cluster supports a **two-key verification window** so a +rotation does not invalidate existing sessions. + +``` + primary previous + before rotation: key A (none) + rotation step 1: key B key A ← roll out + rotation step 2: key B (none) ← roll out (after JWT TTL) +``` + +Procedure: + +1. Generate the new key: + `openssl rand 64 | base64 > admin-hs256-new.b64`. +2. Copy `admin-hs256-new.b64` to every node at a stable path + (e.g. `/etc/elastickv/admin-hs256.next.b64`) and the previous + key remains at its existing path. +3. Edit the env file: + ``` + ADMIN_SESSION_SIGNING_KEY_FILE="/etc/elastickv/admin-hs256.next.b64" + ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE="/etc/elastickv/admin-hs256.b64" + ``` +4. Roll the cluster. + New tokens are minted with the new key; existing tokens minted + under the previous key continue to verify until they expire. +5. Wait at least one full session TTL (default 1h — see + `sessionTTL` in `internal/admin/auth_handler.go`) to ensure + every previously-issued JWT has expired. +6. Promote the new key to be the only key — clear the previous- + file variable and rename the path on each node: + ``` + ADMIN_SESSION_SIGNING_KEY_FILE="/etc/elastickv/admin-hs256.b64" + # ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE= + ``` +7. Roll the cluster again. The previous-key file can now be + deleted from every node. + +### 4.3 Rotating admin TLS certificates + +Hot-reload is not implemented (out of scope for the dashboard's +maintenance model). Rotation steps: + +1. 
Place the new cert + key at a temporary path on every node + (e.g. `/etc/elastickv/admin-tls.next.crt` / + `admin-tls.next.key`). +2. Edit the env file to point at the new paths. +3. Roll the cluster. +4. Once all nodes have started with the new cert, delete the old + files at your leisure. + +### 4.4 Switching the admin listener to a different port / bind + +A bind change is a regular config change — edit `ADMIN_ADDRESS` +and roll. Operators with the dashboard open will see their next +request fail (the old listener is gone); reload the SPA. + +If you are switching from loopback-only to a reachable bind, you +**must** prepare TLS material (§1.4) before flipping the bind. +The startup hard-error fires the moment a non-loopback bind is +configured without TLS or without the +`ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK` flag. The script's pre-flight +validation will catch a missing TLS pair before stopping the +existing container. + +### 4.5 Disabling the dashboard + +Set `ADMIN_ENABLED="false"` (or remove `ADMIN_ENABLED` from the +env file entirely) and roll. The script emits no admin flags and +no admin bind-mounts; the daemon comes up with the listener off. + +The signing key file and TLS material can stay on disk — they +only have effect when `--adminEnabled` is passed. + +--- + +## 5. Failure-mode runbooks + +### 5.1 "admin listener is enabled but no static credentials are configured" + +Daemon hard-errored at startup. Fix on the affected node: + +1. Verify `S3_CREDENTIALS_FILE` is set in the env file. +2. SSH to the node, confirm the file exists and contains at least + one entry. +3. Re-run the rollout. + +### 5.2 "is not loopback but TLS is not configured" + +Daemon hard-errored. Either supply +`ADMIN_TLS_CERT_FILE`/`ADMIN_TLS_KEY_FILE`, or accept the +plaintext escape hatch in §3.3. + +### 5.3 Sessions intermittently 403 during a rollout + +Expected during a role-flag change rollout (§4.1). 
The dashboard +does not auto-refresh the role index — operators need to log in +again on a node with the updated list. Wait for the rollout to +finish, then ask the affected operator to refresh the SPA. + +### 5.4 Login succeeds but every subsequent request 401s + +Almost always a cookie-Secure mismatch: + +- Listener is plaintext (loopback or + `ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK=true`) +- But the cookie is `Secure=true` (default — the browser refuses + to send it back over HTTP) + +Fix by setting `ADMIN_ALLOW_INSECURE_DEV_COOKIE=true` and rolling. +For a production deployment, use TLS instead. + +### 5.5 Forwarded write returns 503 leader_unavailable + +The follower's `LeaderForwarder` could not reach a leader. This is +expected during Raft elections — see `docs/admin.md` §Election-period 503. +Investigate Raft leader-churn logs first; persistent 503s usually +mean the cluster has lost quorum. + +--- + +## 6. Cross-references + +- [`docs/admin.md`](admin.md) — per-flag configuration reference, + audit log shapes, troubleshooting catalogue. +- [`docs/design/2026_04_24_partial_admin_dashboard.md`](design/2026_04_24_partial_admin_dashboard.md) — + design rationale, acceptance criteria, outstanding items. +- [`scripts/rolling-update.sh`](../scripts/rolling-update.sh) — + the rollout driver this doc references throughout. +- [`scripts/rolling-update.env.example`](../scripts/rolling-update.env.example) — + starting point for your `.env` file. diff --git a/internal/admin/buckets_source.go b/internal/admin/buckets_source.go index 19234d163..ddf84b6a9 100644 --- a/internal/admin/buckets_source.go +++ b/internal/admin/buckets_source.go @@ -12,10 +12,13 @@ import ( // admin-package vocabulary without the adapter package importing // internal/admin. // -// All methods are read-only in this slice. 
Write methods -// (AdminCreateBucket, AdminPutBucketAcl, AdminDeleteBucket) ship in -// the next slice with AdminForward integration so a follower can -// hand them off to the leader transparently. +// Read methods (List / Describe) are SigV4-bypass equivalents of +// the corresponding HTTP handlers. Write methods (Create / Delete / +// PutACL) additionally enforce a leader check and a Full-role +// principal check inside the adapter — duplicating the role check +// the handler already did defends against a follower-forwarded +// call smuggling a downgraded principal past the leader's view +// (Section 3.2 "認可の真実は常に adapter 側"). type BucketsSource interface { // AdminListBuckets returns every bucket this server knows about, // in stable lexicographic order. The empty list is a valid @@ -29,6 +32,21 @@ type BucketsSource interface { // 404 for missing buckets without sniffing sentinels; storage // failures still surface via the error return. AdminDescribeBucket(ctx context.Context, name string) (*BucketSummary, bool, error) + // AdminCreateBucket creates a bucket atomically with its + // initial ACL. Returns the freshly-stored summary on success; + // ErrBucketsAlreadyExists / ErrBucketsForbidden / + // ErrBucketsNotLeader / *ValidationError on the structured + // failure paths. + AdminCreateBucket(ctx context.Context, principal AuthPrincipal, in CreateBucketRequest) (*BucketSummary, error) + // AdminPutBucketAcl updates the canned ACL on an existing + // bucket. Returns ErrBucketsNotFound when the bucket is missing + // and the same role / leader / validation sentinels as Create. + AdminPutBucketAcl(ctx context.Context, principal AuthPrincipal, name, acl string) error + // AdminDeleteBucket removes a bucket if it is empty. Returns + // ErrBucketsNotFound for a missing bucket and ErrBucketsNotEmpty + // when objects remain (the dashboard cannot force a recursive + // delete; the SPA renders the 409 with a hint to clean up first). 
+ AdminDeleteBucket(ctx context.Context, principal AuthPrincipal, name string) error } // BucketSummary is the bucket-level DTO the SPA receives. The JSON @@ -50,6 +68,25 @@ type BucketSummary struct { Owner string `json:"owner,omitempty"` } +// CreateBucketRequest is the JSON body shape for POST +// /admin/api/v1/s3/buckets per design Section 4.2. ACL is optional; +// when omitted, the adapter defaults to "private", matching the +// SigV4 createBucket path's behaviour for a missing x-amz-acl +// header. +type CreateBucketRequest struct { + BucketName string `json:"bucket_name"` + ACL string `json:"acl,omitempty"` +} + +// PutBucketACLRequest is the JSON body shape for PUT +// /admin/api/v1/s3/buckets/{name}/acl. Single field so the body +// stays simple, but kept as a typed struct so a future "owner +// override" or "grants" extension does not require revisiting the +// wire format. +type PutBucketACLRequest struct { + ACL string `json:"acl"` +} + // ErrBucketsForbidden is returned when the principal lacks the // role required for the operation. Maps to 403. Kept as its own // sentinel (rather than reusing ErrTablesForbidden) so a future @@ -58,18 +95,24 @@ type BucketSummary struct { var ErrBucketsForbidden = errors.New("admin buckets: principal lacks required role") // ErrBucketsNotLeader is returned when the local node is not the -// Raft leader for the S3 group. Read-only methods do NOT return -// this — list / describe are leader-agnostic in this slice. Kept -// here so the next slice's write methods can wire it without -// adding a new sentinel. +// Raft leader for the S3 group. The HTTP handler maps this to +// 503 + Retry-After: 1 today; the next slice's AdminForward +// integration will catch this as the trigger to forward. var ErrBucketsNotLeader = errors.New("admin buckets: local node is not the raft leader") -// ErrBucketsNotFound is returned when DELETE / DESCRIBE / a -// follow-up read targets a bucket that does not exist. 
The triple -// (nil, false, nil) is the preferred signal for the read path; -// this sentinel covers the future write paths only. +// ErrBucketsNotFound is returned when DELETE / DESCRIBE / PutACL +// targets a bucket that does not exist. AdminDescribeBucket's +// (nil, false, nil) tuple is the preferred signal for the read +// path; this sentinel covers the write paths. var ErrBucketsNotFound = errors.New("admin buckets: bucket does not exist") -// ErrBucketsAlreadyExists is returned when CREATE targets a name -// that is already in use. Maps to 409. Reserved for the next slice. +// ErrBucketsAlreadyExists is returned when CreateBucket targets +// a name already in use. Maps to 409 Conflict. var ErrBucketsAlreadyExists = errors.New("admin buckets: bucket already exists") + +// ErrBucketsNotEmpty is returned when DeleteBucket targets a +// bucket that still has objects. Maps to 409 Conflict — the +// dashboard cannot force a recursive delete; the SPA must guide +// the operator to clean up first. Mirrors the SigV4 deleteBucket +// path's BucketNotEmpty response. +var ErrBucketsNotEmpty = errors.New("admin buckets: bucket is not empty") diff --git a/internal/admin/forward_client.go b/internal/admin/forward_client.go index edeadcd84..2d5dba684 100644 --- a/internal/admin/forward_client.go +++ b/internal/admin/forward_client.go @@ -32,6 +32,17 @@ type LeaderForwarder interface { ForwardCreateTable(ctx context.Context, principal AuthPrincipal, in CreateTableRequest) (*ForwardResult, error) // ForwardDeleteTable is the delete-side counterpart. ForwardDeleteTable(ctx context.Context, principal AuthPrincipal, name string) (*ForwardResult, error) + // ForwardCreateBucket issues a forwarded POST /s3/buckets on + // the leader's behalf. The leader echoes back the same JSON + // envelope a leader-direct call would produce. 
+ ForwardCreateBucket(ctx context.Context, principal AuthPrincipal, in CreateBucketRequest) (*ForwardResult, error) + // ForwardDeleteBucket is the delete-side counterpart. + ForwardDeleteBucket(ctx context.Context, principal AuthPrincipal, name string) (*ForwardResult, error) + // ForwardPutBucketAcl issues a forwarded PUT + // /s3/buckets/{name}/acl. Both the bucket name and the + // new ACL travel inside the proto payload — see the leader-side + // handlePutBucketAcl for the JSON shape. + ForwardPutBucketAcl(ctx context.Context, principal AuthPrincipal, name, acl string) (*ForwardResult, error) } // ForwardResult is the leader's response replayed for the SPA. The @@ -142,6 +153,43 @@ func (c *gRPCForwardClient) ForwardDeleteTable(ctx context.Context, principal Au return c.forward(ctx, pb.AdminOperation_ADMIN_OP_DELETE_TABLE, principal, payload) } +// ForwardCreateBucket serialises `in` as JSON and dispatches the +// CreateBucket operation. Same contract as ForwardCreateTable — +// gRPC errors are wrapped, ErrLeaderUnavailable on no-leader. +func (c *gRPCForwardClient) ForwardCreateBucket(ctx context.Context, principal AuthPrincipal, in CreateBucketRequest) (*ForwardResult, error) { + payload, err := json.Marshal(in) + if err != nil { + return nil, pkgerrors.Wrap(err, "admin forward: marshal create-bucket request") + } + return c.forward(ctx, pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, principal, payload) +} + +// ForwardDeleteBucket serialises the bucket name as `{"name":"..."}` +// to match the leader-side handleDeleteBucket contract. 
+func (c *gRPCForwardClient) ForwardDeleteBucket(ctx context.Context, principal AuthPrincipal, name string) (*ForwardResult, error) { + payload, err := json.Marshal(struct { + Name string `json:"name"` + }{Name: name}) + if err != nil { + return nil, pkgerrors.Wrap(err, "admin forward: marshal delete-bucket request") + } + return c.forward(ctx, pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, principal, payload) +} + +// ForwardPutBucketAcl carries both the bucket name (from the URL +// path) and the new ACL (from the request body) in the proto +// payload — see handlePutBucketAcl for the leader-side decode. +func (c *gRPCForwardClient) ForwardPutBucketAcl(ctx context.Context, principal AuthPrincipal, name, acl string) (*ForwardResult, error) { + payload, err := json.Marshal(struct { + Name string `json:"name"` + ACL string `json:"acl"` + }{Name: name, ACL: acl}) + if err != nil { + return nil, pkgerrors.Wrap(err, "admin forward: marshal put-bucket-acl request") + } + return c.forward(ctx, pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, principal, payload) +} + func (c *gRPCForwardClient) forward(ctx context.Context, op pb.AdminOperation, principal AuthPrincipal, payload []byte) (*ForwardResult, error) { addr := c.resolver() if addr == "" { diff --git a/internal/admin/forward_client_test.go b/internal/admin/forward_client_test.go index df4a814f0..bf22fdad8 100644 --- a/internal/admin/forward_client_test.go +++ b/internal/admin/forward_client_test.go @@ -182,3 +182,72 @@ func TestGRPCForwardClient_FillsMissingContentType(t *testing.T) { CreateTableRequest{TableName: "t", PartitionKey: CreateTableAttribute{Name: "id", Type: "S"}}) require.Equal(t, "application/json; charset=utf-8", res.ContentType) } + +func TestGRPCForwardClient_ForwardCreateBucket_HappyPath(t *testing.T) { + conn := &stubForwardConn{resp: &pb.AdminForwardResponse{ + StatusCode: http.StatusCreated, + Payload: []byte(`{"bucket_name":"public-assets","acl":"public-read"}`), + ContentType: "application/json; 
charset=utf-8", + }} + dial := &stubConnFactory{conn: conn} + fwd, err := NewGRPCForwardClient(fixedResolver("leader.local:7000"), dial, "follower-2") + require.NoError(t, err) + + in := CreateBucketRequest{BucketName: "public-assets", ACL: "public-read"} + res, err := fwd.ForwardCreateBucket(context.Background(), + AuthPrincipal{AccessKey: "AKIA_F", Role: RoleFull}, in) + require.NoError(t, err) + require.Equal(t, http.StatusCreated, res.StatusCode) + require.Equal(t, pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, conn.lastReq.GetOperation()) + require.Equal(t, "follower-2", conn.lastReq.GetForwardedFrom()) + var roundtripped CreateBucketRequest + require.NoError(t, json.Unmarshal(conn.lastReq.GetPayload(), &roundtripped)) + require.Equal(t, in, roundtripped) +} + +func TestGRPCForwardClient_ForwardDeleteBucket_HappyPath(t *testing.T) { + conn := &stubForwardConn{resp: &pb.AdminForwardResponse{ + StatusCode: http.StatusNoContent, + ContentType: "application/json; charset=utf-8", + }} + dial := &stubConnFactory{conn: conn} + fwd, _ := NewGRPCForwardClient(fixedResolver("leader.local:7000"), dial, "follower-3") + + res, err := fwd.ForwardDeleteBucket(context.Background(), + AuthPrincipal{AccessKey: "AKIA_F", Role: RoleFull}, "orders") + require.NoError(t, err) + require.Equal(t, http.StatusNoContent, res.StatusCode) + require.Equal(t, pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, conn.lastReq.GetOperation()) + require.JSONEq(t, `{"name":"orders"}`, string(conn.lastReq.GetPayload())) +} + +func TestGRPCForwardClient_ForwardPutBucketAcl_HappyPath(t *testing.T) { + conn := &stubForwardConn{resp: &pb.AdminForwardResponse{ + StatusCode: http.StatusNoContent, + ContentType: "application/json; charset=utf-8", + }} + dial := &stubConnFactory{conn: conn} + fwd, _ := NewGRPCForwardClient(fixedResolver("leader.local:7000"), dial, "follower-4") + + res, err := fwd.ForwardPutBucketAcl(context.Background(), + AuthPrincipal{AccessKey: "AKIA_F", Role: RoleFull}, "orders", "public-read") + 
require.NoError(t, err) + require.Equal(t, http.StatusNoContent, res.StatusCode) + require.Equal(t, pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, conn.lastReq.GetOperation()) + // Both name and acl travel in the payload — see handlePutBucketAcl + // for the leader-side decode contract. + require.JSONEq(t, `{"name":"orders","acl":"public-read"}`, string(conn.lastReq.GetPayload())) +} + +func TestGRPCForwardClient_BucketOps_NoLeaderReturnsErrLeaderUnavailable(t *testing.T) { + dial := &stubConnFactory{conn: &stubForwardConn{}} + fwd, _ := NewGRPCForwardClient(fixedResolver(""), dial, "f") + + _, err := fwd.ForwardCreateBucket(context.Background(), AuthPrincipal{Role: RoleFull}, + CreateBucketRequest{BucketName: "x"}) + require.ErrorIs(t, err, ErrLeaderUnavailable) + _, err = fwd.ForwardDeleteBucket(context.Background(), AuthPrincipal{Role: RoleFull}, "x") + require.ErrorIs(t, err, ErrLeaderUnavailable) + _, err = fwd.ForwardPutBucketAcl(context.Background(), AuthPrincipal{Role: RoleFull}, "x", "private") + require.ErrorIs(t, err, ErrLeaderUnavailable) +} diff --git a/internal/admin/forward_integration_test.go b/internal/admin/forward_integration_test.go index 54bd1bcf4..fec7e186e 100644 --- a/internal/admin/forward_integration_test.go +++ b/internal/admin/forward_integration_test.go @@ -20,11 +20,28 @@ type stubLeaderForwarder struct { createErr error deleteRes *ForwardResult deleteErr error - - lastCreatePrincipal AuthPrincipal - lastCreateInput CreateTableRequest - lastDeletePrincipal AuthPrincipal - lastDeleteName string + // Bucket-side counterparts ship with P2 slice 2b. Tests that + // only exercise Dynamo behaviour leave these zero values; the + // stub honours the same nil-result-and-no-error contract the + // table side uses on a "did not call us" path. 
+ createBucketRes *ForwardResult + createBucketErr error + deleteBucketRes *ForwardResult + deleteBucketErr error + putACLRes *ForwardResult + putACLErr error + + lastCreatePrincipal AuthPrincipal + lastCreateInput CreateTableRequest + lastDeletePrincipal AuthPrincipal + lastDeleteName string + lastCreateBucketPrincipal AuthPrincipal + lastCreateBucketInput CreateBucketRequest + lastDeleteBucketPrincipal AuthPrincipal + lastDeleteBucketName string + lastPutACLPrincipal AuthPrincipal + lastPutACLBucket string + lastPutACLValue string } func (s *stubLeaderForwarder) ForwardCreateTable(_ context.Context, principal AuthPrincipal, in CreateTableRequest) (*ForwardResult, error) { @@ -45,6 +62,34 @@ func (s *stubLeaderForwarder) ForwardDeleteTable(_ context.Context, principal Au return s.deleteRes, nil } +func (s *stubLeaderForwarder) ForwardCreateBucket(_ context.Context, principal AuthPrincipal, in CreateBucketRequest) (*ForwardResult, error) { + s.lastCreateBucketPrincipal = principal + s.lastCreateBucketInput = in + if s.createBucketErr != nil { + return nil, s.createBucketErr + } + return s.createBucketRes, nil +} + +func (s *stubLeaderForwarder) ForwardDeleteBucket(_ context.Context, principal AuthPrincipal, name string) (*ForwardResult, error) { + s.lastDeleteBucketPrincipal = principal + s.lastDeleteBucketName = name + if s.deleteBucketErr != nil { + return nil, s.deleteBucketErr + } + return s.deleteBucketRes, nil +} + +func (s *stubLeaderForwarder) ForwardPutBucketAcl(_ context.Context, principal AuthPrincipal, name, acl string) (*ForwardResult, error) { + s.lastPutACLPrincipal = principal + s.lastPutACLBucket = name + s.lastPutACLValue = acl + if s.putACLErr != nil { + return nil, s.putACLErr + } + return s.putACLRes, nil +} + // notLeaderSource is a TablesSource that always returns // ErrTablesNotLeader on writes — i.e., it simulates the local // node being a follower. 
Used to exercise the forwarder path diff --git a/internal/admin/forward_server.go b/internal/admin/forward_server.go index 068f3bee6..09ea83e26 100644 --- a/internal/admin/forward_server.go +++ b/internal/admin/forward_server.go @@ -37,14 +37,17 @@ const adminForwardPayloadLimit = 64 << 10 type ForwardServer struct { pb.UnimplementedAdminForwardServer - source TablesSource - roles RoleStore - logger *slog.Logger + source TablesSource + buckets BucketsSource + roles RoleStore + logger *slog.Logger } // NewForwardServer wires a TablesSource and a RoleStore behind the // gRPC AdminForward service. logger may be nil; defaults to -// slog.Default(). +// slog.Default(). The S3 BucketsSource is plumbed via WithBucketsSource +// so deployments that ship without the S3 adapter can still register +// the gRPC service for Dynamo forwarding. func NewForwardServer(source TablesSource, roles RoleStore, logger *slog.Logger) *ForwardServer { if logger == nil { logger = slog.Default() @@ -52,6 +55,17 @@ func NewForwardServer(source TablesSource, roles RoleStore, logger *slog.Logger) return &ForwardServer{source: source, roles: roles, logger: logger} } +// WithBucketsSource enables S3 admin operation forwarding. Returns +// the receiver so wiring code can chain the call: +// `NewForwardServer(...).WithBucketsSource(...)`. A nil BucketsSource +// leaves S3 forwarding disabled — the Forward dispatcher rejects +// CREATE_BUCKET / DELETE_BUCKET / PUT_BUCKET_ACL with 501 in that +// case so a follower can detect the missing capability. +func (s *ForwardServer) WithBucketsSource(b BucketsSource) *ForwardServer { + s.buckets = b + return s +} + // Forward is the gRPC entrypoint. It performs the principal // re-evaluation the design mandates, then dispatches by operation. 
// Errors that the SPA can act on are returned as a structured @@ -86,11 +100,37 @@ func (s *ForwardServer) Forward(ctx context.Context, req *pb.AdminForwardRequest return rejectForward(http.StatusForbidden, "forbidden", "this endpoint requires a full-access role") } - switch req.GetOperation() { + return s.dispatchForward(ctx, principal, forwardedFrom, req) +} + +// dispatchForward routes the validated request to the per-operation +// handler. Pulled out so Forward stays under the cyclomatic ceiling +// as the operation enum grows; the principal-validation + +// forwarded_from sanitisation logic stays in Forward where it belongs. +// +// Source-availability checks live in checkOpAvailability rather than +// in each handler: a Dynamo-only build has s.source != nil but +// s.buckets == nil, and an S3-only build (Codex P1 on PR #673) has +// the inverse. Centralising the check means every operation gets a +// consistent 501 error shape and a future op cannot ship without the +// operator-visible "not configured" message that the existing ops +// promise. 
+func (s *ForwardServer) dispatchForward(ctx context.Context, principal AuthPrincipal, forwardedFrom string, req *pb.AdminForwardRequest) (*pb.AdminForwardResponse, error) { + op := req.GetOperation() + if resp, err, ok := s.checkOpAvailability(op); !ok { + return resp, err + } + switch op { case pb.AdminOperation_ADMIN_OP_CREATE_TABLE: return s.handleCreate(ctx, principal, forwardedFrom, req) case pb.AdminOperation_ADMIN_OP_DELETE_TABLE: return s.handleDelete(ctx, principal, forwardedFrom, req) + case pb.AdminOperation_ADMIN_OP_CREATE_BUCKET: + return s.handleCreateBucket(ctx, principal, forwardedFrom, req) + case pb.AdminOperation_ADMIN_OP_DELETE_BUCKET: + return s.handleDeleteBucket(ctx, principal, forwardedFrom, req) + case pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL: + return s.handlePutBucketAcl(ctx, principal, forwardedFrom, req) case pb.AdminOperation_ADMIN_OP_UNSPECIFIED: return rejectForward(http.StatusBadRequest, "invalid_request", "unknown admin operation") default: @@ -98,6 +138,43 @@ func (s *ForwardServer) Forward(ctx context.Context, req *pb.AdminForwardRequest } } +// checkOpAvailability returns (resp, err, true) when dispatchForward +// should continue to the per-op handler, or (resp, err, false) when +// the leader's build does not include the source the requested +// operation needs (S3-only deployment served a Dynamo op, or vice +// versa). Pulling the per-op switch out keeps dispatchForward's +// cyclomatic count under the linter ceiling as the enum grows. 
+func (s *ForwardServer) checkOpAvailability(op pb.AdminOperation) (*pb.AdminForwardResponse, error, bool) { + switch op { + case pb.AdminOperation_ADMIN_OP_CREATE_TABLE, pb.AdminOperation_ADMIN_OP_DELETE_TABLE: + if s.source == nil { + resp, err := notImplementedForwardResponse("DynamoDB") + return resp, err, false + } + case pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL: + if s.buckets == nil { + resp, err := notImplementedForwardResponse("S3") + return resp, err, false + } + case pb.AdminOperation_ADMIN_OP_UNSPECIFIED: + // Unknown-op rejection is dispatchForward's responsibility, + // not this gate's. Falling through to ok=true lets the main + // switch's default branch produce the canonical 400 message. + } + return nil, nil, true +} + +// notImplementedForwardResponse produces the 501 response a follower +// receives when the leader is built without the source for this +// operation's surface. surface is the human-facing label that ends +// up in the error message ("DynamoDB" or "S3"). +func notImplementedForwardResponse(surface string) (*pb.AdminForwardResponse, error) { + return rejectForward(http.StatusNotImplemented, "not_implemented", + surface+" admin forwarding is not configured on this leader") +} + func (s *ForwardServer) validatePrincipal(p *pb.AdminPrincipal) (AuthPrincipal, bool) { accessKey := p.GetAccessKey() if accessKey == "" { @@ -154,55 +231,202 @@ func (s *ForwardServer) handleDelete(ctx context.Context, principal AuthPrincipa // proto stays operation-agnostic — there is no operation-specific // field in AdminForwardRequest, by design (adding one per op // would couple every new admin endpoint to the proto schema). 
+ name, rejection, err := decodeNamedPayload(req.GetPayload(), "delete") + if rejection != nil || err != nil { + return rejection, err + } + if err := s.source.AdminDeleteTable(ctx, principal, name); err != nil { + s.logUnexpectedSourceError(ctx, "delete_table", name, forwardedFrom, err) + return forwardErrorResponse("delete", err), nil + } + s.auditDeleteSuccess(ctx, principal, forwardedFrom, "delete_table", "table", name) + return &pb.AdminForwardResponse{StatusCode: http.StatusNoContent}, nil +} + +// handleCreateBucket dispatches a forwarded POST /s3/buckets call. +// Mirrors handleCreate (Dynamo) but decodes a CreateBucketRequest +// and routes through BucketsSource. dispatchForward gates this on +// s.buckets != nil so callers reach here only when S3 forwarding is +// configured. +func (s *ForwardServer) handleCreateBucket(ctx context.Context, principal AuthPrincipal, forwardedFrom string, req *pb.AdminForwardRequest) (*pb.AdminForwardResponse, error) { + payload := req.GetPayload() + if len(payload) > adminForwardPayloadLimit { + return rejectForward(http.StatusRequestEntityTooLarge, "payload_too_large", + "forwarded payload exceeds the 64 KiB admin limit") + } + if bytes.IndexByte(payload, 0) >= 0 { + return rejectForward(http.StatusBadRequest, "invalid_body", + "create-bucket payload contains a NUL byte") + } + dec := json.NewDecoder(bytes.NewReader(payload)) + dec.DisallowUnknownFields() + var body CreateBucketRequest + if err := dec.Decode(&body); err != nil { + return rejectForward(http.StatusBadRequest, "invalid_body", + "create-bucket payload is not valid JSON") + } + if dec.More() { + return rejectForward(http.StatusBadRequest, "invalid_body", + "create-bucket payload has trailing data") + } + // Reuse the HTTP handler's validateCreateBucketRequest so the + // forwarded path enforces identical rules — empty / whitespace- + // padded bucket_name produces the same 400 message a leader- + // direct call would, instead of slipping through here and + // 
hitting the adapter's lower-level validateS3BucketName with + // a less actionable error (Gemini security-high + Claude #2 on + // PR #673). + if err := validateCreateBucketRequest(body); err != nil { + return rejectForward(http.StatusBadRequest, "invalid_body", err.Error()) + } + summary, err := s.buckets.AdminCreateBucket(ctx, principal, body) + if err != nil { + s.logUnexpectedSourceError(ctx, "create_bucket", body.BucketName, forwardedFrom, err) + return forwardBucketsErrorResponse("create", err), nil + } + s.logger.LogAttrs(ctx, slog.LevelInfo, "admin_audit", + slog.String("actor", principal.AccessKey), + slog.String("role", string(principal.Role)), + slog.String("forwarded_from", forwardedFrom), + slog.String("operation", "create_bucket"), + slog.String("bucket", body.BucketName), + ) + return jsonForwardResponse(http.StatusCreated, summary) +} + +// handleDeleteBucket dispatches a forwarded DELETE +// /s3/buckets/{name} call. Same payload shape as the Dynamo delete: +// a JSON object with a single "name" field, which the bridge +// generates from the URL path. +func (s *ForwardServer) handleDeleteBucket(ctx context.Context, principal AuthPrincipal, forwardedFrom string, req *pb.AdminForwardRequest) (*pb.AdminForwardResponse, error) { + name, rejection, err := decodeNamedPayload(req.GetPayload(), "delete-bucket") + if rejection != nil || err != nil { + return rejection, err + } + if err := s.buckets.AdminDeleteBucket(ctx, principal, name); err != nil { + s.logUnexpectedSourceError(ctx, "delete_bucket", name, forwardedFrom, err) + return forwardBucketsErrorResponse("delete", err), nil + } + s.auditDeleteSuccess(ctx, principal, forwardedFrom, "delete_bucket", "bucket", name) + return &pb.AdminForwardResponse{StatusCode: http.StatusNoContent}, nil +} + +// auditDeleteSuccess emits the admin_audit slog line both Dynamo and +// S3 forwarded delete handlers need. 
Centralised so handleDelete and +// handleDeleteBucket do not diverge on the field set, and so a future +// handler that mirrors the same delete shape (e.g. delete-namespace) +// keeps the audit-log contract by reusing this helper rather than +// re-emitting a hand-rolled subset. resourceField is "table" or +// "bucket"; opLabel is the audit "operation" value. +func (s *ForwardServer) auditDeleteSuccess(ctx context.Context, principal AuthPrincipal, forwardedFrom, opLabel, resourceField, name string) { + s.logger.LogAttrs(ctx, slog.LevelInfo, "admin_audit", + slog.String("actor", principal.AccessKey), + slog.String("role", string(principal.Role)), + slog.String("forwarded_from", forwardedFrom), + slog.String("operation", opLabel), + slog.String(resourceField, name), + ) +} + +// handlePutBucketAcl dispatches a forwarded PUT +// /s3/buckets/{name}/acl call. The bridge encodes both the bucket +// name and the new ACL into the payload so the proto stays +// operation-agnostic — same approach handleDeleteBucket takes for +// the bucket name. +func (s *ForwardServer) handlePutBucketAcl(ctx context.Context, principal AuthPrincipal, forwardedFrom string, req *pb.AdminForwardRequest) (*pb.AdminForwardResponse, error) { payload := req.GetPayload() if len(payload) > adminForwardPayloadLimit { return rejectForward(http.StatusRequestEntityTooLarge, "payload_too_large", "forwarded payload exceeds the 64 KiB admin limit") } - // Mirror decodeCreateTableRequest's NUL-byte guard: goccy/go-json - // treats raw NUL as end-of-input so dec.More() would otherwise - // miss `{"name":"users"}\x00{"extra":1}` payloads. Codex P2 on - // PR #635 flagged this as the same smuggling vector that the - // HTTP create path already covers. 
if bytes.IndexByte(payload, 0) >= 0 { - return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload contains a NUL byte") + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload contains a NUL byte") } dec := json.NewDecoder(bytes.NewReader(payload)) dec.DisallowUnknownFields() var body struct { Name string `json:"name"` + ACL string `json:"acl"` } if err := dec.Decode(&body); err != nil { - return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload is not valid JSON") + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload is not valid JSON") } if dec.More() { - return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload has trailing data") + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload has trailing data") } if body.Name == "" { - return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload missing name") + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload missing name") } - // Reject slash-bearing names symmetrically with the HTTP - // handleDelete and handleDescribe paths. Without this, a - // forwarded call could act on `foo/bar` while a leader-direct - // call would 404 — divergent behaviour Codex P2 flagged on - // PR #635. 
if strings.ContainsRune(body.Name, '/') { - return rejectForward(http.StatusBadRequest, "invalid_body", "delete payload name must not contain '/'") + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload name must not contain '/'") } - if err := s.source.AdminDeleteTable(ctx, principal, body.Name); err != nil { - s.logUnexpectedSourceError(ctx, "delete_table", body.Name, forwardedFrom, err) - return forwardErrorResponse("delete", err), nil + if strings.TrimSpace(body.ACL) == "" { + return rejectForward(http.StatusBadRequest, "invalid_body", + "put-bucket-acl payload missing acl") + } + if err := s.buckets.AdminPutBucketAcl(ctx, principal, body.Name, body.ACL); err != nil { + s.logUnexpectedSourceError(ctx, "put_bucket_acl", body.Name, forwardedFrom, err) + return forwardBucketsErrorResponse("put_acl", err), nil } s.logger.LogAttrs(ctx, slog.LevelInfo, "admin_audit", slog.String("actor", principal.AccessKey), slog.String("role", string(principal.Role)), slog.String("forwarded_from", forwardedFrom), - slog.String("operation", "delete_table"), - slog.String("table", body.Name), + slog.String("operation", "put_bucket_acl"), + slog.String("bucket", body.Name), + slog.String("acl", body.ACL), ) return &pb.AdminForwardResponse{StatusCode: http.StatusNoContent}, nil } +// forwardBucketsErrorResponse re-encodes a BucketsSource error into +// the structured shape the follower's bridge can re-emit verbatim. +// Mirrors forwardErrorResponse on the Dynamo side: the same status +// codes and JSON envelopes the leader-direct HTTP path produces in +// writeBucketsError. 
+func forwardBucketsErrorResponse(op string, err error) *pb.AdminForwardResponse { + switch { + case errors.Is(err, ErrBucketsForbidden): + return mustForwardJSON(http.StatusForbidden, + errorResponse{Error: "forbidden", Message: "this endpoint requires a full-access role"}) + case errors.Is(err, ErrBucketsNotLeader): + // Should never happen on the leader path — the leader just + // verified itself — but a leadership transfer racing with + // the dispatch makes this theoretically reachable. Carry + // retry_after_seconds=1 so the follower's bridge translates + // it back into an HTTP Retry-After header. + resp := mustForwardJSON(http.StatusServiceUnavailable, + errorResponse{Error: "leader_unavailable", Message: "leader stepped down mid-request"}) + resp.RetryAfterSeconds = 1 + return resp + case errors.Is(err, ErrBucketsNotFound): + return mustForwardJSON(http.StatusNotFound, + errorResponse{Error: "not_found", Message: "bucket does not exist"}) + case errors.Is(err, ErrBucketsAlreadyExists): + return mustForwardJSON(http.StatusConflict, + errorResponse{Error: "already_exists", Message: "bucket already exists"}) + case errors.Is(err, ErrBucketsNotEmpty): + return mustForwardJSON(http.StatusConflict, + errorResponse{Error: "bucket_not_empty", + Message: "bucket still has objects; remove them and retry"}) + } + var verr *ValidationError + if errors.As(err, &verr) { + return mustForwardJSON(http.StatusBadRequest, + errorResponse{Error: "invalid_request", Message: verr.Error()}) + } + return mustForwardJSON(http.StatusInternalServerError, + errorResponse{ + Error: "s3_" + op + "_failed", + Message: "failed to " + op + " bucket; see leader logs", + }) +} + // sanitiseForwardedFrom strips CR/LF from a follower-supplied // node id so a malicious value cannot split a single audit log // line into two when slog is using a text-format handler. 
JSON @@ -219,6 +443,64 @@ func sanitiseForwardedFrom(s string) string { }, s) } +// decodeNamedPayload validates and decodes the {"name": "..."} JSON +// shape both the Dynamo and S3 delete forwarders accept. Returns the +// decoded name on success, or a populated rejection response (and +// nil name) on a 400 / 413. opLabel is the human-facing prefix that +// goes into the rejection messages ("delete" for Dynamo, +// "delete-bucket" for S3) so the response identifies which path +// rejected — and so a future op (e.g. "describe-bucket") that +// reuses this helper still produces an actionable error. +// +// All four guards mirror the leader-direct HTTP path: +// - 64 KiB payload cap (matches adminForwardPayloadLimit elsewhere) +// - NUL-byte rejection (goccy/go-json treats raw NUL as end-of- +// input; without this guard `{"name":"x"}\x00{"extra":1}` +// payloads slip past dec.More(); Codex P2 on PR #635) +// - DisallowUnknownFields + dec.More() trailing-token rejection +// - empty + slash-bearing name rejection (the HTTP handlers +// already 404 slash-bearing names; the forwarded path has to +// reject symmetrically or a hostile follower could act on +// `foo/bar` while a leader-direct call would not). 
+func decodeNamedPayload(payload []byte, opLabel string) (string, *pb.AdminForwardResponse, error) { + if len(payload) > adminForwardPayloadLimit { + resp, err := rejectForward(http.StatusRequestEntityTooLarge, "payload_too_large", + "forwarded payload exceeds the 64 KiB admin limit") + return "", resp, err + } + if bytes.IndexByte(payload, 0) >= 0 { + resp, err := rejectForward(http.StatusBadRequest, "invalid_body", + opLabel+" payload contains a NUL byte") + return "", resp, err + } + dec := json.NewDecoder(bytes.NewReader(payload)) + dec.DisallowUnknownFields() + var body struct { + Name string `json:"name"` + } + if err := dec.Decode(&body); err != nil { + resp, rerr := rejectForward(http.StatusBadRequest, "invalid_body", + opLabel+" payload is not valid JSON") + return "", resp, rerr + } + if dec.More() { + resp, err := rejectForward(http.StatusBadRequest, "invalid_body", + opLabel+" payload has trailing data") + return "", resp, err + } + if body.Name == "" { + resp, err := rejectForward(http.StatusBadRequest, "invalid_body", + opLabel+" payload missing name") + return "", resp, err + } + if strings.ContainsRune(body.Name, '/') { + resp, err := rejectForward(http.StatusBadRequest, "invalid_body", + opLabel+" payload name must not contain '/'") + return "", resp, err + } + return body.Name, nil, nil +} + // forwardErrorResponse re-encodes a TablesSource error in the // structured shape the follower's handler can re-emit verbatim. This // is the leader-side counterpart of writeTablesError: every status / @@ -270,12 +552,18 @@ func forwardErrorResponse(op string, err error) *pb.AdminForwardResponse { // regressions, and logging them at LevelError would drown the // operational signal. The HTTP path's writeTablesError applies // the same policy (Codex P2 on PR #635 flagged the silent path). 
-func (s *ForwardServer) logUnexpectedSourceError(ctx context.Context, op, table, forwardedFrom string, err error) { +// +// The resource argument carries either a Dynamo table name or an +// S3 bucket name, depending on op. The slog key is "resource" so +// log queries do not have to know which resource family produced a +// given line — Claude review on PR #673 caught the prior "table" +// key, which made bucket-error queries miss the audit entries. +func (s *ForwardServer) logUnexpectedSourceError(ctx context.Context, op, resource, forwardedFrom string, err error) { if isStructuredSourceError(err) { return } s.logger.LogAttrs(ctx, slog.LevelError, "admin_forward_"+op+"_failed", - slog.String("table", table), + slog.String("resource", resource), slog.String("forwarded_from", forwardedFrom), slog.String("error", err.Error()), ) @@ -290,7 +578,12 @@ func isStructuredSourceError(err error) bool { case errors.Is(err, ErrTablesForbidden), errors.Is(err, ErrTablesNotLeader), errors.Is(err, ErrTablesNotFound), - errors.Is(err, ErrTablesAlreadyExists): + errors.Is(err, ErrTablesAlreadyExists), + errors.Is(err, ErrBucketsForbidden), + errors.Is(err, ErrBucketsNotLeader), + errors.Is(err, ErrBucketsNotFound), + errors.Is(err, ErrBucketsAlreadyExists), + errors.Is(err, ErrBucketsNotEmpty): return true } var verr *ValidationError diff --git a/internal/admin/forward_server_test.go b/internal/admin/forward_server_test.go index 72a6bcf72..c722e29bd 100644 --- a/internal/admin/forward_server_test.go +++ b/internal/admin/forward_server_test.go @@ -445,3 +445,260 @@ func TestForwardServer_DoesNotLogStructuredSourceErrors(t *testing.T) { }) } } + +// newForwardServerWithBucketsForTest is the slice 2b counterpart of +// newForwardServerForTest: wires both a TablesSource (zero-value) and +// a BucketsSource so the bucket-side dispatch tests can mutate the +// inputs without rebuilding the role-lookup boilerplate. 
+func newForwardServerWithBucketsForTest(buckets BucketsSource, roles MapRoleStore) *ForwardServer { + return NewForwardServer(&stubTablesSource{tables: map[string]*DynamoTableSummary{}}, roles, nil). + WithBucketsSource(buckets) +} + +func TestForwardServer_CreateBucket_HappyPath(t *testing.T) { + buckets := &stubBucketsSource{} + srv := newForwardServerWithBucketsForTest(buckets, fullPrincipalRoleStore()) + body := CreateBucketRequest{BucketName: "public-assets", ACL: "public-read"} + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + Payload: mustJSON(t, body), + ForwardedFrom: "node-2", + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusCreated), resp.GetStatusCode()) + require.Equal(t, "public-assets", buckets.lastCreateInput.BucketName) + require.Equal(t, RoleFull, buckets.lastCreatePrincipal.Role) + var summary BucketSummary + require.NoError(t, json.Unmarshal(resp.GetPayload(), &summary)) + require.Equal(t, "public-assets", summary.Name) +} + +func TestForwardServer_BucketOps_NoBucketsSourceReturns501(t *testing.T) { + // Builds without S3 do not call WithBucketsSource; the leader + // must still reject every bucket operation cleanly with 501 + // instead of reaching for a nil receiver and panicking. Sweep + // over all three operations so a future op added without a + // nil-receiver guard fails CI immediately (Claude review on + // PR #673 caught the original test only covering CREATE_BUCKET). 
+ cases := []struct { + name string + op pb.AdminOperation + payload []byte + }{ + { + name: "create", + op: pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + payload: mustJSON(t, CreateBucketRequest{BucketName: "x"}), + }, + { + name: "delete", + op: pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + payload: []byte(`{"name":"x"}`), + }, + { + name: "put_acl", + op: pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, + payload: []byte(`{"name":"x","acl":"private"}`), + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + srv := newForwardServerForTest(&stubTablesSource{tables: map[string]*DynamoTableSummary{}}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: tc.op, + Payload: tc.payload, + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusNotImplemented), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "not_implemented") + }) + } +} + +func TestForwardServer_DynamoOps_NoTablesSourceReturns501(t *testing.T) { + // S3-only deployments construct ForwardServer with a nil + // TablesSource so leaders can still register the gRPC service + // for follower-forwarded bucket writes. Dynamo operations must + // then reject cleanly with 501 instead of dereferencing the nil + // source and panicking. Symmetric with + // TestForwardServer_BucketOps_NoBucketsSourceReturns501 for the + // inverse Dynamo-only build (Codex P1 on PR #673). + cases := []struct { + name string + op pb.AdminOperation + payload []byte + }{ + { + name: "create_table", + op: pb.AdminOperation_ADMIN_OP_CREATE_TABLE, + payload: mustJSON(t, CreateTableRequest{TableName: "orders"}), + }, + { + name: "delete_table", + op: pb.AdminOperation_ADMIN_OP_DELETE_TABLE, + payload: []byte(`{"name":"orders"}`), + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + srv := NewForwardServer(nil, fullPrincipalRoleStore(), nil). 
+ WithBucketsSource(&stubBucketsSource{}) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: tc.op, + Payload: tc.payload, + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusNotImplemented), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "not_implemented") + }) + } +} + +func TestForwardServer_CreateBucket_RejectsWhitespacePaddedName(t *testing.T) { + // Validation parity with the HTTP path's + // validateCreateBucketRequest: a name like " bucket " must + // produce the same 400 invalid_body the leader-direct path + // emits, instead of slipping through and hitting the lower- + // level adapter validator with a less actionable error + // (Gemini security-high + Claude #2 on PR #673). + srv := newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + Payload: mustJSON(t, CreateBucketRequest{BucketName: " padded "}), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusBadRequest), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "leading or trailing whitespace") +} + +func TestForwardServer_CreateBucket_BadJSONReturns400(t *testing.T) { + srv := newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + Payload: []byte("{not json"), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusBadRequest), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "invalid_body") +} + +func 
TestForwardServer_CreateBucket_AlreadyExistsReturns409(t *testing.T) { + buckets := &stubBucketsSource{ + buckets: map[string]BucketSummary{"existing": {Name: "existing"}}, + createErr: ErrBucketsAlreadyExists, + } + srv := newForwardServerWithBucketsForTest(buckets, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + Payload: mustJSON(t, CreateBucketRequest{BucketName: "existing"}), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusConflict), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "already_exists") +} + +func TestForwardServer_DeleteBucket_HappyPath(t *testing.T) { + buckets := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + } + srv := newForwardServerWithBucketsForTest(buckets, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + Payload: []byte(`{"name":"orders"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusNoContent), resp.GetStatusCode()) + require.Equal(t, "orders", buckets.lastDeleteName) +} + +func TestForwardServer_DeleteBucket_NotEmptyReturns409(t *testing.T) { + buckets := &stubBucketsSource{deleteErr: ErrBucketsNotEmpty} + srv := newForwardServerWithBucketsForTest(buckets, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + Payload: []byte(`{"name":"orders"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusConflict), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), 
"bucket_not_empty") +} + +func TestForwardServer_DeleteBucket_RejectsSlashInName(t *testing.T) { + srv := newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + Payload: []byte(`{"name":"foo/bar"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusBadRequest), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "must not contain") +} + +func TestForwardServer_PutBucketAcl_HappyPath(t *testing.T) { + buckets := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders", ACL: "private"}}, + } + srv := newForwardServerWithBucketsForTest(buckets, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, + Payload: []byte(`{"name":"orders","acl":"public-read"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusNoContent), resp.GetStatusCode()) + require.Equal(t, "orders", buckets.lastPutACLBucket) + require.Equal(t, "public-read", buckets.lastPutACLValue) +} + +func TestForwardServer_PutBucketAcl_RejectsMissingACL(t *testing.T) { + srv := newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, + Payload: []byte(`{"name":"orders"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusBadRequest), resp.GetStatusCode()) + require.Contains(t, string(resp.GetPayload()), "missing acl") +} + +func TestForwardServer_PutBucketAcl_RejectsSlashInName(t *testing.T) { + srv 
:= newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, + Payload: []byte(`{"name":"foo/bar","acl":"private"}`), + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusBadRequest), resp.GetStatusCode()) +} + +func TestForwardServer_BucketOps_PayloadTooLargeReturns413(t *testing.T) { + cases := []pb.AdminOperation{ + pb.AdminOperation_ADMIN_OP_CREATE_BUCKET, + pb.AdminOperation_ADMIN_OP_DELETE_BUCKET, + pb.AdminOperation_ADMIN_OP_PUT_BUCKET_ACL, + } + for _, op := range cases { + t.Run(op.String(), func(t *testing.T) { + srv := newForwardServerWithBucketsForTest(&stubBucketsSource{}, fullPrincipalRoleStore()) + big := make([]byte, adminForwardPayloadLimit+1) + resp, err := srv.Forward(context.Background(), &pb.AdminForwardRequest{ + Principal: &pb.AdminPrincipal{AccessKey: "AKIA_FULL", Role: "full"}, + Operation: op, + Payload: big, + }) + require.NoError(t, err) + require.Equal(t, int32(http.StatusRequestEntityTooLarge), resp.GetStatusCode()) + }) + } +} diff --git a/internal/admin/s3_handler.go b/internal/admin/s3_handler.go index 90226900c..18ab74391 100644 --- a/internal/admin/s3_handler.go +++ b/internal/admin/s3_handler.go @@ -1,12 +1,16 @@ package admin import ( + "bytes" "errors" + "io" "log/slog" "net/http" "sort" "strings" "time" + + "github.com/goccy/go-json" ) // Pagination knobs for the read-only S3 bucket list endpoint. @@ -30,8 +34,14 @@ const ( // import kv to read the field. const hlcPhysicalShift = 16 +// pathSuffixACL is the trailing segment of /s3/buckets/{name}/acl. +// Pulled out as a constant so the route switch matches a typed +// suffix rather than an inline literal — drop the helper if the +// per-bucket sub-resources ever grow beyond this single member. 
+const pathSuffixACL = "/acl" + // S3Handler serves /admin/api/v1/s3/buckets and the -// /admin/api/v1/s3/buckets/{name} sub-tree. Construct via +// /admin/api/v1/s3/buckets/{name}{,/acl} sub-tree. Construct via // NewS3Handler and hand to the admin router. // // The handler depends on a BucketsSource for in-process dispatch. @@ -39,12 +49,16 @@ const hlcPhysicalShift = 16 // well-known "S3 admin disabled" signal the router keys off of // (the routes fall through to the unknown-endpoint 404). // -// Slice 1 ships only the read-only paths (list + describe). The -// next slice will add a RoleStore for live role re-validation on -// the write endpoints (mirrors DynamoHandler.WithRoleStore). +// Writes (POST / PUT / DELETE) re-validate the principal against a +// live RoleStore on every request — the JWT freezes the role at +// login but a downgraded or revoked key must not be allowed to +// continue mutating until the token expires (Codex P1 on PR #635 +// applied the same fix on the Dynamo side). type S3Handler struct { - source BucketsSource - logger *slog.Logger + source BucketsSource + logger *slog.Logger + roles RoleStore + forwarder LeaderForwarder } // NewS3Handler wires a BucketsSource into the HTTP handler. Returns @@ -61,7 +75,7 @@ func NewS3Handler(source BucketsSource) *S3Handler { // WithLogger swaps the slog destination. Returns the receiver so // option calls chain at construction sites -// (NewS3Handler(...).WithLogger(...)). +// (NewS3Handler(...).WithLogger(...).WithRoleStore(...)). func (h *S3Handler) WithLogger(logger *slog.Logger) *S3Handler { if logger != nil { h.logger = logger @@ -69,41 +83,91 @@ func (h *S3Handler) WithLogger(logger *slog.Logger) *S3Handler { return h } -// ServeHTTP routes /buckets and /buckets/{name}. The next slice -// wires POST/PUT/DELETE; for now those return 405 so the SPA can -// distinguish "endpoint not configured" (404) from "method not -// implemented yet" (405). 
+// WithRoleStore wires the live access-key → role lookup. Required +// for the write endpoints' role re-validation; safe to omit on +// builds that disable writes (NewServer ensures it is always set +// when ServerDeps.Buckets is wired). +func (h *S3Handler) WithRoleStore(roles RoleStore) *S3Handler { + h.roles = roles + return h +} + +// WithLeaderForwarder wires the LeaderForwarder used to hand +// follower-side ErrBucketsNotLeader writes off to the leader. +// nil keeps forwarding disabled (the handler falls back to +// 503 + Retry-After:1 directly, mirroring DynamoHandler's contract). +func (h *S3Handler) WithLeaderForwarder(f LeaderForwarder) *S3Handler { + h.forwarder = f + return h +} + +// ServeHTTP routes /buckets, /buckets/{name}, and /buckets/{name}/acl. func (h *S3Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { switch { case r.URL.Path == pathS3Buckets: - switch r.Method { - case http.MethodGet: - h.handleList(w, r) - default: - writeJSONError(w, http.StatusMethodNotAllowed, "method_not_allowed", "only GET is implemented for /s3/buckets in this build") - } + h.serveCollection(w, r) case strings.HasPrefix(r.URL.Path, pathPrefixS3Buckets): - name := strings.TrimPrefix(r.URL.Path, pathPrefixS3Buckets) - // /buckets/{name}/acl is reserved for the next slice. Reject - // any sub-path with 404 here so a SPA bug that calls PUT /acl - // on this build sees a sensible error instead of mistakenly - // hitting the describe path with a "{name}/acl" string. The - // pinned test is TestS3Handler_DescribeBucket_SubpathReturns404 - // (CodeRabbit minor on PR #658 caught the previous comment - // referring to 405). 
- if strings.Contains(name, "/") { + h.servePerBucket(w, r) + default: + writeJSONError(w, http.StatusNotFound, "not_found", "") + } +} + +func (h *S3Handler) serveCollection(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodGet: + h.handleList(w, r) + case http.MethodPost: + h.handleCreate(w, r) + default: + writeJSONError(w, http.StatusMethodNotAllowed, "method_not_allowed", + "only GET or POST is allowed on /s3/buckets") + } +} + +// servePerBucket dispatches /s3/buckets/{name} and the single +// sub-resource /s3/buckets/{name}/acl. Any other sub-path 404s so a +// SPA bug pointed at a hypothetical /buckets/{name}/policy or +// similar sees an unambiguous "no handler" rather than mistakenly +// hitting the describe path with a "{name}/policy" string. The +// pinned test is TestS3Handler_DescribeBucket_SubpathReturns404 +// (CodeRabbit minor on PR #658 caught a prior version of this +// comment that mistakenly said "405"). +func (h *S3Handler) servePerBucket(w http.ResponseWriter, r *http.Request) { + tail := strings.TrimPrefix(r.URL.Path, pathPrefixS3Buckets) + if tail == "" { + writeJSONError(w, http.StatusBadRequest, "invalid_bucket_name", + "bucket name is empty") + return + } + if strings.HasSuffix(tail, pathSuffixACL) { + name := strings.TrimSuffix(tail, pathSuffixACL) + if name == "" || strings.Contains(name, "/") { writeJSONError(w, http.StatusNotFound, "not_found", "no admin S3 handler is registered for this path") return } - switch r.Method { - case http.MethodGet: - h.handleDescribe(w, r, name) - default: - writeJSONError(w, http.StatusMethodNotAllowed, "method_not_allowed", "only GET is implemented for /s3/buckets/{name} in this build") + if r.Method != http.MethodPut { + writeJSONError(w, http.StatusMethodNotAllowed, "method_not_allowed", + "only PUT is allowed on /s3/buckets/{name}/acl") + return } + h.handlePutACL(w, r, name) + return + } + if strings.Contains(tail, "/") { + writeJSONError(w, http.StatusNotFound, 
 "not_found",
+			"no admin S3 handler is registered for this path")
+		return
+	}
+	switch r.Method {
+	case http.MethodGet:
+		h.handleDescribe(w, r, tail)
+	case http.MethodDelete:
+		h.handleDelete(w, r, tail)
 	default:
-		writeJSONError(w, http.StatusNotFound, "not_found", "")
+		writeJSONError(w, http.StatusMethodNotAllowed, "method_not_allowed",
+			"only GET or DELETE is allowed on /s3/buckets/{name}")
 	}
 }
 
@@ -191,6 +255,355 @@ func FormatBucketCreatedAt(hlc uint64) string {
 	return time.UnixMilli(ms).UTC().Format(time.RFC3339)
 }
 
+// adminS3CreateBodyLimit is the per-request body cap for POST
+// /s3/buckets and PUT /s3/buckets/{name}/acl. Matches design 4.4
+// (64 KiB hard cap on every admin POST/PUT). The wrapping BodyLimit
+// middleware also enforces a 64 KiB cap; this constant is the
+// in-handler MaxBytesReader limit so an oversized body produces
+// errAdminS3BodyTooLarge before json.Decode bails on the truncation.
+const adminS3CreateBodyLimit = 64 << 10
+
+// errAdminS3BodyTooLarge is the sentinel decodeAdminS3JSONBody
+// returns when the request body trips MaxBytesReader. The handler
+// matches it to write 413 + the standard payload_too_large code,
+// distinct from the generic 400 invalid_body that other decode
+// failures produce. 
+var errAdminS3BodyTooLarge = errors.New("request body exceeds the 64 KiB admin limit") + +func (h *S3Handler) handleCreate(w http.ResponseWriter, r *http.Request) { + principal, ok := h.principalForWrite(w, r) + if !ok { + return + } + body, err := decodeAdminS3JSONBody[CreateBucketRequest](r.Body) + if err != nil { + if errors.Is(err, errAdminS3BodyTooLarge) { + WriteMaxBytesError(w) + return + } + writeJSONError(w, http.StatusBadRequest, "invalid_body", err.Error()) + return + } + if err := validateCreateBucketRequest(body); err != nil { + writeJSONError(w, http.StatusBadRequest, "invalid_body", err.Error()) + return + } + summary, err := h.source.AdminCreateBucket(r.Context(), principal, body) + if err != nil { + if h.tryForwardCreateBucket(w, r, principal, body, err) { + return + } + h.writeBucketsError(w, r, "create", err) + return + } + h.logger.LogAttrs(r.Context(), slog.LevelInfo, "admin_audit", + slog.String("actor", principal.AccessKey), + slog.String("role", string(principal.Role)), + slog.String("operation", "create_bucket"), + slog.String("bucket", body.BucketName), + ) + writeAdminJSONStatus(w, r.Context(), h.logger, http.StatusCreated, summary) +} + +func (h *S3Handler) handlePutACL(w http.ResponseWriter, r *http.Request, name string) { + principal, ok := h.principalForWrite(w, r) + if !ok { + return + } + body, err := decodeAdminS3JSONBody[PutBucketACLRequest](r.Body) + if err != nil { + if errors.Is(err, errAdminS3BodyTooLarge) { + WriteMaxBytesError(w) + return + } + writeJSONError(w, http.StatusBadRequest, "invalid_body", err.Error()) + return + } + if strings.TrimSpace(body.ACL) == "" { + writeJSONError(w, http.StatusBadRequest, "invalid_body", "acl is required") + return + } + if err := h.source.AdminPutBucketAcl(r.Context(), principal, name, body.ACL); err != nil { + if h.tryForwardPutBucketAcl(w, r, principal, name, body.ACL, err) { + return + } + h.writeBucketsError(w, r, "put_acl", err) + return + } + h.logger.LogAttrs(r.Context(), 
slog.LevelInfo, "admin_audit", + slog.String("actor", principal.AccessKey), + slog.String("role", string(principal.Role)), + slog.String("operation", "put_bucket_acl"), + slog.String("bucket", name), + slog.String("acl", body.ACL), + ) + w.WriteHeader(http.StatusNoContent) +} + +func (h *S3Handler) handleDelete(w http.ResponseWriter, r *http.Request, name string) { + principal, ok := h.principalForWrite(w, r) + if !ok { + return + } + if err := h.source.AdminDeleteBucket(r.Context(), principal, name); err != nil { + if h.tryForwardDeleteBucket(w, r, principal, name, err) { + return + } + h.writeBucketsError(w, r, "delete", err) + return + } + h.logger.LogAttrs(r.Context(), slog.LevelInfo, "admin_audit", + slog.String("actor", principal.AccessKey), + slog.String("role", string(principal.Role)), + slog.String("operation", "delete_bucket"), + slog.String("bucket", name), + ) + w.WriteHeader(http.StatusNoContent) +} + +// principalForWrite mirrors DynamoHandler.principalForWrite: pull +// the JWT principal out of the request context, enforce the +// JWT-embedded role first (defence-in-depth — a token minted as +// read-only never escalates, even if the live role index says +// `full` after a recent promotion), and — when a RoleStore is +// configured — re-validate the access key against the live role +// index so a key downgraded or revoked since login cannot keep +// mutating with a still-valid JWT. Even a still-valid JWT with +// role=full does not get a free pass — operators who revoke an +// access key get the change picked up on the next admin write +// rather than waiting for the JWT to expire. 
+func (h *S3Handler) principalForWrite(w http.ResponseWriter, r *http.Request) (AuthPrincipal, bool) { + principal, ok := PrincipalFromContext(r.Context()) + if !ok { + writeJSONError(w, http.StatusUnauthorized, "unauthenticated", + "no session principal") + return AuthPrincipal{}, false + } + // JWT role gate fires unconditionally so a session whose token + // was minted as read-only cannot perform writes even if the + // live role index later promotes the key to full. The previous + // shape skipped this check whenever h.roles != nil, which made + // it the only authorisation gate in production wiring — a + // security regression vs DynamoHandler that Codex P2 + + // coderabbitai flagged on PR #669 (the JWT freezes the role + // at login, the live role index can only constrain further). + if !principal.Role.AllowsWrite() { + writeJSONError(w, http.StatusForbidden, "forbidden", + "this endpoint requires a full-access role") + return AuthPrincipal{}, false + } + // Live re-validation. Skipped when no RoleStore is configured + // (single-handler unit tests where wiring up the role index + // would obscure the source-level behaviour the test is + // exercising); production wiring always sets one. + if h.roles != nil { + liveRole, found := h.roles.LookupRole(principal.AccessKey) + if !found || !liveRole.AllowsWrite() { + writeJSONError(w, http.StatusForbidden, "forbidden", + "this endpoint requires a full-access role") + return AuthPrincipal{}, false + } + // Use the live role downstream; the JWT may carry a stale + // value but the live one is authoritative once both gates + // agree the request is allowed. + principal.Role = liveRole + } + return principal, true +} + +// writeBucketsError translates a BucketsSource error into the +// appropriate HTTP response. Internal-server-error fallthrough logs +// the raw err.Error() but never sends it to the client, matching +// the Dynamo side's writeTablesError policy. 
+func (h *S3Handler) writeBucketsError(w http.ResponseWriter, r *http.Request, op string, err error) {
+	switch {
+	case errors.Is(err, ErrBucketsForbidden):
+		writeJSONError(w, http.StatusForbidden, "forbidden",
+			"this endpoint requires a full-access role")
+	case errors.Is(err, ErrBucketsNotLeader):
+		// Reached only when no LeaderForwarder is configured (single-
+		// node or leader-only deployments): when a forwarder is
+		// wired, the tryForward* helpers intercept ErrBucketsNotLeader
+		// before writeBucketsError is called.
+		w.Header().Set("Retry-After", "1")
+		writeJSONError(w, http.StatusServiceUnavailable, "leader_unavailable",
+			"this admin node is not the raft leader")
+	case errors.Is(err, ErrBucketsNotFound):
+		writeJSONError(w, http.StatusNotFound, "not_found", "bucket does not exist")
+	case errors.Is(err, ErrBucketsAlreadyExists):
+		writeJSONError(w, http.StatusConflict, "already_exists", "bucket already exists")
+	case errors.Is(err, ErrBucketsNotEmpty):
+		writeJSONError(w, http.StatusConflict, "bucket_not_empty",
+			"bucket still has objects; remove them and retry")
+	default:
+		var verr *ValidationError
+		if errors.As(err, &verr) {
+			writeJSONError(w, http.StatusBadRequest, "invalid_request", verr.Error())
+			return
+		}
+		h.logger.LogAttrs(r.Context(), slog.LevelError, "admin s3 "+op+" bucket failed",
+			slog.String("error", err.Error()),
+		)
+		writeJSONError(w, http.StatusInternalServerError, "s3_"+op+"_failed",
+			"failed to "+op+" bucket; see server logs")
+	}
+}
+
+// validateCreateBucketRequest applies the lightweight client-side
+// guard rails. Bucket-name format checks happen in the adapter
+// (validateS3BucketName) — we only catch the obvious mistakes
+// here so the SPA gets a typed 400 with a clear message rather
+// than the more generic adapter-level wrapping. 
+func validateCreateBucketRequest(in CreateBucketRequest) error { + if strings.TrimSpace(in.BucketName) == "" { + return errors.New("bucket_name is required") + } + if in.BucketName != strings.TrimSpace(in.BucketName) { + return errors.New("bucket_name must not have leading or trailing whitespace") + } + return nil +} + +// decodeAdminS3JSONBody is the shared decoder for POST /s3/buckets +// and PUT /s3/buckets/{name}/acl. Strict (DisallowUnknownFields, +// trailing-token rejection, NUL-byte rejection) so a future +// schema change does not silently accept extra keys. +func decodeAdminS3JSONBody[T any](body io.Reader) (T, error) { + var zero T + if body == nil { + return zero, errors.New("request body is empty") + } + // nil ResponseWriter is safe here: MaxBytesReader's only use of + // the writer is the connection-close optimisation (sets the + // requestTooLarger flag on the writer when the cap is hit); the + // nil-check inside is a comma-ok type assertion, so passing nil + // just skips the optimisation. The cap itself is still enforced + // and surfaces as *http.MaxBytesError on the read, which the + // errors.As below catches and translates to errAdminS3BodyTooLarge. + // We do not have access to the *http.Request's writer at this + // layer (the helper takes only an io.Reader so the same code is + // reusable from non-HTTP entry points like AdminForward decode). + // Claude review on PR #669 flagged the nil as undocumented. + limited := http.MaxBytesReader(nil, io.NopCloser(body), adminS3CreateBodyLimit) + raw, err := io.ReadAll(limited) + if err != nil { + var me *http.MaxBytesError + if errors.As(err, &me) { + return zero, errAdminS3BodyTooLarge + } + return zero, errors.New("failed to read request body") + } + if len(raw) == 0 { + // httptest passes http.NoBody for a nil-body request, which + // reads as an empty byte slice rather than an error. 
The + // caller's contract is "missing body → 400 invalid_body / + // 'empty'", so surface that explicitly here rather than + // letting the JSON decoder bubble up a generic + // "not valid JSON" error. + return zero, errors.New("request body is empty") + } + if bytes.IndexByte(raw, 0) >= 0 { + // goccy/go-json treats raw NUL as end-of-input; reject so a + // payload like `{"acl":"private"}\x00{"x":1}` cannot smuggle + // a second JSON object past dec.More(). Codex P2 on PR #635 + // caught the same vector on the leader-side decoder. + return zero, errors.New("request body contains a NUL byte") + } + dec := json.NewDecoder(bytes.NewReader(raw)) + dec.DisallowUnknownFields() + var out T + if err := dec.Decode(&out); err != nil { + return zero, errors.New("request body is not valid JSON") + } + if dec.More() { + return zero, errors.New("request body has trailing data after the first JSON value") + } + return out, nil +} + +// tryForwardCreateBucket / tryForwardPutBucketAcl / +// tryForwardDeleteBucket mirror the Dynamo handler's forwarding +// pattern: only the ErrBucketsNotLeader source error triggers +// forwarding, and only when a forwarder is configured. Each helper +// owns the leader-response replay so the per-method handlers stay +// short. 
+func (h *S3Handler) tryForwardCreateBucket(w http.ResponseWriter, r *http.Request, principal AuthPrincipal, body CreateBucketRequest, sourceErr error) bool { + if !errors.Is(sourceErr, ErrBucketsNotLeader) || h.forwarder == nil { + return false + } + res, err := h.forwarder.ForwardCreateBucket(r.Context(), principal, body) + if err != nil { + h.writeForwardFailure(w, r, "create", err) + return true + } + h.writeForwardResult(w, r, res) + return true +} + +func (h *S3Handler) tryForwardPutBucketAcl(w http.ResponseWriter, r *http.Request, principal AuthPrincipal, name, acl string, sourceErr error) bool { + if !errors.Is(sourceErr, ErrBucketsNotLeader) || h.forwarder == nil { + return false + } + res, err := h.forwarder.ForwardPutBucketAcl(r.Context(), principal, name, acl) + if err != nil { + h.writeForwardFailure(w, r, "put_acl", err) + return true + } + h.writeForwardResult(w, r, res) + return true +} + +func (h *S3Handler) tryForwardDeleteBucket(w http.ResponseWriter, r *http.Request, principal AuthPrincipal, name string, sourceErr error) bool { + if !errors.Is(sourceErr, ErrBucketsNotLeader) || h.forwarder == nil { + return false + } + res, err := h.forwarder.ForwardDeleteBucket(r.Context(), principal, name) + if err != nil { + h.writeForwardFailure(w, r, "delete", err) + return true + } + h.writeForwardResult(w, r, res) + return true +} + +// writeForwardResult re-emits the leader's structured response +// verbatim. Mirrors DynamoHandler.writeForwardResult — same headers +// and 503-+-Retry-After contract. 
+func (h *S3Handler) writeForwardResult(w http.ResponseWriter, r *http.Request, res *ForwardResult) { + w.Header().Set("Content-Type", res.ContentType) + w.Header().Set("X-Content-Type-Options", "nosniff") + w.Header().Set("Cache-Control", "no-store") + if res.StatusCode == http.StatusServiceUnavailable { + w.Header().Set("Retry-After", "1") + } + w.WriteHeader(res.StatusCode) + if len(res.Payload) > 0 { + if _, err := w.Write(res.Payload); err != nil { + h.logger.LogAttrs(r.Context(), slog.LevelWarn, "admin s3 forward response write failed", + slog.String("error", err.Error()), + ) + } + } +} + +// writeForwardFailure handles forwarder-side errors that did not +// produce a structured leader response: ErrLeaderUnavailable +// (election in flight) and gRPC transport errors. Both surface as +// 503 + Retry-After: 1 so the SPA's retry contract is uniform. +// ErrLeaderUnavailable is intentionally NOT logged at LevelError +// — elections are routine; transport errors get logged so +// operators can investigate. +func (h *S3Handler) writeForwardFailure(w http.ResponseWriter, r *http.Request, op string, err error) { + if !errors.Is(err, ErrLeaderUnavailable) { + h.logger.LogAttrs(r.Context(), slog.LevelError, "admin s3 "+op+" forward failed", + slog.String("error", err.Error()), + ) + } + w.Header().Set("Retry-After", "1") + writeJSONError(w, http.StatusServiceUnavailable, "leader_unavailable", + "raft leader currently unavailable; retry shortly") +} + // paginateBuckets slices `buckets` (already lex-sorted by the // adapter) into a single page starting strictly after `startAfter`. 
// Returns the page plus the opaque cursor for the next call ("" if diff --git a/internal/admin/s3_handler_test.go b/internal/admin/s3_handler_test.go index 6b408f4fb..962786232 100644 --- a/internal/admin/s3_handler_test.go +++ b/internal/admin/s3_handler_test.go @@ -4,9 +4,11 @@ import ( "context" "encoding/base64" "errors" + "io" "net/http" "net/http/httptest" "sort" + "strings" "testing" "github.com/goccy/go-json" @@ -15,13 +17,26 @@ import ( // stubBucketsSource is the in-memory test double the S3 admin // handler tests use. AdminListBuckets returns summaries in lex order -// of bucket name, matching the adapter contract; descErr / listErr -// let tests trigger the storage-failure paths without standing up a -// real adapter. +// of bucket name, matching the adapter contract; the *Err fields let +// tests trigger the structured-failure paths without standing up a +// real adapter. lastCreate / lastDelete / lastPutACL pin the +// principal + payload that reached the source so tests can prove +// the role gate and body decode wired through correctly. 
type stubBucketsSource struct { - buckets map[string]BucketSummary - listErr error - descErr error + buckets map[string]BucketSummary + listErr error + descErr error + createErr error + putACLErr error + deleteErr error + + lastCreatePrincipal AuthPrincipal + lastCreateInput CreateBucketRequest + lastPutACLPrincipal AuthPrincipal + lastPutACLBucket string + lastPutACLValue string + lastDeletePrincipal AuthPrincipal + lastDeleteName string } func (s *stubBucketsSource) AdminListBuckets(_ context.Context) ([]BucketSummary, error) { @@ -51,6 +66,61 @@ func (s *stubBucketsSource) AdminDescribeBucket(_ context.Context, name string) return &b, true, nil } +func (s *stubBucketsSource) AdminCreateBucket(_ context.Context, principal AuthPrincipal, in CreateBucketRequest) (*BucketSummary, error) { + s.lastCreatePrincipal = principal + s.lastCreateInput = in + if s.createErr != nil { + return nil, s.createErr + } + if _, exists := s.buckets[in.BucketName]; exists { + return nil, ErrBucketsAlreadyExists + } + acl := in.ACL + if acl == "" { + acl = "private" + } + summary := BucketSummary{ + Name: in.BucketName, + ACL: acl, + Generation: 1, + Owner: principal.AccessKey, + } + if s.buckets == nil { + s.buckets = map[string]BucketSummary{} + } + s.buckets[in.BucketName] = summary + return &summary, nil +} + +func (s *stubBucketsSource) AdminPutBucketAcl(_ context.Context, principal AuthPrincipal, name, acl string) error { + s.lastPutACLPrincipal = principal + s.lastPutACLBucket = name + s.lastPutACLValue = acl + if s.putACLErr != nil { + return s.putACLErr + } + bucket, ok := s.buckets[name] + if !ok { + return ErrBucketsNotFound + } + bucket.ACL = acl + s.buckets[name] = bucket + return nil +} + +func (s *stubBucketsSource) AdminDeleteBucket(_ context.Context, principal AuthPrincipal, name string) error { + s.lastDeletePrincipal = principal + s.lastDeleteName = name + if s.deleteErr != nil { + return s.deleteErr + } + if _, ok := s.buckets[name]; !ok { + return 
ErrBucketsNotFound + } + delete(s.buckets, name) + return nil +} + func newS3HandlerForTest(src BucketsSource) *S3Handler { return NewS3Handler(src) } @@ -201,11 +271,12 @@ func TestS3Handler_ListBuckets_ForbiddenReturns403(t *testing.T) { require.Contains(t, rec.Body.String(), "forbidden") } -func TestS3Handler_ListBuckets_RejectsNonGet(t *testing.T) { - // POST/PUT/DELETE on /buckets are reserved for the next slice; - // for now the handler returns 405 so a SPA bug that calls them - // against this build sees a sensible error rather than a 404. - cases := []string{http.MethodPost, http.MethodPut, http.MethodDelete, http.MethodPatch} +func TestS3Handler_ListBuckets_RejectsUnsupportedMethods(t *testing.T) { + // POST is now the create endpoint (slice 2); PUT / DELETE / + // PATCH on the collection root are still 405. The pinned set + // guards against a future routing refactor that accidentally + // accepts a non-listed method on the collection. + cases := []string{http.MethodPut, http.MethodDelete, http.MethodPatch} for _, m := range cases { t.Run(m, func(t *testing.T) { h := newS3HandlerForTest(&stubBucketsSource{}) @@ -268,20 +339,33 @@ func TestS3Handler_DescribeBucket_ForbiddenReturns403(t *testing.T) { require.Contains(t, rec.Body.String(), "forbidden") } -func TestS3Handler_DescribeBucket_SubpathReturns404(t *testing.T) { - // /buckets/foo/acl is reserved for the next slice. Until then, - // any path with a slash inside the bucket-name segment must 404 +func TestS3Handler_PerBucket_UnknownSubpathReturns404(t *testing.T) { + // /buckets/foo/policy (or any non-/acl sub-resource) must 404 // rather than mistakenly reach handleDescribe with the full - // "foo/acl" string. + // "foo/policy" string — protects against a SPA bug that mis- + // constructs the URL for a future sub-resource. 
h := newS3HandlerForTest(&stubBucketsSource{buckets: map[string]BucketSummary{ "foo": {Name: "foo"}, }}) - req := httptest.NewRequest(http.MethodGet, pathS3Buckets+"/foo/acl", nil) + req := httptest.NewRequest(http.MethodGet, pathS3Buckets+"/foo/policy", nil) rec := httptest.NewRecorder() h.ServeHTTP(rec, req) require.Equal(t, http.StatusNotFound, rec.Code) } +func TestS3Handler_PerBucket_AclSubpathRejectsGet(t *testing.T) { + // /buckets/{name}/acl is a real sub-resource (PUT is the wire + // shape for changing the ACL); GET on that exact path must + // return 405 rather than mistakenly reaching handleDescribe. + h := newS3HandlerForTest(&stubBucketsSource{buckets: map[string]BucketSummary{ + "foo": {Name: "foo"}, + }}) + req := httptest.NewRequest(http.MethodGet, pathS3Buckets+"/foo/acl", nil) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusMethodNotAllowed, rec.Code) +} + func TestFormatBucketCreatedAt_ZeroProducesEmpty(t *testing.T) { // Zero HLC means "no creation time recorded" — the SPA renders // it as a dash, so we emit an empty string rather than the Unix @@ -299,3 +383,563 @@ func TestFormatBucketCreatedAt_RoundtripsSecondPrecision(t *testing.T) { hlc := uint64(wallMillis) << hlcPhysicalShift require.Equal(t, "2026-05-04T06:00:00Z", FormatBucketCreatedAt(hlc)) } + +// withFullPrincipal injects a full-access AuthPrincipal into the +// request context so write-handler tests can bypass the SessionAuth +// middleware while keeping the role check live. Mirrors +// withWritePrincipal in dynamo_handler_test.go. +func withFullPrincipal(req *http.Request) *http.Request { + return req.WithContext(context.WithValue(req.Context(), ctxKeyPrincipal, + AuthPrincipal{AccessKey: "AKIA_FULL", Role: RoleFull})) +} + +// withReadOnlyPrincipalForS3 mirrors withFullPrincipal but with the +// read-only role. 
Pinned helper makes the role intent obvious at +// the call site (the variable name in dynamo's withReadOnlyPrincipal +// is identical, hence the local rename to avoid cross-file +// confusion when both files are open). +func withReadOnlyPrincipalForS3(req *http.Request) *http.Request { + return req.WithContext(context.WithValue(req.Context(), ctxKeyPrincipal, + AuthPrincipal{AccessKey: "AKIA_RO", Role: RoleReadOnly})) +} + +// validCreateBucketBody returns a minimal-but-valid POST body the +// happy-path tests share. +func validCreateBucketBody() string { + return `{"bucket_name":"public-assets","acl":"public-read"}` +} + +func TestS3Handler_CreateBucket_HappyPath(t *testing.T) { + src := &stubBucketsSource{} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusCreated, rec.Code) + require.Equal(t, "public-assets", src.lastCreateInput.BucketName) + require.Equal(t, "public-read", src.lastCreateInput.ACL) + require.Equal(t, RoleFull, src.lastCreatePrincipal.Role) + var got BucketSummary + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &got)) + require.Equal(t, "public-assets", got.Name) + require.Equal(t, "public-read", got.ACL) +} + +func TestS3Handler_CreateBucket_ReadOnlyRoleRejected(t *testing.T) { + src := &stubBucketsSource{} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withReadOnlyPrincipalForS3(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusForbidden, rec.Code) + require.Empty(t, src.lastCreateInput.BucketName, + "role gate must short-circuit before the source is reached") +} + +// TestS3Handler_PrincipalForWrite_JWTGateFiresWithRoleStore pins the +// fix for Codex P2 + coderabbitai Minor on PR #669: the 
previous +// shape skipped the JWT role check whenever h.roles != nil +// (production wiring), so a session whose token was minted as +// read-only could mutate buckets if the live role index had since +// promoted the key to full. Aligning with DynamoHandler, the JWT +// gate now fires unconditionally — the live store can only narrow +// the JWT's role, never widen it. +// +// All three S3 admin write endpoints exercise the same +// principalForWrite, so the table sweeps each one to lock down +// the contract per-endpoint (a future op that forgets to call +// principalForWrite would fail the relevant row immediately). +func TestS3Handler_PrincipalForWrite_JWTGateFiresWithRoleStore(t *testing.T) { + cases := []struct { + name string + method string + path string + body string + }{ + {"create_bucket", http.MethodPost, pathS3Buckets, validCreateBucketBody()}, + {"put_bucket_acl", http.MethodPut, pathS3Buckets + "/x/acl", `{"acl":"private"}`}, + {"delete_bucket", http.MethodDelete, pathS3Buckets + "/x", ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + src := &stubBucketsSource{} + // Live RoleStore says AKIA_RO is full — exactly the + // pre-fix escalation window. JWT carries read-only + // (the "freeze at login" contract). Handler must reject. 
+ roles := MapRoleStore{"AKIA_RO": RoleFull} + h := NewS3Handler(src).WithRoleStore(roles) + var body io.Reader + if tc.body != "" { + body = strings.NewReader(tc.body) + } + req := httptest.NewRequest(tc.method, tc.path, body) + req = withReadOnlyPrincipalForS3(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusForbidden, rec.Code, + "JWT read-only must reject even when live role index says full") + require.Empty(t, src.lastCreateInput.BucketName, + "role gate must short-circuit before the source is reached") + require.Empty(t, src.lastDeleteName, + "role gate must short-circuit before the source is reached") + }) + } +} + +// TestS3Handler_PrincipalForWrite_LiveRoleNarrowsJWT covers the +// other half of the contract: a JWT minted as full is rejected +// when the live role index has demoted the key to read-only or +// removed it entirely. Already covered conceptually by the +// existing "ReadOnlyRoleRejected" tests for the h.roles == nil +// path, but those leave the live-role-narrowing branch unproven. 
+func TestS3Handler_PrincipalForWrite_LiveRoleNarrowsJWT(t *testing.T) { + cases := []struct { + name string + roles MapRoleStore + }{ + {"live role demoted to read-only", MapRoleStore{"AKIA_FULL": RoleReadOnly}}, + {"live role removed from index", MapRoleStore{}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + src := &stubBucketsSource{} + h := NewS3Handler(src).WithRoleStore(tc.roles) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusForbidden, rec.Code) + require.Empty(t, src.lastCreateInput.BucketName, + "live role gate must short-circuit before the source is reached") + }) + } +} + +func TestS3Handler_CreateBucket_AlreadyExistsReturns409(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"public-assets": {Name: "public-assets"}}, + createErr: ErrBucketsAlreadyExists, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusConflict, rec.Code) + require.Contains(t, rec.Body.String(), "already_exists") +} + +func TestS3Handler_CreateBucket_NotLeaderReturns503(t *testing.T) { + src := &stubBucketsSource{createErr: ErrBucketsNotLeader} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusServiceUnavailable, rec.Code) + require.Equal(t, "1", rec.Header().Get("Retry-After")) + require.Contains(t, rec.Body.String(), "leader_unavailable") +} + +func TestS3Handler_CreateBucket_RejectsInvalidJSON(t *testing.T) { + cases := []struct { + name 
string + body string + want string + }{ + {"empty body", "", "request body is empty"}, + {"not json", "not-a-json", "request body is not valid JSON"}, + {"missing bucket_name", `{"acl":"private"}`, "bucket_name is required"}, + {"unknown field", `{"bucket_name":"x","extra":"x"}`, "request body is not valid JSON"}, + {"trailing data", `{"bucket_name":"x"}{"extra":1}`, "trailing data"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{}) + var body io.Reader + if tc.body != "" { + body = strings.NewReader(tc.body) + } + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, body) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code, "body=%q", tc.body) + require.Contains(t, rec.Body.String(), tc.want) + }) + } +} + +func TestS3Handler_CreateBucket_RejectsNULByte(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{}) + body := "{\"bucket_name\":\"users\"}\x00{\"extra\":1}" + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, strings.NewReader(body)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "NUL byte") +} + +func TestS3Handler_CreateBucket_OversizeBodyReturns413(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{}) + // 64 KiB + 1 byte: just over the limit. 
+ body := strings.Repeat("a", adminS3CreateBodyLimit+1) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, strings.NewReader(body)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusRequestEntityTooLarge, rec.Code) +} + +func TestS3Handler_PutBucketAcl_HappyPath(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders", ACL: "private"}}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/orders/acl", + strings.NewReader(`{"acl":"public-read"}`)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusNoContent, rec.Code) + require.Equal(t, "orders", src.lastPutACLBucket) + require.Equal(t, "public-read", src.lastPutACLValue) +} + +func TestS3Handler_PutBucketAcl_ReadOnlyRoleRejected(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/orders/acl", + strings.NewReader(`{"acl":"public-read"}`)) + req = withReadOnlyPrincipalForS3(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusForbidden, rec.Code) + require.Empty(t, src.lastPutACLBucket, + "role gate must short-circuit before the source is reached") +} + +func TestS3Handler_PutBucketAcl_RequiresAclField(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + }) + req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/orders/acl", + strings.NewReader(`{}`)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "acl is required") +} + +func TestS3Handler_PutBucketAcl_MissingBucketReturns404(t 
*testing.T) { + src := &stubBucketsSource{putACLErr: ErrBucketsNotFound} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/missing/acl", + strings.NewReader(`{"acl":"private"}`)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusNotFound, rec.Code) + require.Contains(t, rec.Body.String(), "not_found") +} + +func TestS3Handler_PutBucketAcl_RejectsNonPut(t *testing.T) { + cases := []string{http.MethodGet, http.MethodPost, http.MethodDelete, http.MethodPatch} + for _, m := range cases { + t.Run(m, func(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{}) + req := httptest.NewRequest(m, pathS3Buckets+"/foo/acl", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusMethodNotAllowed, rec.Code) + }) + } +} + +func TestS3Handler_DeleteBucket_HappyPath(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusNoContent, rec.Code) + require.Equal(t, "orders", src.lastDeleteName) + require.NotContains(t, src.buckets, "orders") +} + +func TestS3Handler_DeleteBucket_ReadOnlyRoleRejected(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil) + req = withReadOnlyPrincipalForS3(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusForbidden, rec.Code) + require.Empty(t, src.lastDeleteName) +} + +func TestS3Handler_DeleteBucket_NotEmptyReturns409(t *testing.T) { + src := &stubBucketsSource{deleteErr: 
ErrBucketsNotEmpty} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusConflict, rec.Code) + require.Contains(t, rec.Body.String(), "bucket_not_empty") +} + +func TestS3Handler_DeleteBucket_MissingReturns404(t *testing.T) { + src := &stubBucketsSource{deleteErr: ErrBucketsNotFound} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/missing", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusNotFound, rec.Code) + require.Contains(t, rec.Body.String(), "not_found") +} + +func TestS3Handler_DeleteBucket_NotLeaderReturns503(t *testing.T) { + src := &stubBucketsSource{deleteErr: ErrBucketsNotLeader} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusServiceUnavailable, rec.Code) + require.Equal(t, "1", rec.Header().Get("Retry-After")) +} + +func TestS3Handler_WriteEndpoints_ValidationErrorReturns400(t *testing.T) { + // Pin the writeBucketsError *ValidationError arm — exercised + // end-to-end via the bridge (adapter ErrAdminInvalid* → + // translateAdminBucketsError → &ValidationError{...}) but + // previously had no handler-level coverage. Locking the arm + // down protects against a future refactor that drops the + // errors.As branch and silently downgrades typed validation + // failures to 500 (Claude review on PR #669). 
+ const msg = "invalid bucket name: uppercase letters not allowed" + t.Run("create", func(t *testing.T) { + src := &stubBucketsSource{createErr: &ValidationError{Message: msg}} + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPost, pathS3Buckets, + strings.NewReader(validCreateBucketBody())) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "invalid_request") + require.Contains(t, rec.Body.String(), msg) + }) + t.Run("put_acl", func(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + putACLErr: &ValidationError{Message: msg}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/orders/acl", + strings.NewReader(`{"acl":"public-read"}`)) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "invalid_request") + require.Contains(t, rec.Body.String(), msg) + }) + t.Run("delete", func(t *testing.T) { + src := &stubBucketsSource{ + buckets: map[string]BucketSummary{"orders": {Name: "orders"}}, + deleteErr: &ValidationError{Message: msg}, + } + h := newS3HandlerForTest(src) + req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil) + req = withFullPrincipal(req) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "invalid_request") + require.Contains(t, rec.Body.String(), msg) + }) +} + +// notLeaderBucketsSource simulates a follower's BucketsSource — every +// write path returns ErrBucketsNotLeader. Used to exercise the +// tryForward* integration path on S3Handler. 
+type notLeaderBucketsSource struct {
+	stubBucketsSource
+}
+
+func (s *notLeaderBucketsSource) AdminCreateBucket(_ context.Context, _ AuthPrincipal, _ CreateBucketRequest) (*BucketSummary, error) {
+	return nil, ErrBucketsNotLeader
+}
+
+func (s *notLeaderBucketsSource) AdminPutBucketAcl(_ context.Context, _ AuthPrincipal, _ string, _ string) error {
+	return ErrBucketsNotLeader
+}
+
+func (s *notLeaderBucketsSource) AdminDeleteBucket(_ context.Context, _ AuthPrincipal, _ string) error {
+	return ErrBucketsNotLeader
+}
+
+func TestS3Handler_CreateBucket_ForwardsOnNotLeader(t *testing.T) {
+	src := &notLeaderBucketsSource{}
+	fwd := &stubLeaderForwarder{createBucketRes: &ForwardResult{
+		StatusCode:  http.StatusCreated,
+		Payload:     []byte(`{"bucket_name":"public-assets","acl":"public-read"}`),
+		ContentType: "application/json; charset=utf-8",
+	}}
+	h := NewS3Handler(src).WithLeaderForwarder(fwd)
+	req := httptest.NewRequest(http.MethodPost, pathS3Buckets,
+		strings.NewReader(validCreateBucketBody()))
+	req = withFullPrincipal(req)
+	rec := httptest.NewRecorder()
+	h.ServeHTTP(rec, req)
+	require.Equal(t, http.StatusCreated, rec.Code)
+	require.Equal(t, "public-assets", fwd.lastCreateBucketInput.BucketName,
+		"forwarder must be invoked when source returns ErrBucketsNotLeader")
+	require.JSONEq(t, `{"bucket_name":"public-assets","acl":"public-read"}`, rec.Body.String())
+}
+
+func TestS3Handler_DeleteBucket_ForwardsOnNotLeader(t *testing.T) {
+	src := &notLeaderBucketsSource{}
+	fwd := &stubLeaderForwarder{deleteBucketRes: &ForwardResult{
+		StatusCode:  http.StatusNoContent,
+		ContentType: "application/json; charset=utf-8",
+	}}
+	h := NewS3Handler(src).WithLeaderForwarder(fwd)
+	req := httptest.NewRequest(http.MethodDelete, pathS3Buckets+"/orders", nil)
+	req = withFullPrincipal(req)
+	rec := httptest.NewRecorder()
+	h.ServeHTTP(rec, req)
+	require.Equal(t, http.StatusNoContent, rec.Code)
+	require.Equal(t, "orders", fwd.lastDeleteBucketName)
+}
+
+func TestS3Handler_PutBucketAcl_ForwardsOnNotLeader(t *testing.T) {
+	src := &notLeaderBucketsSource{}
+	fwd := &stubLeaderForwarder{putACLRes: &ForwardResult{
+		StatusCode:  http.StatusNoContent,
+		ContentType: "application/json; charset=utf-8",
+	}}
+	h := NewS3Handler(src).WithLeaderForwarder(fwd)
+	req := httptest.NewRequest(http.MethodPut, pathS3Buckets+"/orders/acl",
+		strings.NewReader(`{"acl":"public-read"}`))
+	req = withFullPrincipal(req)
+	rec := httptest.NewRecorder()
+	h.ServeHTTP(rec, req)
+	require.Equal(t, http.StatusNoContent, rec.Code)
+	require.Equal(t, "orders", fwd.lastPutACLBucket)
+	require.Equal(t, "public-read", fwd.lastPutACLValue)
+}
+
+func TestS3Handler_CreateBucket_ForwarderLeaderUnavailableReturns503(t *testing.T) {
+	// ErrLeaderUnavailable from the forwarder layer maps to 503 +
+	// Retry-After:1 — the SPA's retry contract is uniform whether
+	// the leader is briefly absent or the network hiccupped.
+	src := &notLeaderBucketsSource{}
+	fwd := &stubLeaderForwarder{createBucketErr: ErrLeaderUnavailable}
+	h := NewS3Handler(src).WithLeaderForwarder(fwd)
+	req := httptest.NewRequest(http.MethodPost, pathS3Buckets,
+		strings.NewReader(validCreateBucketBody()))
+	req = withFullPrincipal(req)
+	rec := httptest.NewRecorder()
+	h.ServeHTTP(rec, req)
+	require.Equal(t, http.StatusServiceUnavailable, rec.Code)
+	require.Equal(t, "1", rec.Header().Get("Retry-After"))
+	require.Contains(t, rec.Body.String(), "leader_unavailable")
+}
+
+func TestS3Handler_CreateBucket_ForwarderTransportErrorReturns503(t *testing.T) {
+	// Generic gRPC error → 503 + Retry-After. The error is logged
+	// on the server but never surfaces to the SPA.
+	src := &notLeaderBucketsSource{}
+	fwd := &stubLeaderForwarder{createBucketErr: errors.New("gRPC sentinel TX-1")}
+	h := NewS3Handler(src).WithLeaderForwarder(fwd)
+	req := httptest.NewRequest(http.MethodPost, pathS3Buckets,
+		strings.NewReader(validCreateBucketBody()))
+	req = withFullPrincipal(req)
+	rec := httptest.NewRecorder()
+	h.ServeHTTP(rec, req)
+	require.Equal(t, http.StatusServiceUnavailable, rec.Code)
+	require.Equal(t, "1", rec.Header().Get("Retry-After"))
+	require.NotContains(t, rec.Body.String(), "TX-1",
+		"transport error detail must not leak to the client")
+}
+
+func TestS3Handler_CreateBucket_ForwarderNotInvokedForNonNotLeader(t *testing.T) {
+	// The forwarder gate must run ONLY on ErrBucketsNotLeader; a
+	// generic source error or AlreadyExists must fall through to
+	// writeBucketsError. Otherwise a leader-direct 409 would be
+	// silently re-applied at the leader.
+	cases := []struct {
+		name     string
+		err      error
+		wantCode int
+	}{
+		{"already_exists", ErrBucketsAlreadyExists, http.StatusConflict},
+		{"forbidden", ErrBucketsForbidden, http.StatusForbidden},
+		{"generic", errors.New("opaque storage failure"), http.StatusInternalServerError},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			fwd := &stubLeaderForwarder{}
+			src := &stubBucketsSource{createErr: tc.err}
+			h := NewS3Handler(src).WithLeaderForwarder(fwd)
+			req := httptest.NewRequest(http.MethodPost, pathS3Buckets,
+				strings.NewReader(validCreateBucketBody()))
+			req = withFullPrincipal(req)
+			rec := httptest.NewRecorder()
+			h.ServeHTTP(rec, req)
+			require.Equal(t, tc.wantCode, rec.Code)
+			require.Empty(t, fwd.lastCreateBucketInput.BucketName,
+				"forwarder must not be invoked for source error: %s", tc.name)
+		})
+	}
+}
+
+func TestS3Handler_WriteEndpoints_RejectMissingPrincipal(t *testing.T) {
+	// Without a session principal in the context, writes must
+	// 401 — SessionAuth normally enforces this; principalForWrite
+	// is the second-line guard.
+ cases := []struct { + name string + method string + path string + body string + }{ + {"create", http.MethodPost, pathS3Buckets, validCreateBucketBody()}, + {"put_acl", http.MethodPut, pathS3Buckets + "/foo/acl", `{"acl":"private"}`}, + {"delete", http.MethodDelete, pathS3Buckets + "/foo", ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + h := newS3HandlerForTest(&stubBucketsSource{}) + var body io.Reader + if tc.body != "" { + body = strings.NewReader(tc.body) + } + req := httptest.NewRequest(tc.method, tc.path, body) + rec := httptest.NewRecorder() + h.ServeHTTP(rec, req) + require.Equal(t, http.StatusUnauthorized, rec.Code) + }) + } +} diff --git a/internal/admin/server.go b/internal/admin/server.go index c1e1587f7..d3704a38c 100644 --- a/internal/admin/server.go +++ b/internal/admin/server.go @@ -177,13 +177,21 @@ func buildDynamoHandlerForDeps(deps ServerDeps, logger *slog.Logger) http.Handle } // buildS3HandlerForDeps is the parallel constructor for the S3 -// admin handler. Slice 1 is read-only; the next slice will plumb a -// MapRoleStore and the LeaderForwarder through the same shape. +// admin handler. Wires the live RoleStore so write endpoints +// re-validate the principal on every request, plus the +// LeaderForwarder so a follower hands ErrBucketsNotLeader writes +// off to the leader transparently — both mirror the Dynamo side. func buildS3HandlerForDeps(deps ServerDeps, logger *slog.Logger) http.Handler { if deps.Buckets == nil { return nil } - return NewS3Handler(deps.Buckets).WithLogger(logger) + h := NewS3Handler(deps.Buckets). + WithLogger(logger). 
+ WithRoleStore(MapRoleStore(deps.Roles)) + if deps.Forwarder != nil { + h = h.WithLeaderForwarder(deps.Forwarder) + } + return h } // buildSqsHandlerForDeps is the parallel constructor for the SQS @@ -230,7 +238,10 @@ func (s *Server) APIHandler() http.Handler { // GET /admin/api/v1/dynamo/tables/{name} (auth required) // DELETE /admin/api/v1/dynamo/tables/{name} (auth required, full role) // GET /admin/api/v1/s3/buckets (auth required) +// POST /admin/api/v1/s3/buckets (auth required, full role) // GET /admin/api/v1/s3/buckets/{name} (auth required) +// DELETE /admin/api/v1/s3/buckets/{name} (auth required, full role) +// PUT /admin/api/v1/s3/buckets/{name}/acl (auth required, full role) // GET /admin/api/v1/keyviz/matrix (auth required) // // Body limit applies uniformly. CSRF and Audit middleware apply to diff --git a/main.go b/main.go index 02f09f7fe..1efb9a0d0 100644 --- a/main.go +++ b/main.go @@ -1329,21 +1329,28 @@ func (r *runtimeServerRunner) start() error { if err := startRedisServer(r.ctx, r.lc, r.eg, r.redisAddress, r.shardStore, r.coordinate, r.leaderRedis, r.pubsubRelay, r.metricsRegistry, r.readTracker); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } - // startDynamoDBServer must run BEFORE startRaftServers so the - // resulting DynamoDBServer is available to the leader-side gRPC - // AdminForward registration in startRaftServers (design 3.3). - // Both servers listen on different addresses; the dynamo HTTP - // listener accepting traffic before raft TCP listeners are up - // is no different from the existing startup-race semantics — a - // hit in that window already returned 503 before this reorder. + // startDynamoDBServer + startS3Server must run BEFORE + // startRaftServers so the resulting *DynamoDBServer / *S3Server + // are available to the leader-side gRPC AdminForward registration + // in startRaftServers (design 3.3, P2 slice 2b). 
Each server + // listens on its own address; them accepting traffic before the + // raft TCP listeners are up is no different from the existing + // startup-race semantics — a hit in that window already returned + // 503 before this reorder. dynamoServer, err := startDynamoDBServer(r.ctx, r.lc, r.eg, r.dynamoAddress, r.shardStore, r.coordinate, r.leaderDynamo, r.metricsRegistry, r.readTracker) if err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } r.dynamoServer = dynamoServer + s3Server, err := startS3Server(r.ctx, r.lc, r.eg, r.s3Address, r.shardStore, r.coordinate, r.leaderS3, r.s3Region, r.s3CredsFile, r.s3PathStyleOnly, r.readTracker) + if err != nil { + return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) + } + r.s3Server = s3Server forwardDeps := adminForwardServerDeps{ - tables: newDynamoTablesSource(r.dynamoServer), - roles: r.roleStore, + tables: newDynamoTablesSource(r.dynamoServer), + buckets: newBucketsSource(r.s3Server), + roles: r.roleStore, } if err := startRaftServers( r.ctx, @@ -1363,11 +1370,6 @@ func (r *runtimeServerRunner) start() error { ); err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) } - s3Server, err := startS3Server(r.ctx, r.lc, r.eg, r.s3Address, r.shardStore, r.coordinate, r.leaderS3, r.s3Region, r.s3CredsFile, r.s3PathStyleOnly, r.readTracker) - if err != nil { - return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) - } - r.s3Server = s3Server sqsServer, err := startSQSServer(r.ctx, r.lc, r.eg, r.sqsAddress, r.shardStore, r.coordinate, r.leaderSQS, r.sqsRegion, r.sqsCredsFile) if err != nil { return waitErrgroupAfterStartupFailure(r.cancel, r.eg, err) diff --git a/main_admin.go b/main_admin.go index d4d42435e..35dc74ffa 100644 --- a/main_admin.go +++ b/main_admin.go @@ -332,6 +332,36 @@ func (b *bucketsBridge) AdminDescribeBucket(ctx context.Context, name string) (* return &summary, true, nil } +func (b *bucketsBridge) AdminCreateBucket(ctx context.Context, principal 
admin.AuthPrincipal, in admin.CreateBucketRequest) (*admin.BucketSummary, error) { + row, err := b.server.AdminCreateBucket(ctx, convertAdminPrincipal(principal), in.BucketName, in.ACL) + if err != nil { + return nil, translateAdminBucketsError(err) + } + if row == nil { + // AdminCreateBucket guarantees a non-nil summary on success; + // nil here would be an adapter regression. Surface as a typed + // error so the handler logs it as 500 rather than panicking + // on the de-reference at the call site. + return nil, errors.New("admin buckets bridge: adapter returned nil summary on create success") + } + summary := bucketSummaryFromAdapter(*row) + return &summary, nil +} + +func (b *bucketsBridge) AdminPutBucketAcl(ctx context.Context, principal admin.AuthPrincipal, name, acl string) error { + if err := b.server.AdminPutBucketAcl(ctx, convertAdminPrincipal(principal), name, acl); err != nil { + return translateAdminBucketsError(err) + } + return nil +} + +func (b *bucketsBridge) AdminDeleteBucket(ctx context.Context, principal admin.AuthPrincipal, name string) error { + if err := b.server.AdminDeleteBucket(ctx, convertAdminPrincipal(principal), name); err != nil { + return translateAdminBucketsError(err) + } + return nil +} + func bucketSummaryFromAdapter(in adapter.AdminBucketSummary) admin.BucketSummary { return admin.BucketSummary{ Name: in.Name, @@ -343,6 +373,42 @@ func bucketSummaryFromAdapter(in adapter.AdminBucketSummary) admin.BucketSummary } } +// translateAdminBucketsError maps the adapter's S3 admin error +// vocabulary onto the admin-package sentinels the HTTP handler +// matches against. Mirrors translateAdminTablesError on the Dynamo +// side: structured failures (forbidden, not-leader, validation, +// already-exists, etc.) become typed sentinels; everything else +// is forwarded as-is and answered with 500 + a sanitised body. 
+func translateAdminBucketsError(err error) error { + switch { + case err == nil: + return nil + case errors.Is(err, adapter.ErrAdminForbidden): + return admin.ErrBucketsForbidden + case errors.Is(err, adapter.ErrAdminNotLeader): + return admin.ErrBucketsNotLeader + case errors.Is(err, adapter.ErrAdminBucketAlreadyExists): + return admin.ErrBucketsAlreadyExists + case errors.Is(err, adapter.ErrAdminBucketNotFound): + return admin.ErrBucketsNotFound + case errors.Is(err, adapter.ErrAdminBucketNotEmpty): + return admin.ErrBucketsNotEmpty + case errors.Is(err, adapter.ErrAdminInvalidBucketName), + errors.Is(err, adapter.ErrAdminInvalidACL): + // Surface the adapter's wrapped message via *ValidationError + // so the HTTP handler emits 400 invalid_request with a + // useful explanation instead of leaking raw err.Error(). + return &admin.ValidationError{Message: err.Error()} + case isLeaderChurnError(err): + // Mid-dispatch leadership churn looks like an internal error + // from the kv coordinator; mapping it to the Bucket-side + // not-leader sentinel keeps the SPA's retry contract intact. + return admin.ErrBucketsNotLeader + default: + return err //nolint:wrapcheck // forwarded so the handler logs but does not surface it. + } +} + // dynamoTablesBridge is the thin adapter that re-shapes the adapter's // AdminTableSummary DTO into the admin package's DynamoTableSummary. // The two structs are deliberately isomorphic so this translation diff --git a/main_admin_forward.go b/main_admin_forward.go index c06e57165..41c5f527b 100644 --- a/main_admin_forward.go +++ b/main_admin_forward.go @@ -62,21 +62,28 @@ func buildLeaderForwarder(coordinate kv.Coordinator, connCache *kv.GRPCConnCache // adminForwardServerDeps is the small bundle the gRPC ForwardServer // needs to be reachable from a follower's bridge. Collecting them in // a struct keeps startRaftServers' signature tractable as the wiring -// surface grows. 
All fields are required; a missing one means the -// ForwardServer is not registered (single-node / leader-only build). +// surface grows. tables + roles are required; buckets is optional +// (a build that ships without the S3 adapter sets it to nil and the +// ForwardServer's S3 dispatch returns 501 for those operations). type adminForwardServerDeps struct { - tables admin.TablesSource - roles admin.RoleStore + tables admin.TablesSource + buckets admin.BucketsSource + roles admin.RoleStore } // readyForRegistration reports whether the bundle has enough -// collaborators to construct + register a ForwardServer. Both fields -// must be non-nil; a nil TablesSource means the build ships without -// the dynamo adapter, and a nil RoleStore means admin auth is not -// configured. Either way, registering the gRPC service would 500 -// every forwarded call, so we silently skip registration instead. +// collaborators to construct + register a ForwardServer. +// RoleStore is always required (the principal re-evaluation step +// has no fallback). At least one of TablesSource or BucketsSource +// must be present — registering with neither would respond 501 to +// every operation, which is functionally identical to not +// registering at all. The dispatcher emits 501 for whichever +// source is nil so an S3-only or Dynamo-only build still serves +// its half of the admin surface (Codex P1 on PR #673: an S3-only +// cluster previously skipped registration entirely and surfaced +// follower forwards as gRPC Unimplemented / 503). 
func (d adminForwardServerDeps) readyForRegistration() bool { - return d.tables != nil && d.roles != nil + return d.roles != nil && (d.tables != nil || d.buckets != nil) } // registerAdminForwardServer attaches the leader-side gRPC @@ -89,7 +96,11 @@ func registerAdminForwardServer(gs *grpc.Server, deps adminForwardServerDeps, lo if !deps.readyForRegistration() { return } - pb.RegisterAdminForwardServer(gs, admin.NewForwardServer(deps.tables, deps.roles, logger)) + srv := admin.NewForwardServer(deps.tables, deps.roles, logger) + if deps.buckets != nil { + srv = srv.WithBucketsSource(deps.buckets) + } + pb.RegisterAdminForwardServer(gs, srv) } // roleStoreFromFlags builds the same access-key → role map that diff --git a/main_admin_forward_test.go b/main_admin_forward_test.go index f82037036..b01245694 100644 --- a/main_admin_forward_test.go +++ b/main_admin_forward_test.go @@ -110,18 +110,40 @@ func TestRoleStoreFromFlags(t *testing.T) { func TestAdminForwardServerDeps_ReadyForRegistration(t *testing.T) { // The bundle's readyForRegistration gate decides whether - // startRaftServers wires the gRPC ForwardServer at all. A nil - // TablesSource (cluster-only build) or nil RoleStore (admin - // auth disabled) means a registered service would 500 every - // forwarded call — silently skipping registration is the - // preferred behaviour. - require.False(t, adminForwardServerDeps{}.readyForRegistration()) - require.False(t, adminForwardServerDeps{tables: dummyTablesSource{}}.readyForRegistration()) - require.False(t, adminForwardServerDeps{roles: admin.MapRoleStore{}}.readyForRegistration()) + // startRaftServers wires the gRPC ForwardServer at all. + // RoleStore is always required (admin auth disabled means the + // principal re-evaluation step has nothing to compare against). + // At least one of TablesSource / BucketsSource must be present; + // registering with neither would 501 every operation, which is + // indistinguishable from not registering at all. 
+ // + // The S3-only case (Codex P1 on PR #673) used to fail this gate + // because the predicate required tables != nil — a cluster + // started with --dynamoAddr empty but S3 enabled never + // registered AdminForward, and follower-side S3 writes hit + // gRPC Unimplemented / 503 instead of reaching the leader. The + // "buckets only" assertion below pins the fix. + require.False(t, adminForwardServerDeps{}.readyForRegistration(), + "empty bundle must not register") + require.False(t, adminForwardServerDeps{tables: dummyTablesSource{}}.readyForRegistration(), + "missing roles must not register") + require.False(t, adminForwardServerDeps{buckets: dummyBucketsSource{}}.readyForRegistration(), + "missing roles must not register (S3-only)") + require.False(t, adminForwardServerDeps{roles: admin.MapRoleStore{}}.readyForRegistration(), + "roles without any source must not register") require.True(t, adminForwardServerDeps{ tables: dummyTablesSource{}, roles: admin.MapRoleStore{}, - }.readyForRegistration()) + }.readyForRegistration(), "Dynamo-only deployment must register") + require.True(t, adminForwardServerDeps{ + buckets: dummyBucketsSource{}, + roles: admin.MapRoleStore{}, + }.readyForRegistration(), "S3-only deployment must register") + require.True(t, adminForwardServerDeps{ + tables: dummyTablesSource{}, + buckets: dummyBucketsSource{}, + roles: admin.MapRoleStore{}, + }.readyForRegistration(), "full bundle must register") } func TestBuildAdminLeaderForwarder_NilGateReturnsNoForwarder(t *testing.T) { @@ -223,3 +245,31 @@ func (dummyTablesSource) AdminCreateTable(_ context.Context, _ admin.AuthPrincip func (dummyTablesSource) AdminDeleteTable(_ context.Context, _ admin.AuthPrincipal, _ string) error { panic("dummyTablesSource.AdminDeleteTable should not be invoked") } + +// dummyBucketsSource is the smallest concrete admin.BucketsSource +// for the readyForRegistration gate test — symmetric with +// dummyTablesSource. 
The S3-only branch of the gate (Codex P1 on +// PR #673) needs a non-nil BucketsSource value to assert; using a +// real adapter source would pull S3 wiring into a main_admin test +// that is only checking the registration predicate. +type dummyBucketsSource struct{} + +func (dummyBucketsSource) AdminListBuckets(_ context.Context) ([]admin.BucketSummary, error) { + panic("dummyBucketsSource.AdminListBuckets should not be invoked") +} + +func (dummyBucketsSource) AdminDescribeBucket(_ context.Context, _ string) (*admin.BucketSummary, bool, error) { + panic("dummyBucketsSource.AdminDescribeBucket should not be invoked") +} + +func (dummyBucketsSource) AdminCreateBucket(_ context.Context, _ admin.AuthPrincipal, _ admin.CreateBucketRequest) (*admin.BucketSummary, error) { + panic("dummyBucketsSource.AdminCreateBucket should not be invoked") +} + +func (dummyBucketsSource) AdminPutBucketAcl(_ context.Context, _ admin.AuthPrincipal, _, _ string) error { + panic("dummyBucketsSource.AdminPutBucketAcl should not be invoked") +} + +func (dummyBucketsSource) AdminDeleteBucket(_ context.Context, _ admin.AuthPrincipal, _ string) error { + panic("dummyBucketsSource.AdminDeleteBucket should not be invoked") +} diff --git a/proto/admin_forward.pb.go b/proto/admin_forward.pb.go index bdedffe68..67865005a 100644 --- a/proto/admin_forward.pb.go +++ b/proto/admin_forward.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.36.11 -// protoc v7.34.0 +// protoc v5.29.3 // source: admin_forward.proto package proto @@ -31,6 +31,12 @@ const ( AdminOperation_ADMIN_OP_UNSPECIFIED AdminOperation = 0 AdminOperation_ADMIN_OP_CREATE_TABLE AdminOperation = 1 AdminOperation_ADMIN_OP_DELETE_TABLE AdminOperation = 2 + // S3 bucket admin operations (P2 slice 2b). New values appended + // after the Dynamo block so the wire-format integers for the + // existing operations stay stable. 
+ AdminOperation_ADMIN_OP_CREATE_BUCKET AdminOperation = 3 + AdminOperation_ADMIN_OP_DELETE_BUCKET AdminOperation = 4 + AdminOperation_ADMIN_OP_PUT_BUCKET_ACL AdminOperation = 5 ) // Enum value maps for AdminOperation. @@ -39,11 +45,17 @@ var ( 0: "ADMIN_OP_UNSPECIFIED", 1: "ADMIN_OP_CREATE_TABLE", 2: "ADMIN_OP_DELETE_TABLE", + 3: "ADMIN_OP_CREATE_BUCKET", + 4: "ADMIN_OP_DELETE_BUCKET", + 5: "ADMIN_OP_PUT_BUCKET_ACL", } AdminOperation_value = map[string]int32{ - "ADMIN_OP_UNSPECIFIED": 0, - "ADMIN_OP_CREATE_TABLE": 1, - "ADMIN_OP_DELETE_TABLE": 2, + "ADMIN_OP_UNSPECIFIED": 0, + "ADMIN_OP_CREATE_TABLE": 1, + "ADMIN_OP_DELETE_TABLE": 2, + "ADMIN_OP_CREATE_BUCKET": 3, + "ADMIN_OP_DELETE_BUCKET": 4, + "ADMIN_OP_PUT_BUCKET_ACL": 5, } ) @@ -310,11 +322,14 @@ const file_admin_forward_proto_rawDesc = "" + "statusCode\x12\x18\n" + "\apayload\x18\x02 \x01(\fR\apayload\x12!\n" + "\fcontent_type\x18\x03 \x01(\tR\vcontentType\x12.\n" + - "\x13retry_after_seconds\x18\x04 \x01(\x05R\x11retryAfterSeconds*`\n" + + "\x13retry_after_seconds\x18\x04 \x01(\x05R\x11retryAfterSeconds*\xb5\x01\n" + "\x0eAdminOperation\x12\x18\n" + "\x14ADMIN_OP_UNSPECIFIED\x10\x00\x12\x19\n" + "\x15ADMIN_OP_CREATE_TABLE\x10\x01\x12\x19\n" + - "\x15ADMIN_OP_DELETE_TABLE\x10\x022H\n" + + "\x15ADMIN_OP_DELETE_TABLE\x10\x02\x12\x1a\n" + + "\x16ADMIN_OP_CREATE_BUCKET\x10\x03\x12\x1a\n" + + "\x16ADMIN_OP_DELETE_BUCKET\x10\x04\x12\x1b\n" + + "\x17ADMIN_OP_PUT_BUCKET_ACL\x10\x052H\n" + "\fAdminForward\x128\n" + "\aForward\x12\x14.AdminForwardRequest\x1a\x15.AdminForwardResponse\"\x00B#Z!github.com/bootjp/elastickv/protob\x06proto3" diff --git a/proto/admin_forward.proto b/proto/admin_forward.proto index 99d2d47f8..009daea4a 100644 --- a/proto/admin_forward.proto +++ b/proto/admin_forward.proto @@ -45,6 +45,12 @@ enum AdminOperation { ADMIN_OP_UNSPECIFIED = 0; ADMIN_OP_CREATE_TABLE = 1; ADMIN_OP_DELETE_TABLE = 2; + // S3 bucket admin operations (P2 slice 2b). 
New values appended + // after the Dynamo block so the wire-format integers for the + // existing operations stay stable. + ADMIN_OP_CREATE_BUCKET = 3; + ADMIN_OP_DELETE_BUCKET = 4; + ADMIN_OP_PUT_BUCKET_ACL = 5; } message AdminForwardRequest { diff --git a/proto/admin_forward_grpc.pb.go b/proto/admin_forward_grpc.pb.go index 9b9169893..78356d823 100644 --- a/proto/admin_forward_grpc.pb.go +++ b/proto/admin_forward_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: // - protoc-gen-go-grpc v1.6.1 -// - protoc v7.34.0 +// - protoc v5.29.3 // source: admin_forward.proto package proto diff --git a/scripts/rolling-update.env.example b/scripts/rolling-update.env.example index 387e3cb69..3635fb108 100644 --- a/scripts/rolling-update.env.example +++ b/scripts/rolling-update.env.example @@ -58,3 +58,20 @@ RAFTADMIN_ALLOW_INSECURE="true" # opt out. User EXTRA_ENV keys override matching keys in DEFAULT_EXTRA_ENV. DEFAULT_EXTRA_ENV="GOMEMLIMIT=1800MiB" CONTAINER_MEMORY_LIMIT="2500m" + +# Admin dashboard. Disabled by default; flip ADMIN_ENABLED=true to turn the +# listener on. When enabled, ADMIN_SESSION_SIGNING_KEY_FILE plus at least one +# of ADMIN_FULL_ACCESS_KEYS / ADMIN_READ_ONLY_ACCESS_KEYS is required. The +# script bind-mounts the referenced files into the container read-only at +# the same path. Read docs/admin.md and docs/admin_deployment.md before +# enabling on a real deployment. 
+ADMIN_ENABLED="false"
+# ADMIN_ADDRESS="0.0.0.0:8080"
+# ADMIN_FULL_ACCESS_KEYS="AKIA_ADMIN"
+# ADMIN_READ_ONLY_ACCESS_KEYS="AKIA_OBSERVER1,AKIA_OBSERVER2"
+# ADMIN_SESSION_SIGNING_KEY_FILE="/etc/elastickv/admin-hs256.b64"
+# ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE="/etc/elastickv/admin-hs256.previous.b64"
+# ADMIN_TLS_CERT_FILE="/etc/elastickv/admin-tls.crt"
+# ADMIN_TLS_KEY_FILE="/etc/elastickv/admin-tls.key"
+# ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK="false"
+# ADMIN_ALLOW_INSECURE_DEV_COOKIE="false"
diff --git a/scripts/rolling-update.sh b/scripts/rolling-update.sh
index 97e23de69..989530e4f 100755
--- a/scripts/rolling-update.sh
+++ b/scripts/rolling-update.sh
@@ -74,6 +74,44 @@ Optional environment:
     cascading to host processes (e.g. qemu-guest-agent, systemd). Paired
     with GOMEMLIMIT=1800MiB so Go GC preempts the kill. Set to "" to
     disable.
+
+  Admin dashboard (opt-in; ADMIN_ENABLED=true turns the listener on)
+  ADMIN_ENABLED
+      Master switch (default false). When true, the listener is configured
+      using the rest of the ADMIN_* variables and the script bind-mounts the
+      referenced files (signing key, optional previous key, TLS pair) into
+      the container read-only. Required when enabled:
+      ADMIN_SESSION_SIGNING_KEY_FILE plus at least one of ADMIN_FULL_ACCESS_KEYS
+      / ADMIN_READ_ONLY_ACCESS_KEYS.
+  ADMIN_ADDRESS
+      host:port for the admin listener (default: daemon-internal 127.0.0.1:8080).
+      Set to a reachable bind only with ADMIN_TLS_CERT_FILE/_KEY_FILE.
+  ADMIN_FULL_ACCESS_KEYS, ADMIN_READ_ONLY_ACCESS_KEYS
+      Comma-separated allow-lists. Same key must NOT appear in both. Sessions
+      re-validate against this list on every state-changing request, so
+      revoking a key takes effect on the next admin write rather than
+      waiting for the JWT to expire.
+  ADMIN_SESSION_SIGNING_KEY_FILE
+      Required. Path on the remote host to the base64-encoded HS256 key
+      (exactly 64 raw bytes — 88 base64 chars with standard padding, or 86
+      with RawURLEncoding). Bind-mounted read-only at the same path inside
+      the container. Must be identical on every node.
+  ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE
+      Optional. Path to the previous HS256 key, used only for verification
+      during a rotation window. New tokens always sign with the primary key.
+  ADMIN_TLS_CERT_FILE, ADMIN_TLS_KEY_FILE
+      Optional PEM cert + key for the admin listener. Both must be set
+      together. Required when ADMIN_ADDRESS is non-loopback unless
+      ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK=true.
+  ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK
+      Default false. When true, allows the admin listener on a non-loopback
+      bind without TLS. Strongly discouraged. See docs/admin.md for the
+      interaction with ADMIN_ALLOW_INSECURE_DEV_COOKIE — without the
+      latter, the dashboard mints Secure cookies the browser will refuse
+      to send over plaintext, breaking sessions end-to-end.
+  ADMIN_ALLOW_INSECURE_DEV_COOKIE
+      Default false. When true, mints session cookies without the Secure
+      attribute (for local plaintext development only).
+
 Notes:
   - If RAFT_TO_REDIS_MAP is unset, it is derived automatically from NODES,
     RAFT_PORT, and REDIS_PORT.
@@ -125,6 +163,23 @@ SSH_TARGETS="${SSH_TARGETS:-}"
 ROLLING_ORDER="${ROLLING_ORDER:-}"
 RAFT_TO_REDIS_MAP="${RAFT_TO_REDIS_MAP:-}"
 RAFT_TO_S3_MAP="${RAFT_TO_S3_MAP:-}"
+
+# Admin dashboard knobs. ADMIN_ENABLED is the master switch; the
+# remaining variables only take effect when ADMIN_ENABLED=true.
+# Required when enabled: ADMIN_SESSION_SIGNING_KEY_FILE plus at
+# least one of ADMIN_FULL_ACCESS_KEYS / ADMIN_READ_ONLY_ACCESS_KEYS.
+# See docs/admin.md and docs/admin_deployment.md for the contract.
+ADMIN_ENABLED="${ADMIN_ENABLED:-false}"
+ADMIN_ADDRESS="${ADMIN_ADDRESS:-}"
+ADMIN_FULL_ACCESS_KEYS="${ADMIN_FULL_ACCESS_KEYS:-}"
+ADMIN_READ_ONLY_ACCESS_KEYS="${ADMIN_READ_ONLY_ACCESS_KEYS:-}"
+ADMIN_SESSION_SIGNING_KEY_FILE="${ADMIN_SESSION_SIGNING_KEY_FILE:-}"
+ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE="${ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE:-}"
+ADMIN_TLS_CERT_FILE="${ADMIN_TLS_CERT_FILE:-}"
+ADMIN_TLS_KEY_FILE="${ADMIN_TLS_KEY_FILE:-}"
+ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK="${ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK:-false}"
+ADMIN_ALLOW_INSECURE_DEV_COOKIE="${ADMIN_ALLOW_INSECURE_DEV_COOKIE:-false}"
+
 # Container OOM defenses. See usage() for rationale. Empty string disables.
 DEFAULT_EXTRA_ENV="${DEFAULT_EXTRA_ENV-GOMEMLIMIT=1800MiB}"
 CONTAINER_MEMORY_LIMIT="${CONTAINER_MEMORY_LIMIT-2500m}"
@@ -732,6 +787,18 @@ run_container() {
     memory_flags=(--memory "$CONTAINER_MEMORY_LIMIT")
   fi
 
+  # Admin dashboard plumbing. The admin listener is opt-in; when
+  # ADMIN_ENABLED=false the build_admin_flags helper produces no
+  # arguments and no extra bind-mounts, so existing deployments keep
+  # behaving identically. When enabled, the helper also validates that
+  # every referenced file exists on the remote host (signing key,
+  # optional previous key, optional TLS pair) before docker run, so a
+  # missing path fails fast on this node, before the old container is
+  # stopped, rather than hard-erroring the daemon at startup afterwards.
+  local admin_flags=()
+  local admin_volumes=()
+  build_admin_flags admin_flags admin_volumes
+
   docker run -d \
     --name "$CONTAINER_NAME" \
     --restart unless-stopped \
@@ -739,6 +806,7 @@ run_container() {
     "${memory_flags[@]}" \
     -v "$DATA_DIR:$DATA_DIR" \
     "${s3_creds_volume[@]}" \
+    "${admin_volumes[@]}" \
    "${extra_env_flags[@]}" \
     "$IMAGE" "$SERVER_ENTRYPOINT" \
     --address "${NODE_HOST}:${RAFT_PORT}" \
@@ -748,7 +816,97 @@ run_container() {
     --raftEngine "$RAFT_ENGINE" \
     --raftDataDir "$DATA_DIR" \
     --raftRedisMap "$RAFT_TO_REDIS_MAP" \
-    "${s3_flags[@]}" >/dev/null
+    "${s3_flags[@]}" \
+    "${admin_flags[@]}" >/dev/null
 }
 
+# build_admin_flags emits the --admin* flag list and bind-mount list
+# the docker run needs to expose the admin dashboard listener. Both
+# output arrays are populated by-reference (bash 4.3+ namerefs) so
+# run_container can compose them with the rest of the docker
+# arguments without globals leaking between rollout iterations. When
+# ADMIN_ENABLED is anything other than the literal string "true",
+# both arrays are left empty and the function returns silently — the
+# admin listener stays off, matching the daemon's hard default.
+#
+# The validation pass is intentional: a missing signing-key file or
+# missing TLS pair on a remote host is the kind of misconfiguration
+# that would otherwise hard-error the daemon at startup AFTER the
+# previous container was already stopped, leaving the node
+# unhealthy. Failing here aborts before stop_container fires.
+build_admin_flags() {
+  local -n _flags="$1"
+  local -n _volumes="$2"
+
+  if [[ "${ADMIN_ENABLED}" != "true" ]]; then
+    return 0
+  fi
+
+  if [[ -z "${ADMIN_SESSION_SIGNING_KEY_FILE}" ]]; then
+    echo "ADMIN_ENABLED=true requires ADMIN_SESSION_SIGNING_KEY_FILE; aborting" >&2
+    exit 1
+  fi
+  if [[ -z "${ADMIN_FULL_ACCESS_KEYS}" && -z "${ADMIN_READ_ONLY_ACCESS_KEYS}" ]]; then
+    echo "ADMIN_ENABLED=true requires at least one of ADMIN_FULL_ACCESS_KEYS / ADMIN_READ_ONLY_ACCESS_KEYS; aborting" >&2
+    exit 1
+  fi
+  if [[ ! -f "$ADMIN_SESSION_SIGNING_KEY_FILE" || ! -r "$ADMIN_SESSION_SIGNING_KEY_FILE" ]]; then
+    echo "ADMIN_SESSION_SIGNING_KEY_FILE='$ADMIN_SESSION_SIGNING_KEY_FILE' is missing or unreadable on this host; aborting" >&2
+    exit 1
+  fi
+
+  _flags+=(--adminEnabled)
+  _flags+=(--adminSessionSigningKeyFile "$ADMIN_SESSION_SIGNING_KEY_FILE")
+  _volumes+=(-v "${ADMIN_SESSION_SIGNING_KEY_FILE}:${ADMIN_SESSION_SIGNING_KEY_FILE}:ro")
+
+  if [[ -n "${ADMIN_ADDRESS}" ]]; then
+    _flags+=(--adminListen "$ADMIN_ADDRESS")
+  fi
+  if [[ -n "${ADMIN_FULL_ACCESS_KEYS}" ]]; then
+    _flags+=(--adminFullAccessKeys "$ADMIN_FULL_ACCESS_KEYS")
+  fi
+  if [[ -n "${ADMIN_READ_ONLY_ACCESS_KEYS}" ]]; then
+    _flags+=(--adminReadOnlyAccessKeys "$ADMIN_READ_ONLY_ACCESS_KEYS")
+  fi
+
+  if [[ -n "${ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE}" ]]; then
+    if [[ ! -f "$ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE" || ! -r "$ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE" ]]; then
+      echo "ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE='$ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE' is missing or unreadable; aborting" >&2
+      exit 1
+    fi
+    _flags+=(--adminSessionSigningKeyPreviousFile "$ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE")
+    _volumes+=(-v "${ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE}:${ADMIN_SESSION_SIGNING_KEY_PREVIOUS_FILE}:ro")
+  fi
+
+  # TLS pair must be set together. The daemon already rejects partial
+  # configs at startup, but failing earlier here gives the operator a
+  # script-level error pointing at the variable name, instead of the
+  # daemon's "exactly one of cert/key" message after the container is
+  # already running.
+  if [[ -n "${ADMIN_TLS_CERT_FILE}" || -n "${ADMIN_TLS_KEY_FILE}" ]]; then
+    if [[ -z "${ADMIN_TLS_CERT_FILE}" || -z "${ADMIN_TLS_KEY_FILE}" ]]; then
+      echo "ADMIN_TLS_CERT_FILE and ADMIN_TLS_KEY_FILE must be set together; aborting" >&2
+      exit 1
+    fi
+    if [[ ! -f "$ADMIN_TLS_CERT_FILE" || ! -r "$ADMIN_TLS_CERT_FILE" ]]; then
+      echo "ADMIN_TLS_CERT_FILE='$ADMIN_TLS_CERT_FILE' is missing or unreadable; aborting" >&2
+      exit 1
+    fi
+    if [[ ! -f "$ADMIN_TLS_KEY_FILE" || ! -r "$ADMIN_TLS_KEY_FILE" ]]; then
+      echo "ADMIN_TLS_KEY_FILE='$ADMIN_TLS_KEY_FILE' is missing or unreadable; aborting" >&2
+      exit 1
+    fi
+    _flags+=(--adminTLSCertFile "$ADMIN_TLS_CERT_FILE" --adminTLSKeyFile "$ADMIN_TLS_KEY_FILE")
+    _volumes+=(-v "${ADMIN_TLS_CERT_FILE}:${ADMIN_TLS_CERT_FILE}:ro")
+    _volumes+=(-v "${ADMIN_TLS_KEY_FILE}:${ADMIN_TLS_KEY_FILE}:ro")
+  fi
+
+  if [[ "${ADMIN_ALLOW_PLAINTEXT_NON_LOOPBACK}" == "true" ]]; then
+    _flags+=(--adminAllowPlaintextNonLoopback)
+  fi
+  if [[ "${ADMIN_ALLOW_INSECURE_DEV_COOKIE}" == "true" ]]; then
+    _flags+=(--adminAllowInsecureDevCookie)
+  fi
+}
 
 require_passwordless_sudo() {