From e94f93f40d3c43a2825bb21ac9a6558dd2279863 Mon Sep 17 00:00:00 2001 From: Jason Pickering Date: Wed, 3 Jun 2026 09:06:05 +0200 Subject: [PATCH 1/3] perf: isolate UsersPerformanceTest scenarios and reuse session The "GET User - by uid" p95 assertion kept failing on CI (e.g. 783ms vs 700ms) while the endpoint is ~10-24ms in isolation. The latency was an artifact of how the test measured, not the endpoint: 1. Parallel mode (primary). Default mode=parallel ran all 7 scenarios concurrently, and on the single shared self-hosted CI runner the bcrypt-heavy write scenarios (password hashing on POST/PUT/REPLICA payloads, plus per-virtual-user login) saturated CPU and stretched the GET tail. Running scenarios sequentially takes GET p95 from 783ms on CI to ~24ms in isolation. Faster multi-core dev machines hide the contention entirely, which is why it never reproduced locally. 2. One-time auth bcrypt charged to the first measured request (secondary). DHIS2 is stateful (SessionCreationPolicy.IF_REQUIRED + HttpSessionSecurityContextRepository), so with the default cookie jar the session is reused and bcrypt is paid only once per virtual user -- but with protocol-level basicAuth that one-time ~90ms cost landed inside the first GET/POST/... request and surfaced in their p95/max (e.g. POST p95 172ms -> 103ms once isolated). There is NO per-request bcrypt and no missing auth cache; this is expected Spring Security behaviour. 3. Tiny sample size made p95 a coin flip (~10-20 samples/scenario). Changes: - Default mode to sequential so each scenario is measured in isolation. parallel remains available as an opt-in mixed-load stress mode. - Authenticate once per virtual user via a separately-named request and reuse the JSESSIONID cookie, so the one-time auth bcrypt is excluded from the per-endpoint assertions. Relies on CSRF being disabled (DHIS2 default) so session-cookie writes are accepted. - Bump iterations (load 10->30, smoke 3->10) for a more stable p95. 
Verified locally (sequential): GET p50 ~11ms, p95 ~19-24ms; all write scenarios succeed under session-cookie auth. Thresholds were calibrated under the old parallel regime and are now far too loose; they are flagged in-code as pending recalibration from fresh nightly baselines. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../workflows/performance-tests-scheduled.yml | 4 +- .../test/platform/UsersPerformanceTest.java | 68 ++++++++++++++----- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/.github/workflows/performance-tests-scheduled.yml b/.github/workflows/performance-tests-scheduled.yml index ce10772df085..f326fab753cc 100644 --- a/.github/workflows/performance-tests-scheduled.yml +++ b/.github/workflows/performance-tests-scheduled.yml @@ -39,13 +39,13 @@ jobs: DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest WARMUP=2 - MVN_ARGS="-Diterations=3 -Dprofile=smoke" + MVN_ARGS="-Diterations=10 -Dprofile=smoke" slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE - name: users-load env: | DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest - MVN_ARGS=-Diterations=10 + MVN_ARGS=-Diterations=30 slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE uses: ./.github/workflows/performance-tests.yml with: diff --git a/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java b/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java index e485d2e0d283..94b7c819cba3 100644 --- a/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java +++ b/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java @@ -93,7 +93,9 @@ *
  • {@code orgUnitUid} (default: {@code ImspTQPwCqd} — "Sierra Leone" root) *
  • {@code userGroupUid} (default: {@code wl5cDMuUhmF} — "Administrators") *
  • {@code iterations} (default: {@code 3}) - *
  • {@code mode} (default: {@code parallel}; use {@code sequential} to chain scenarios) + *
  • {@code mode} (default: {@code sequential}; runs each scenario in isolation so timings + * reflect single-endpoint latency. Use {@code parallel} to run all scenarios concurrently as + * a mixed-load stress test.) * */ public class UsersPerformanceTest extends Simulation { @@ -137,11 +139,16 @@ private static String prop(String key, String defaultValue) { private static final String BASE_URL = prop("baseUrl", "http://localhost:8080"); private static final String USERNAME = prop("username", "admin"); private static final String PASSWORD = prop("password", "district"); + private static final String BASIC_AUTH = + Base64.getEncoder() + .encodeToString((USERNAME + ":" + PASSWORD).getBytes(StandardCharsets.UTF_8)); private static final String USER_ROLE_UID = prop("userRoleUid", "Euq3XfEIEbx"); private static final String ORG_UNIT_UID = prop("orgUnitUid", "ImspTQPwCqd"); private static final String USER_GROUP_UID = prop("userGroupUid", "wl5cDMuUhmF"); private static final int ITERATIONS = Integer.parseInt(prop("iterations", "3")); - private static final String MODE = prop("mode", "parallel"); + // Default to sequential so each scenario is measured in isolation (single-endpoint latency). + // Set -Dmode=parallel for a mixed-load stress test where all scenarios run concurrently. + private static final String MODE = prop("mode", "sequential"); private enum Profile { SMOKE, @@ -167,10 +174,18 @@ private record Thresholds(int p95, int max) {} private static final int PATCH_GROUP_COUNT = Integer.parseInt(prop("patchGroupCount", "7")); // Thresholds per profile, slack 1.5× observed p95/max, rounded to nearest 50ms. - // LOAD: 10 nightly runs × 10 iterations (2026-04-02 – 2026-04-11), 100 samples each. - // SMOKE: 14 nightly runs × 3 iterations (2026-04-09 – 2026-04-22), 42–90 samples each. - // Recalibrate with: scripts/download-user-perf-results.sh --test-name users-smoke - // scripts/analyze-user-perf-results.py --profile smoke + // + // STALE — PENDING RECALIBRATION. 
The values below were calibrated under the OLD regime: all + // scenarios run in parallel, which on the shared CI runner caused CPU contention that inflated + // the + // tail (GET p95 reached 783ms on CI vs ~81ms in isolation). As of 2026-06-03 the test runs + // scenarios sequentially (in isolation) and authenticates once per virtual user, moving the + // one-time session-establishing bcrypt out of the measured requests, so measured times dropped + // sharply. These thresholds are now far too loose and provide little regression protection until + // recalibrated from fresh nightly baselines (~1 week of runs). + // Old baselines: LOAD 10 nightly × 10 iter (2026-04-02–04-11); SMOKE 14 nightly × 3 iter. + // Recalibrate with: scripts/download-user-perf-results.sh --test-name users-smoke + // scripts/analyze-user-perf-results.py --profile smoke private static final Map<Profile, Thresholds> POST_THRESH = Map.of(Profile.SMOKE, new Thresholds(1150, 1200), Profile.LOAD, new Thresholds(1250, 1500)); @@ -371,16 +386,35 @@ public void before() { } public UsersPerformanceTest() { + // No protocol-level basicAuth. DHIS2 is stateful (SessionCreationPolicy.IF_REQUIRED + + // HttpSessionSecurityContextRepository), so once a session exists Spring Security reuses the + // SecurityContext and skips re-authentication; bcrypt password verification (~70ms) is only + // paid on the FIRST request that establishes the session. With protocol basicAuth + the default + // cookie jar, that first-request bcrypt landed inside the measured GET/PUT/... requests and + // surfaced in their p95 (a fixed ~80ms auth artifact, not endpoint latency). Instead we + // authenticate once per virtual user via a separately-named request (see `authenticate`) so the + // measured requests reflect endpoint cost only. 
HttpProtocolBuilder httpProtocol = - http.baseUrl(BASE_URL) - .acceptHeader("application/json") - .disableCaching() - .basicAuth(USERNAME, PASSWORD); + http.baseUrl(BASE_URL).acceptHeader("application/json").disableCaching(); + + // Authenticate once per virtual user (paying the one-time bcrypt to establish the session), + // then + // reuse the JSESSIONID cookie for all measured requests. The login request is named separately + // so its bcrypt cost is NOT counted in the per-endpoint assertions. Relies on CSRF being + // disabled (the DHIS2 default) so session-cookie writes (POST/PUT/PATCH/DELETE) are accepted + // without a CSRF token. + ChainBuilder authenticate = + exec(flushCookieJar()) + .exec( + http("Authenticate (session login)") + .get("/api/me") + .header("Authorization", "Basic " + BASIC_AUTH) + .check(status().is(200))); // ── Scenario: POST /api/users ──────────────────────────────────────────── ScenarioBuilder postScenario = scenario(POST_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -398,7 +432,7 @@ public UsersPerformanceTest() { // ── Scenario: GET /api/users/{uid} ────────────────────────────────────── ScenarioBuilder getScenario = scenario(GET_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -412,7 +446,7 @@ public UsersPerformanceTest() { // ── Scenario: PUT /api/users/{uid} ────────────────────────────────────── ScenarioBuilder putScenario = scenario(PUT_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -433,7 +467,7 @@ public UsersPerformanceTest() { // ── Scenario: PATCH /api/users/{uid} ──────────────────────────────────── ScenarioBuilder patchScenario = scenario(PATCH_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -456,7 +490,7 @@ public UsersPerformanceTest() { // ── Scenario: PATCH /api/users/{uid} userGroups 
──────────────────────── ScenarioBuilder patchGroupsScenario = scenario(PATCH_GROUPS_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -476,7 +510,7 @@ public UsersPerformanceTest() { // ── Scenario: POST /api/users/{uid}/replica ───────────────────────────── ScenarioBuilder replicaScenario = scenario(REPLICA_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { @@ -503,7 +537,7 @@ public UsersPerformanceTest() { // Users are pre-created in before(), so this scenario measures only DELETE time. ScenarioBuilder deleteScenario = scenario(DELETE_REQUEST) - .exec(flushCookieJar()) + .exec(authenticate) .repeat(ITERATIONS) .on( exec(session -> { From 031e4782d9246e10a1de5632946b7be92afc1486 Mon Sep 17 00:00:00 2001 From: Jason Pickering Date: Sat, 6 Jun 2026 18:23:42 +0200 Subject: [PATCH 2/3] perf: run UsersPerformanceTest against the platform-perf DB (#24108) Replaces the Sierra Leone demo DB with the platform-perf DB (~250k users, ~250k org units) as the default target for the users performance test, so timings reflect a realistically sized instance. - Default userRoleUid/orgUnitUid/userGroupUid now point at platform-perf metadata: the largest role and group (~83k each, to expose user create/delete N+1s) and the org unit hierarchy root. Still overridable via -D or a configFile. - CI users-smoke/users-load jobs build the platform-perf DB (DB_DIR=dev, DB_TYPE=platform-perf, DB_VERSION=43-2026-03-10). - Interim DB fix: the dump ships hibernate_sequence at ~965 while holding ~250k bulk-seeded rows, so every insert collides on the primary key and write operations return 409. A post-restore step in the DB image build advances hibernate_sequence past the seeded ids (forward-only; no-op on dumps that don't need it). Stopgap until the dump is regenerated with the sequence set correctly. 
- Threshold comment updated: the values still reflect Sierra Leone and must be recalibrated from fresh nightly baselines on platform-perf. Verified: DB image builds and applies the fix (hibernate_sequence -> 1e8, max userinfoid 250004); full users test passes 120/120 against a local platform-perf instance with the new defaults. Based on #24107. Co-authored-by: Claude Opus 4.8 (1M context) --- .../workflows/performance-tests-scheduled.yml | 6 +++ .../docker/Dockerfile.postgres | 4 ++ .../docker/docker-entrypoint-build.sh | 9 +++++ .../docker/fix-hibernate-sequence.sql | 22 +++++++++++ .../test/platform/UsersPerformanceTest.java | 38 ++++++++++--------- 5 files changed, 62 insertions(+), 17 deletions(-) create mode 100644 dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql diff --git a/.github/workflows/performance-tests-scheduled.yml b/.github/workflows/performance-tests-scheduled.yml index f326fab753cc..54f2609fe4c7 100644 --- a/.github/workflows/performance-tests-scheduled.yml +++ b/.github/workflows/performance-tests-scheduled.yml @@ -38,6 +38,9 @@ jobs: env: | DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest + DB_DIR=dev + DB_TYPE=platform-perf + DB_VERSION=43-2026-03-10 WARMUP=2 MVN_ARGS="-Diterations=10 -Dprofile=smoke" slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE @@ -45,6 +48,9 @@ jobs: env: | DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest + DB_DIR=dev + DB_TYPE=platform-perf + DB_VERSION=43-2026-03-10 MVN_ARGS=-Diterations=30 slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE uses: ./.github/workflows/performance-tests.yml diff --git a/dhis-2/dhis-test-performance/docker/Dockerfile.postgres b/dhis-2/dhis-test-performance/docker/Dockerfile.postgres index 62b229dc1b66..90f34899dda3 100644 --- a/dhis-2/dhis-test-performance/docker/Dockerfile.postgres +++ b/dhis-2/dhis-test-performance/docker/Dockerfile.postgres @@ -35,6 +35,10 @@ 
ENV POSTGRES_USER=dhis \ COPY --from=downloader /tmp/dump.sql.gz /tmp/dump.sql.gz +# Interim post-restore fix: advance hibernate_sequence past the platform-perf dump's seeded ids +# (see fix-hibernate-sequence.sql). Forward-only, so it is a no-op on other dumps. +COPY fix-hibernate-sequence.sql /tmp/fix-hibernate-sequence.sql + COPY docker-entrypoint-build.sh /usr/local/bin/ # Initialize the database and restore the dump at build time # Uses a modified version of the official PostgreSQL docker-entrypoint.sh diff --git a/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh b/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh index 6a6a0463f089..22640402e9da 100644 --- a/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh +++ b/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh @@ -145,6 +145,15 @@ build_init() { echo "Restoring database dump..." gunzip -c /tmp/dump.sql.gz | docker_process_sql -d "$POSTGRES_DB" echo "Database dump restored successfully" + + # Interim fix: advance hibernate_sequence past the dump's seeded ids so write + # operations don't collide with existing primary keys. Forward-only and harmless on + # dumps that don't need it. See fix-hibernate-sequence.sql. Remove once the dump is fixed. + if [ -f /tmp/fix-hibernate-sequence.sql ]; then + echo "Applying hibernate_sequence fix..." + docker_process_sql -d "$POSTGRES_DB" -f /tmp/fix-hibernate-sequence.sql + echo "hibernate_sequence fix applied" + fi fi docker_temp_server_stop diff --git a/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql b/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql new file mode 100644 index 000000000000..e01d84c2776a --- /dev/null +++ b/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql @@ -0,0 +1,22 @@ +-- Interim workaround for a defect in the platform-perf DB dump. +-- +-- DHIS2 assigns primary keys for most tables (userinfo, organisationunit, usergroup, ...) 
from a +-- single shared sequence, `hibernate_sequence`. The platform-perf dump was bulk-seeded with ~250k +-- users and ~250k org units at high ids, but ships `hibernate_sequence` set to ~965. As a result +-- every insert reuses an id that already exists and fails with e.g. +-- ERROR: duplicate key value violates unique constraint "userinfo_pkey" +-- Detail: Key (userinfoid)=(973) already exists. +-- which makes every write operation (user create/replicate/delete) return HTTP 409. The users +-- performance test is write-heavy, so it cannot run against the dump until the sequence is advanced +-- past the seeded ids. +-- +-- This advances `hibernate_sequence` to a value comfortably above any seeded id. It is FORWARD-ONLY +-- (GREATEST with the current value), so it is a harmless no-op on correctly generated dumps +-- (sierra-leone, hmis) and never moves a sequence backwards (which would reintroduce collisions). +-- +-- This is a stopgap applied at DB-image build time. The proper fix is to regenerate the +-- platform-perf dump with the sequence set correctly; remove this script once that lands. +SELECT setval( + 'hibernate_sequence', + GREATEST(100000000::bigint, (SELECT last_value FROM hibernate_sequence)), + true); diff --git a/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java b/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java index 94b7c819cba3..d5015a662109 100644 --- a/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java +++ b/dhis-2/dhis-test-performance/src/test/java/org/hisp/dhis/test/platform/UsersPerformanceTest.java @@ -55,7 +55,8 @@ /** * Performance test for single-user CRUD operations on {@code /api/users}. * - *

    Five scenarios, all running against the Sierra Leone demo DB by default: + *

    Seven scenarios, all running against the platform-perf DB (~250k users / ~250k org units) by + * default: * *

      *
    1. POST — creates a new user (with optional group assignment) @@ -71,7 +72,7 @@ * itself — no existing database users are touched. * *

      Run with {@code -DuserGroupUid=} pointing at a group with large membership to expose N+1 - * problems in POST and DELETE. The default points at "Administrators" on the SL demo DB. + * problems in POST and DELETE. The default points at a ~83k-member group on the platform-perf DB. * *

      Configuration can be provided via a {@code .properties} file instead of individual {@code -D} * flags: @@ -82,16 +83,16 @@ * * Individual {@code -D} flags always override values from the config file. * - *

      Available properties (with SL demo DB defaults): + *

      Available properties (with platform-perf DB defaults): * *

        *
      • {@code configFile} — path to a {@code .properties} file (optional) *
      • {@code baseUrl} (default: {@code http://localhost:8080}) *
      • {@code username} (default: {@code admin}) *
      • {@code password} (default: {@code district}) - *
      • {@code userRoleUid} (default: {@code Euq3XfEIEbx} — "Data entry clerk") - *
      • {@code orgUnitUid} (default: {@code ImspTQPwCqd} — "Sierra Leone" root) - *
      • {@code userGroupUid} (default: {@code wl5cDMuUhmF} — "Administrators") + *
      • {@code userRoleUid} (default: {@code MoRvPzDH7lc} — generic role with ~83k users) + *
      • {@code orgUnitUid} (default: {@code VCCdfC9pvMA} — root org unit) + *
      • {@code userGroupUid} (default: {@code KOvR9SAEeEZ} — group with ~83k members) *
      • {@code iterations} (default: {@code 3}) *
      • {@code mode} (default: {@code sequential}; runs each scenario in isolation so timings * reflect single-endpoint latency. Use {@code parallel} to run all scenarios concurrently as @@ -142,9 +143,13 @@ private static String prop(String key, String defaultValue) { private static final String BASIC_AUTH = Base64.getEncoder() .encodeToString((USERNAME + ":" + PASSWORD).getBytes(StandardCharsets.UTF_8)); - private static final String USER_ROLE_UID = prop("userRoleUid", "Euq3XfEIEbx"); - private static final String ORG_UNIT_UID = prop("orgUnitUid", "ImspTQPwCqd"); - private static final String USER_GROUP_UID = prop("userGroupUid", "wl5cDMuUhmF"); + // Defaults target the platform-perf DB (~250k users / ~250k org units). Role and group are the + // largest available (~83k each) to expose user create/delete N+1s; org unit is the hierarchy + // root. + // Override via -D or a configFile to run against a different instance. + private static final String USER_ROLE_UID = prop("userRoleUid", "MoRvPzDH7lc"); + private static final String ORG_UNIT_UID = prop("orgUnitUid", "VCCdfC9pvMA"); + private static final String USER_GROUP_UID = prop("userGroupUid", "KOvR9SAEeEZ"); private static final int ITERATIONS = Integer.parseInt(prop("iterations", "3")); // Default to sequential so each scenario is measured in isolation (single-endpoint latency). // Set -Dmode=parallel for a mixed-load stress test where all scenarios run concurrently. @@ -175,14 +180,13 @@ private record Thresholds(int p95, int max) {} // Thresholds per profile, slack 1.5× observed p95/max, rounded to nearest 50ms. // - // STALE — PENDING RECALIBRATION. The values below were calibrated under the OLD regime: all - // scenarios run in parallel, which on the shared CI runner caused CPU contention that inflated - // the - // tail (GET p95 reached 783ms on CI vs ~81ms in isolation). 
As of 2026-06-03 the test runs - // scenarios sequentially (in isolation) and authenticates once per virtual user, moving the - // one-time session-establishing bcrypt out of the measured requests, so measured times dropped - // sharply. These thresholds are now far too loose and provide little regression protection until - // recalibrated from fresh nightly baselines (~1 week of runs). + // STALE — PENDING RECALIBRATION on the platform-perf DB. These values were calibrated against the + // Sierra Leone demo DB under the old parallel regime. As of 2026-06-03 the test (a) runs + // scenarios + // sequentially and authenticates once per virtual user, and (b) targets the platform-perf DB + // (~250k users / ~250k org units), where operations are materially slower (e.g. GET ~6× SL). The + // numbers below reflect neither change and must be recalibrated from fresh nightly baselines on + // platform-perf (~1 week of runs) before they provide meaningful regression protection. // Old baselines: LOAD 10 nightly × 10 iter (2026-04-02–04-11); SMOKE 14 nightly × 3 iter. // Recalibrate with: scripts/download-user-perf-results.sh --test-name users-smoke // scripts/analyze-user-perf-results.py --profile smoke From dd34aa8ad008a6388b10edc117a99fb16a058310 Mon Sep 17 00:00:00 2001 From: Jason Pickering Date: Mon, 8 Jun 2026 10:37:51 +0200 Subject: [PATCH 3/3] perf: drop hibernate_sequence stopgap, use regenerated platform-perf dump The platform-perf dump has been regenerated with hibernate_sequence set correctly (44-2026-06-03), so the interim post-restore workaround is no longer needed. Removes fix-hibernate-sequence.sql and the Dockerfile / docker-entrypoint-build.sh hooks that applied it. Also points DB_VERSION at the regenerated dump and wires the users-load job to platform-perf (it was still defaulting to the Sierra Leone DB, unlike users-smoke). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../workflows/performance-tests-scheduled.yml | 5 ++++- .../docker/Dockerfile.postgres | 4 ---- .../docker/docker-entrypoint-build.sh | 9 -------- .../docker/fix-hibernate-sequence.sql | 22 ------------------- 4 files changed, 4 insertions(+), 36 deletions(-) delete mode 100644 dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql diff --git a/.github/workflows/performance-tests-scheduled.yml b/.github/workflows/performance-tests-scheduled.yml index ce9413c04179..195a7c81ed77 100644 --- a/.github/workflows/performance-tests-scheduled.yml +++ b/.github/workflows/performance-tests-scheduled.yml @@ -40,7 +40,7 @@ jobs: SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest DB_DIR=dev DB_TYPE=platform-perf - DB_VERSION=43-2026-03-10 + DB_VERSION=44-2026-06-03 WARMUP=2 MVN_ARGS="-Diterations=10 -Dprofile=smoke" slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE @@ -48,6 +48,9 @@ jobs: env: | DHIS2_IMAGE=dhis2/core-dev:latest SIMULATION_CLASS=org.hisp.dhis.test.platform.UsersPerformanceTest + DB_DIR=dev + DB_TYPE=platform-perf + DB_VERSION=44-2026-06-03 MVN_ARGS=-Diterations=30 slack_webhook_secret: SLACK_WEBHOOK_PLATFORM_PERFORMANCE uses: ./.github/workflows/performance-tests.yml diff --git a/dhis-2/dhis-test-performance/docker/Dockerfile.postgres b/dhis-2/dhis-test-performance/docker/Dockerfile.postgres index 90f34899dda3..62b229dc1b66 100644 --- a/dhis-2/dhis-test-performance/docker/Dockerfile.postgres +++ b/dhis-2/dhis-test-performance/docker/Dockerfile.postgres @@ -35,10 +35,6 @@ ENV POSTGRES_USER=dhis \ COPY --from=downloader /tmp/dump.sql.gz /tmp/dump.sql.gz -# Interim post-restore fix: advance hibernate_sequence past the platform-perf dump's seeded ids -# (see fix-hibernate-sequence.sql). Forward-only, so it is a no-op on other dumps. 
-COPY fix-hibernate-sequence.sql /tmp/fix-hibernate-sequence.sql - COPY docker-entrypoint-build.sh /usr/local/bin/ # Initialize the database and restore the dump at build time # Uses a modified version of the official PostgreSQL docker-entrypoint.sh diff --git a/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh b/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh index 22640402e9da..6a6a0463f089 100644 --- a/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh +++ b/dhis-2/dhis-test-performance/docker/docker-entrypoint-build.sh @@ -145,15 +145,6 @@ build_init() { echo "Restoring database dump..." gunzip -c /tmp/dump.sql.gz | docker_process_sql -d "$POSTGRES_DB" echo "Database dump restored successfully" - - # Interim fix: advance hibernate_sequence past the dump's seeded ids so write - # operations don't collide with existing primary keys. Forward-only and harmless on - # dumps that don't need it. See fix-hibernate-sequence.sql. Remove once the dump is fixed. - if [ -f /tmp/fix-hibernate-sequence.sql ]; then - echo "Applying hibernate_sequence fix..." - docker_process_sql -d "$POSTGRES_DB" -f /tmp/fix-hibernate-sequence.sql - echo "hibernate_sequence fix applied" - fi fi docker_temp_server_stop diff --git a/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql b/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql deleted file mode 100644 index e01d84c2776a..000000000000 --- a/dhis-2/dhis-test-performance/docker/fix-hibernate-sequence.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Interim workaround for a defect in the platform-perf DB dump. --- --- DHIS2 assigns primary keys for most tables (userinfo, organisationunit, usergroup, ...) from a --- single shared sequence, `hibernate_sequence`. The platform-perf dump was bulk-seeded with ~250k --- users and ~250k org units at high ids, but ships `hibernate_sequence` set to ~965. As a result --- every insert reuses an id that already exists and fails with e.g. 
--- ERROR: duplicate key value violates unique constraint "userinfo_pkey" --- Detail: Key (userinfoid)=(973) already exists. --- which makes every write operation (user create/replicate/delete) return HTTP 409. The users --- performance test is write-heavy, so it cannot run against the dump until the sequence is advanced --- past the seeded ids. --- --- This advances `hibernate_sequence` to a value comfortably above any seeded id. It is FORWARD-ONLY --- (GREATEST with the current value), so it is a harmless no-op on correctly generated dumps --- (sierra-leone, hmis) and never moves a sequence backwards (which would reintroduce collisions). --- --- This is a stopgap applied at DB-image build time. The proper fix is to regenerate the --- platform-perf dump with the sequence set correctly; remove this script once that lands. -SELECT setval( - 'hibernate_sequence', - GREATEST(100000000::bigint, (SELECT last_value FROM hibernate_sequence)), - true);