From ccfc0c446a8a6baafb51d0805f8cdd718d40cd1e Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Tue, 9 Dec 2025 10:09:49 -0500
Subject: [PATCH 1/4] GitLab CI jvm memory configuration refactored to percentage

---
 .gitlab-ci.yml | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 81f123610c4..1349d5adc16 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -134,15 +134,13 @@ default:
   stage: build
   variables:
     MAVEN_OPTS: "-Xms256M -Xmx1024M"
-    GRADLE_WORKERS: 2
-    GRADLE_MEM: 3G
+    GRADLE_MEM: "70.0"
     KUBERNETES_CPU_REQUEST: 8
     KUBERNETES_MEMORY_REQUEST: 10Gi
     KUBERNETES_MEMORY_LIMIT: 10Gi
     CACHE_TYPE: "lib" #default
     FF_USE_FASTZIP: "true"
     CACHE_COMPRESSION_LEVEL: "slowest"
-    RUNTIME_AVAILABLE_PROCESSORS_OVERRIDE: 4 # Runtime.getRuntime().availableProcessors() returns incorrect or very high values in Kubernetes
     GIT_SUBMODULE_STRATEGY: normal
     GIT_SUBMODULE_DEPTH: 1
 
@@ -184,8 +182,8 @@ default:
     # replace maven central part by MAVEN_REPOSITORY_PROXY in .mvn/wrapper/maven-wrapper.properties
     - sed -i "s|https://repo.maven.apache.org/maven2/|$MAVEN_REPOSITORY_PROXY|g" .mvn/wrapper/maven-wrapper.properties
     - mkdir -p .mvn/caches
-    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xms$GRADLE_MEM -Xmx$GRADLE_MEM -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
-    - export GRADLE_ARGS=" --build-cache --stacktrace --no-daemon --parallel --max-workers=$GRADLE_WORKERS"
+    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - export GRADLE_ARGS=" --build-cache --stacktrace --no-daemon --parallel"
     - *normalize_node_index
     # for weird reasons, gradle will always "chmod 700" the .gradle folder
     # with Gitlab caching, .gradle is always owned by root and thus gradle's chmod invocation fails
@@ -281,8 +279,7 @@ build_tests:
   variables:
     BUILD_CACHE_POLICY: push
     DEPENDENCY_CACHE_POLICY: pull
-    GRADLE_MEM: 4G
-    GRADLE_WORKERS: 3
+    GRADLE_MEM: "80.0"
     KUBERNETES_MEMORY_REQUEST: 18Gi
     KUBERNETES_MEMORY_LIMIT: 18Gi
   parallel:
@@ -365,12 +362,11 @@ spotless:
   stage: tests
   needs: []
   variables:
-    # TODO: Latest version of spotless is failing with OOM on CI only.
-    # Setting 8G memory solving this issue, but we need to solve it eventually.
-    GRADLE_MEM: 8G
+    GRADLE_MEM: "80.0"
     KUBERNETES_MEMORY_REQUEST: 18Gi
     KUBERNETES_MEMORY_LIMIT: 18Gi
   script:
+    - echo "GRADLE_OPTS_DBG3: ${GRADLE_OPTS}"
     - ./gradlew --version
     - ./gradlew spotlessCheck $GRADLE_ARGS
 
@@ -394,10 +390,10 @@ test_published_artifacts:
     - rm -rf "${mvn_local_repo}/com/datadoghq"
     - export GPG_PRIVATE_KEY=$(aws ssm get-parameter --region us-east-1 --name ci.dd-trace-java.signing.gpg_private_key --with-decryption --query "Parameter.Value" --out text)
     - export GPG_PASSWORD=$(aws ssm get-parameter --region us-east-1 --name ci.dd-trace-java.signing.gpg_passphrase --with-decryption --query "Parameter.Value" --out text)
-    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xms2G -Xmx2G -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - echo "GRADLE_OPTS_DBG1: ${GRADLE_OPTS}"
     - ./gradlew publishToMavenLocal $GRADLE_ARGS
     - cd test-published-dependencies
-    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xms1G -Xmx1G -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - echo "GRADLE_OPTS_DBG2: ${GRADLE_OPTS}"
     - ./gradlew --version
     - ./gradlew check --info $GRADLE_ARGS
   after_script:
@@ -545,8 +541,6 @@ muzzle-dep-report:
     KUBERNETES_MEMORY_REQUEST: 17Gi
     KUBERNETES_MEMORY_LIMIT: 17Gi
     KUBERNETES_CPU_REQUEST: 10
-    GRADLE_WORKERS: 4
-    GRADLE_MEM: 3G
     GRADLE_PARAMS: "-PskipFlakyTests"
     CONTINUE_ON_FAILURE: "false"
     TESTCONTAINERS_CHECKS_DISABLE: "true"
@@ -568,7 +562,8 @@ muzzle-dep-report:
         export PROFILER_COMMAND="-XX:StartFlightRecording=settings=profile,filename=/tmp/${CI_JOB_NAME_SLUG}.jfr,dumponexit=true";
       fi
     - *prepare_test_env
-    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-Xms$GRADLE_MEM -Xmx$GRADLE_MEM $PROFILER_COMMAND -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp' -Ddatadog.forkedMaxHeapSize=1024M -Ddatadog.forkedMinHeapSize=128M"
+    - export GRADLE_OPTS="-Ddatadog.forkedMinHeapSize=128M -Ddatadog.forkedMaxHeapSize=1024M -Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM $PROFILER_COMMAND -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - echo "GRADLE_OPTS_DBG4: ${GRADLE_OPTS}"
     - ./gradlew --version
     - ./gradlew $GRADLE_TARGET $GRADLE_PARAMS -PtestJvm=$testJvm -PtaskPartitionCount=$NORMALIZED_NODE_TOTAL -PtaskPartition=$NORMALIZED_NODE_INDEX $GRADLE_ARGS --continue || $CONTINUE_ON_FAILURE
   after_script:

From f5b98a0b0d8bf735ab8cdcc5f977a4b83883e1e2 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Tue, 9 Dec 2025 10:36:50 -0500
Subject: [PATCH 2/4] Trigger CI

---
 .gitlab-ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 1349d5adc16..9341d00ae52 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -141,6 +141,7 @@ default:
     CACHE_TYPE: "lib" #default
     FF_USE_FASTZIP: "true"
     CACHE_COMPRESSION_LEVEL: "slowest"
+    RUNTIME_AVAILABLE_PROCESSORS_OVERRIDE: 4 # Runtime.getRuntime().availableProcessors() returns incorrect or very high values in Kubernetes
     GIT_SUBMODULE_STRATEGY: normal
     GIT_SUBMODULE_DEPTH: 1
 

From 57e7c0af36271b5e6bf6c3f96716ba2db66ca75a Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Tue, 9 Dec 2025 14:15:38 -0500
Subject: [PATCH 3/4] Split jobs to more partitions to relax contention on prefs.

---
 .gitlab-ci.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9341d00ae52..4a20cf0f985 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -36,7 +36,7 @@ variables:
     value: "false"
   NON_DEFAULT_JVMS:
     description: "Enable tests on JVMs that are not the default"
-    value: "false"
+    value: "true"
   RUN_FLAKY_TESTS:
     description: "Enable flaky tests"
     value: "false"
@@ -652,7 +652,7 @@ test_inst:
     GRADLE_TARGET: ":instrumentationTest"
     CACHE_TYPE: "inst"
   parallel:
-    matrix: *test_matrix_6
+    matrix: *test_matrix_8
 
 test_inst_latest:
   extends: .test_job_with_test_agent
@@ -664,7 +664,7 @@ test_inst_latest:
       - testJvm: ["8", "17", "21", "25"] # the latest "stable" version is LTS v25
         # Gitlab doesn't support "parallel" and "parallel:matrix" at the same time
         # This emulates "parallel" by including it in the matrix
-        CI_SPLIT: [ "1/6", "2/6", "3/6", "4/6", "5/6", "6/6"]
+        CI_SPLIT: [ "1/8", "2/8", "3/8", "4/8", "5/8", "6/8", "7/8", "8/8"]
 
 test_flaky:
   extends: .test_job_with_test_agent
@@ -724,7 +724,7 @@ test_smoke:
     GRADLE_PARAMS: "-PskipFlakyTests"
     CACHE_TYPE: "smoke"
   parallel:
-    matrix: *test_matrix_4
+    matrix: *test_matrix_8
 
 test_ssi_smoke:
   extends: .test_job
@@ -735,7 +735,7 @@ test_ssi_smoke:
     DD_INJECT_FORCE: "true"
     DD_INJECTION_ENABLED: "tracer"
   parallel:
-    matrix: *test_matrix_4
+    matrix: *test_matrix_8
 
 test_smoke_graalvm:
   extends: .test_job

From c96c25235bd216f73f950b25274f7c24a23d10e9 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Fri, 12 Dec 2025 13:59:47 -0500
Subject: [PATCH 4/4] Some tweaks

---
 .gitlab-ci.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 3218754cd77..9966b5bba3c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -36,7 +36,7 @@ variables:
     value: "false"
   NON_DEFAULT_JVMS:
     description: "Enable tests on JVMs that are not the default"
-    value: "true"
+    value: "false"
   RUN_FLAKY_TESTS:
     description: "Enable flaky tests"
     value: "false"
@@ -134,7 +134,7 @@ default:
   stage: build
   variables:
     MAVEN_OPTS: "-Xms256M -Xmx1024M"
-    GRADLE_MEM: "70.0"
+    GRADLE_MEM_PERCENTAGE: "70.0"
     KUBERNETES_CPU_REQUEST: 8
     KUBERNETES_MEMORY_REQUEST: 10Gi
     KUBERNETES_MEMORY_LIMIT: 10Gi
@@ -183,7 +183,7 @@ default:
     # replace maven central part by MAVEN_REPOSITORY_PROXY in .mvn/wrapper/maven-wrapper.properties
     - sed -i "s|https://repo.maven.apache.org/maven2/|$MAVEN_REPOSITORY_PROXY|g" .mvn/wrapper/maven-wrapper.properties
     - mkdir -p .mvn/caches
-    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - export GRADLE_OPTS="-Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM_PERCENTAGE -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
     - export GRADLE_ARGS=" --build-cache --stacktrace --no-daemon --parallel"
     - *normalize_node_index
     # for weird reasons, gradle will always "chmod 700" the .gradle folder
@@ -280,7 +280,7 @@ build_tests:
   variables:
     BUILD_CACHE_POLICY: push
     DEPENDENCY_CACHE_POLICY: pull
-    GRADLE_MEM: "80.0"
+    GRADLE_MEM_PERCENTAGE: "80.0"
     KUBERNETES_MEMORY_REQUEST: 18Gi
     KUBERNETES_MEMORY_LIMIT: 18Gi
   parallel:
@@ -363,7 +363,7 @@ spotless:
   stage: tests
   needs: []
   variables:
-    GRADLE_MEM: "80.0"
+    GRADLE_MEM_PERCENTAGE: "80.0"
     KUBERNETES_MEMORY_REQUEST: 18Gi
     KUBERNETES_MEMORY_LIMIT: 18Gi
   script:
@@ -563,7 +563,7 @@ muzzle-dep-report:
         export PROFILER_COMMAND="-XX:StartFlightRecording=settings=profile,filename=/tmp/${CI_JOB_NAME_SLUG}.jfr,dumponexit=true";
       fi
     - *prepare_test_env
-    - export GRADLE_OPTS="-Ddatadog.forkedMinHeapSize=128M -Ddatadog.forkedMaxHeapSize=1024M -Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM $PROFILER_COMMAND -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
+    - export GRADLE_OPTS="-Ddatadog.forkedMinHeapSize=128M -Ddatadog.forkedMaxHeapSize=1024M -Dorg.gradle.jvmargs='-XX:MaxRAMPercentage=$GRADLE_MEM_PERCENTAGE $PROFILER_COMMAND -XX:ErrorFile=/tmp/hs_err_pid%p.log -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp'"
     - echo "GRADLE_OPTS_DBG4: ${GRADLE_OPTS}"
     - ./gradlew --version
     - ./gradlew $GRADLE_TARGET $GRADLE_PARAMS -PtestJvm=$testJvm -PtaskPartitionCount=$NORMALIZED_NODE_TOTAL -PtaskPartition=$NORMALIZED_NODE_INDEX $GRADLE_ARGS --continue || $CONTINUE_ON_FAILURE
@@ -652,7 +652,7 @@ test_inst:
     GRADLE_TARGET: ":instrumentationTest"
     CACHE_TYPE: "inst"
   parallel:
-    matrix: *test_matrix_8
+    matrix: *test_matrix_6
 
 test_inst_latest:
   extends: .test_job_with_test_agent
@@ -664,7 +664,7 @@ test_inst_latest:
       - testJvm: ["8", "17", "21", "25"] # the latest "stable" version is LTS v25
         # Gitlab doesn't support "parallel" and "parallel:matrix" at the same time
         # This emulates "parallel" by including it in the matrix
-        CI_SPLIT: [ "1/8", "2/8", "3/8", "4/8", "5/8", "6/8", "7/8", "8/8"]
+        CI_SPLIT: [ "1/6", "2/6", "3/6", "4/6", "5/6", "6/6"]
 
 test_flaky:
   extends: .test_job_with_test_agent