Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
850 changes: 340 additions & 510 deletions .github/workflows/docker.yml

Large diffs are not rendered by default.

100 changes: 89 additions & 11 deletions .github/workflows/package-cleanup.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,45 @@
name: PackageCleanup

# Triggers: the workflow runs on pushes to main, on pull requests targeting
# main, and on manual dispatch.
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
# Manual runs accept two optional string inputs that drive buildcache cleanup.
workflow_dispatch:
inputs:
# Cutoff date for buildcache cleanup; the default is the empty string.
buildcache_cutoff:
description: 'Delete buildcache entries older than this (ISO date, e.g. 2026-05-18). Leave empty to skip buildcache cleanup.'
required: false
default: ''
# Defaults to the string 'true', so a manual run only lists candidates
# unless this input is explicitly changed.
dry_run:
description: 'Set to true to only list what would be deleted without actually deleting'
required: false
default: 'true'

jobs:
cleanup-packages:
cleanup-untagged:
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
steps:
-
name: Remove untagged versions of dockerspackstackslurmcluster/frontend
name: Remove untagged versions of dockerspackstackslurmcluster/slurm-spack-stack-frontend
uses: actions/delete-package-versions@v5
with:
package-name: 'dockerspackstackslurmcluster/frontend'
package-name: 'dockerspackstackslurmcluster/slurm-spack-stack-frontend'
package-type: 'container'
min-versions-to-keep: 0
delete-only-untagged-versions: 'true'
-
name: Remove untagged versions of dockerspackstackslurmcluster/master
name: Remove untagged versions of dockerspackstackslurmcluster/slurm-spack-stack-master
uses: actions/delete-package-versions@v5
with:
package-name: 'dockerspackstackslurmcluster/master'
package-name: 'dockerspackstackslurmcluster/slurm-spack-stack-master'
package-type: 'container'
min-versions-to-keep: 0
delete-only-untagged-versions: 'true'
-
name: Remove untagged versions of dockerspackstackslurmcluster/node
name: Remove untagged versions of dockerspackstackslurmcluster/slurm-spack-stack-node
uses: actions/delete-package-versions@v5
with:
package-name: 'dockerspackstackslurmcluster/node'
package-name: 'dockerspackstackslurmcluster/slurm-spack-stack-node'
package-type: 'container'
min-versions-to-keep: 0
delete-only-untagged-versions: 'true'
Expand Down Expand Up @@ -86,3 +91,76 @@ jobs:
package-type: 'container'
min-versions-to-keep: 0
delete-only-untagged-versions: 'true'

cleanup-stale-buildcache:
  # Deletes versions of the buildcache container package created before the
  # `buildcache_cutoff` workflow_dispatch input. The job is skipped whenever
  # that input is empty.
  if: ${{ github.event.inputs.buildcache_cutoff != '' }}
  runs-on: ubuntu-latest
  permissions:
    packages: write
    contents: read
  steps:
    -
      name: Clean stale buildcache entries
      uses: actions/github-script@v7
      env:
        # Inputs are handed to the script through the environment instead of
        # being interpolated into the JavaScript source, so a crafted input
        # value cannot inject code into the script.
        BUILDCACHE_CUTOFF: ${{ github.event.inputs.buildcache_cutoff }}
        DRY_RUN: ${{ github.event.inputs.dry_run }}
      with:
        script: |
          const cutoffInput = process.env.BUILDCACHE_CUTOFF || '';
          // Anything other than the exact string 'true' performs real deletions.
          const dryRun = process.env.DRY_RUN === 'true';
          const org = 'noaa-gsl';
          const packageName = 'dockerspackstackslurmcluster/buildcache';

          // Fail early with a readable message when the date cannot be parsed.
          const cutoff = new Date(cutoffInput);
          if (Number.isNaN(cutoff.getTime())) {
            throw new Error(`Invalid buildcache_cutoff '${cutoffInput}': expected an ISO date such as 2026-05-18`);
          }

          console.log(`Cutoff date: ${cutoff.toISOString()}`);
          console.log(`Dry run: ${dryRun}`);

          // List every version BEFORE deleting anything. Deleting while stepping
          // through pages shifts the remaining versions onto earlier pages, so
          // page-by-page iteration would silently skip entries.
          const versions = await github.paginate(
            github.rest.packages.getAllPackageVersionsForPackageOwnedByOrg,
            {
              package_type: 'container',
              package_name: packageName,
              org: org,
              per_page: 100,
            },
          );

          let deleted = 0;
          let kept = 0;

          for (const version of versions) {
            const createdAt = new Date(version.created_at);
            const tags = version.metadata?.container?.tags || [];

            // Never delete index entries - they are updated in place by spack buildcache update-index
            const isIndex = tags.some(t => t.includes('index') || t.startsWith('_'));
            if (isIndex) {
              console.log(`Preserving index: ${version.id} (tags: ${tags.join(', ')})`);
              kept++;
              continue;
            }

            if (createdAt < cutoff) {
              if (dryRun) {
                console.log(`[DRY RUN] Would delete: ${version.id} (created ${createdAt.toISOString()}, tags: ${tags.join(', ')})`);
              } else {
                console.log(`Deleting: ${version.id} (created ${createdAt.toISOString()}, tags: ${tags.join(', ')})`);
                await github.rest.packages.deletePackageVersionForOrg({
                  package_type: 'container',
                  package_name: packageName,
                  org: org,
                  package_version_id: version.id,
                });
              }
              // Counts candidates in dry-run mode and real deletions otherwise.
              deleted++;
            } else {
              console.log(`Keeping: ${version.id} (created ${createdAt.toISOString()}, tags: ${tags.join(', ')})`);
              kept++;
            }
          }

          console.log(`\nSummary: ${deleted} ${dryRun ? 'would be ' : ''}deleted, ${kept} kept`);
59 changes: 57 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,62 @@ sizes. The cluster behaves as if it were running on multiple
nodes even if the containers are all running on the same host
machine.

# Building the Containers

To build the containers from source:

## Master and Node Containers

```bash
docker build -t ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-master:latest -f master/Dockerfile master/
docker build -t ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest -f node/Dockerfile node/
```

## Frontend Container

The frontend container requires a GitHub personal access token (PAT) with package write permissions to push built packages to the GitHub Container Registry build cache. Set your token in an environment variable and pass it as a secret during build:

```bash
export GITHUB_TOKEN=your_github_pat_here
docker build --progress=plain \
--secret id=github_token,env=GITHUB_TOKEN \
-t ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-frontend:latest \
-f frontend/Dockerfile \
frontend/
```

**Note:** The `--progress=plain` flag shows full build output. The frontend build compiles 355+ scientific software packages from source and can take several hours on first build. Subsequent builds use the cached packages from GHCR.

### Configuring Parallel Build Jobs

The frontend Dockerfile uses the `SPACK_BUILD_JOBS` build argument to control the number of parallel make jobs (`-j` flag) used when building each package (default: 8). Set it to match the number of CPU cores available:

**For 8-core systems (default):**
```bash
docker build --build-arg SPACK_BUILD_JOBS=8 ...
```

**For 16-core systems:**
```bash
docker build --build-arg SPACK_BUILD_JOBS=16 ...
```

**With Docker Compose:**
```bash
docker compose build --build-arg SPACK_BUILD_JOBS=16
```

You can also modify the default in `docker-compose.yml`:
```yaml
services:
slurmfrontend:
build:
args:
SPACK_BUILD_JOBS: 16 # Change from default 8
```

**Performance note:** Higher values speed up compilation of individual packages, especially large ones like ESMF, JEDI components, and NetCDF. However, on systems with 32 GB of RAM, values above 8 may cause memory pressure during compilation of memory-intensive Fortran packages, potentially leading to swapping or OOM errors.

# Quick Start

To start the slurm cluster environment:
Expand Down Expand Up @@ -69,10 +125,9 @@ docker exec -it spack-stack-frontend bash -l
Next, load the spack-stack base environment:

```
module use /opt/spack-stack/envs/unified-env/install/modulefiles/Core
module use /opt/spack-stack/envs/unified-env/modules/Core
module load stack-gcc
module load stack-openmpi
module load stack-python
```

Once the basic spack-stack modules are loaded, you can choose from multiple spack-stack environments for different purposes.
Expand Down
22 changes: 15 additions & 7 deletions docker-compose-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ services:
build:
context: ./frontend
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend:latest
args:
SPACK_BUILD_JOBS: 8
secrets:
- github_token
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-frontend:latest
container_name: spack-stack-frontend
hostname: slurmfrontend
user: admin
Expand All @@ -17,7 +21,7 @@ services:
build:
context: ./master
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-master:latest
container_name: spack-stack-master
hostname: slurmmaster
user: admin
Expand All @@ -35,7 +39,7 @@ services:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node1
hostname: slurmnode1
user: admin
Expand All @@ -49,7 +53,7 @@ services:
links:
- slurmmaster
slurmnode2:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node2
hostname: slurmnode2
user: admin
Expand All @@ -63,7 +67,7 @@ services:
links:
- slurmmaster
slurmnode3:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node3
hostname: slurmnode3
user: admin
Expand All @@ -77,7 +81,7 @@ services:
links:
- slurmmaster
slurmnode4:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node4
hostname: slurmnode4
user: admin
Expand All @@ -91,7 +95,7 @@ services:
links:
- slurmmaster
slurmnode5:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node5
hostname: slurmnode5
user: admin
Expand All @@ -107,3 +111,7 @@ services:
volumes:
home-vol:
opt-vol:

secrets:
github_token:
environment: GITHUB_TOKEN
22 changes: 15 additions & 7 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ services:
build:
context: ./frontend
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/frontend:latest
args:
SPACK_BUILD_JOBS: 8
secrets:
- github_token
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-frontend:latest
container_name: spack-stack-frontend
hostname: slurmfrontend
user: admin
Expand All @@ -16,7 +20,7 @@ services:
build:
context: ./master
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/master:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-master:latest
container_name: spack-stack-master
hostname: slurmmaster
user: admin
Expand All @@ -33,7 +37,7 @@ services:
build:
context: ./node
dockerfile: ./Dockerfile
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node1
hostname: slurmnode1
user: admin
Expand All @@ -46,7 +50,7 @@ services:
links:
- slurmmaster
slurmnode2:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node2
hostname: slurmnode2
user: admin
Expand All @@ -59,7 +63,7 @@ services:
links:
- slurmmaster
slurmnode3:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node3
hostname: slurmnode3
user: admin
Expand All @@ -72,7 +76,7 @@ services:
links:
- slurmmaster
slurmnode4:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node4
hostname: slurmnode4
user: admin
Expand All @@ -85,7 +89,7 @@ services:
links:
- slurmmaster
slurmnode5:
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/node:latest
image: ghcr.io/noaa-gsl/dockerspackstackslurmcluster/slurm-spack-stack-node:latest
container_name: spack-stack-node5
hostname: slurmnode5
user: admin
Expand All @@ -100,3 +104,7 @@ services:
volumes:
home-vol:
opt-vol:

secrets:
github_token:
environment: GITHUB_TOKEN
Loading
Loading