From e5712d1d02870ca56ba2f6c80205cb00feabec24 Mon Sep 17 00:00:00 2001 From: Corentin Kerisit Date: Tue, 21 Apr 2026 10:44:40 +0200 Subject: [PATCH 1/5] Add CI --- .github/workflows/_build.yaml | 57 +++++++++ .github/workflows/ci.yaml | 12 ++ .github/workflows/publish.yml | 218 --------------------------------- .github/workflows/release.yaml | 34 +++++ 4 files changed, 103 insertions(+), 218 deletions(-) create mode 100644 .github/workflows/_build.yaml create mode 100644 .github/workflows/ci.yaml delete mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/release.yaml diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml new file mode 100644 index 00000000000..875d899340d --- /dev/null +++ b/.github/workflows/_build.yaml @@ -0,0 +1,57 @@ +name: Build flash-attention artifacts + +on: + workflow_call: + +jobs: + flash-attention-artifacts: + runs-on: ${{ matrix.build.runs_on }} + strategy: + fail-fast: false + matrix: + build: + - platform: linux-amd64 + cuda_platform: linux-x86_64 + runs_on: + ["runs-on", "cpu=128", "family=m+c", "image=ubuntu22-amd64"] + - platform: linux-arm64 + cuda_platform: linux-sbsa + runs_on: + ["runs-on", "cpu=96", "family=m+c", "image=ubuntu22-full-arm64"] + + steps: + - uses: runs-on/action@v1 + + - name: Check out repository + uses: actions/checkout@v4 + + - uses: bazel-contrib/setup-bazel@0.15.0 + with: + bazelisk-version: 1.26.0 + bazelisk-cache: false + bazelrc: | + common --verbose_failures + common --color=yes + common --show_timestamps + + - name: Build shared library + run: | + bazel build \ + -c opt \ + //:flashattn_so \ + --@rules_cuda//cuda:exec_platform=${{ matrix.build.cuda_platform }} \ + --@rules_cuda//cuda:target_platform=${{ matrix.build.cuda_platform }} + + - name: Package ${{ matrix.build.platform }} artifact + run: | + artifact_dir="flash-attention_${{ matrix.build.platform }}" + mkdir -p "${artifact_dir}/lib" "${artifact_dir}/include" + cp bazel-bin/libflashattn.so "${artifact_dir}/lib/libflashattn.so" + cp capi/capi.h "${artifact_dir}/include/capi.h" + tar -czf "${artifact_dir}.tar.gz" -C "${artifact_dir}" . + + - name: Upload ${{ matrix.build.platform }} artifact + uses: actions/upload-artifact@v4 + with: + name: flash-attention_${{ matrix.build.platform }}.tar.gz + path: flash-attention_${{ matrix.build.platform }}.tar.gz diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000000..2c1c47d2504 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,12 @@ +name: CI + +on: + pull_request: + +concurrency: + group: ${{ github.workflow_sha }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + flash-attention-artifacts: + uses: ./.github/workflows/_build.yaml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 6f227d1abe1..00000000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,218 +0,0 @@ -# This workflow will: -# - Create a new Github release -# - Build wheels for supported architectures -# - Deploy the wheels to the Github release -# - Release the static code to PyPi -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: Build wheels and deploy - -on: - create: - tags: - - v* - -jobs: - - setup_release: - name: Create Release - runs-on: ubuntu-latest - steps: - - name: Get the tag version - id: extract_branch - run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} - shell: bash - - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ steps.extract_branch.outputs.branch }} - release_name: ${{ steps.extract_branch.outputs.branch }} - - build_wheels: - name: Build Wheel - needs: setup_release - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the - # manylinux docker image, but I haven't figured out how to install CUDA on manylinux. - os: [ubuntu-20.04] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] - torch-version: ['2.2.2', '2.3.1', '2.4.0', '2.5.1', '2.6.0'] - cuda-version: ['12.4.1'] - # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not. - # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI. - # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs) - # when building without C++11 ABI and using it on nvcr images. - cxx11_abi: ['FALSE', 'TRUE'] - exclude: - # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix - # Pytorch < 2.5 does not support Python 3.13 - - torch-version: '2.2.2' - python-version: '3.13' - - torch-version: '2.3.1' - python-version: '3.13' - - torch-version: '2.4.0' - python-version: '3.13' - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Set CUDA and PyTorch versions - run: | - echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV - echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV - echo "WHEEL_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1'})" >> $GITHUB_ENV - echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV - - - name: Free up disk space - if: ${{ runner.os == 'Linux' }} - # https://github.com/easimon/maximize-build-space/blob/master/action.yml - # https://github.com/easimon/maximize-build-space/tree/test-report - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf /opt/hostedtoolcache/CodeQL - - - name: Set up swap space - if: runner.os == 'Linux' - uses: pierotofy/set-swap-space@v1.0 - with: - swap-size-gb: 10 - - - name: Install CUDA ${{ matrix.cuda-version }} - if: ${{ matrix.cuda-version != 'cpu' }} - uses: Jimver/cuda-toolkit@v0.2.19 - id: cuda-toolkit - with: - cuda: ${{ matrix.cuda-version }} - linux-local-args: '["--toolkit"]' - # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1 - # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }} - method: 'network' - sub-packages: '["nvcc"]' - - - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }} - run: | - pip install --upgrade pip - # For some reason torch 2.2.0 on python 3.12 errors saying no setuptools - pip install setuptools==75.8.0 - # With python 3.13 and torch 2.5.1, unless we update typing-extensions, we get error - # AttributeError: attribute '__default__' of 'typing.ParamSpec' objects is not writable - pip install typing-extensions==4.12.2 - # We want to figure out the CUDA version to download pytorch - # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116 - # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix - # This code is ugly, maybe there's a better way to do this. - export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \ - minv = {'2.2': 118, '2.3': 118, '2.4': 118, '2.5': 118, '2.6': 118}[env['MATRIX_TORCH_VERSION']]; \ - maxv = {'2.2': 121, '2.3': 121, '2.4': 124, '2.5': 124, '2.6': 124}[env['MATRIX_TORCH_VERSION']]; \ - print(minv if int(env['MATRIX_CUDA_VERSION']) < 120 else maxv)" \ - ) - if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then - # pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION} - # Can't use --no-deps because we need cudnn etc. - # Hard-coding this version of pytorch-triton for torch 2.6.0.dev20241001 - pip install jinja2 - pip install https://download.pytorch.org/whl/nightly/pytorch_triton-3.1.0%2Bcf34004b8a-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl - pip install --no-cache-dir --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ matrix.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl - else - pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} - fi - nvcc --version - python --version - python -c "import torch; print('PyTorch:', torch.__version__)" - python -c "import torch; print('CUDA:', torch.version.cuda)" - python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" - shell: - bash - - - name: Build wheel - run: | - # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6 - # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810 - # However this still fails so I'm using a newer version of setuptools - pip install setuptools==75.8.0 - pip install ninja packaging wheel - export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH - export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH - # Limit MAX_JOBS otherwise the github runner goes OOM - # nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM - MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "123" ] && echo 1 || echo 2) FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist - tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }} - wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2") - ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} - echo "wheel_name=${wheel_name}" >> $GITHUB_ENV - - - name: Log Built Wheels - run: | - ls dist - - - name: Get the tag version - id: extract_branch - run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} - - - name: Get Release with tag - id: get_current_release - uses: joutvhu/get-release@v1 - with: - tag_name: ${{ steps.extract_branch.outputs.branch }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Upload Release Asset - id: upload_release_asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.get_current_release.outputs.upload_url }} - asset_path: ./dist/${{env.wheel_name}} - asset_name: ${{env.wheel_name}} - asset_content_type: application/* - - publish_package: - name: Publish package - needs: [build_wheels] - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - pip install ninja packaging wheel twine - # Install latest setuptools with support for pypi metadata 2.2 (improved compat w/ uv) - pip install setuptools==75.8.0 - # We don't want to download anything CUDA-related here - pip install torch --index-url https://download.pytorch.org/whl/cpu - - - name: Build core package - env: - FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" - run: | - python setup.py sdist --dist-dir=dist - - - name: Deploy - env: - TWINE_USERNAME: "__token__" - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - run: | - python -m twine upload dist/* diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 00000000000..81bec50585e --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,34 @@ +name: Release + +on: + push: + tags: + - "*" + - "!nightly-*" + +concurrency: + group: ${{ github.workflow_sha }}-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + flash-attention-artifacts: + uses: ./.github/workflows/_build.yaml + + release: + name: Release flash-attention artifacts + needs: ["flash-attention-artifacts"] + runs-on: ubuntu-latest + steps: + - run: rm -rf flash-attention_*.tar.gz + + - name: Download all artifacts + uses: actions/download-artifact@v4 + + - name: Publish release assets + uses: softprops/action-gh-release@v2 + with: + files: | + *.tar.gz/*.tar.gz From c866bfc934e0a43f783e2437121a80965044460e Mon Sep 17 00:00:00 2001 From: Corentin Kerisit Date: Tue, 21 Apr 2026 10:47:21 +0200 Subject: [PATCH 2/5] Add runson config --- .github/runs-on.yml | 25 +++++++++++++++++++++++++ .github/workflows/_build.yaml | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 .github/runs-on.yml diff --git a/.github/runs-on.yml b/.github/runs-on.yml new file mode 100644 index 00000000000..a130d3f3e92 --- /dev/null +++ b/.github/runs-on.yml @@ -0,0 +1,25 @@ +images: + ubuntu24-amd64: + platform: "linux" + arch: "amd64" + ami: "ami-04a92520784b93e73" + disk: large + preinstall: | + #!/bin/bash + apt update && apt install -y curl build-essential + ubuntu22-amd64: + platform: "linux" + arch: "amd64" + ami: "ami-04a4acda26ca36de0" + disk: large + preinstall: | + #!/bin/bash + apt update && apt install -y curl build-essential + ubuntu22-arm64: + platform: "linux" + arch: "arm64" + ami: "ami-080565b2a8d78af19" + disk: large + preinstall: | + #!/bin/bash + apt update && apt install -y curl build-essential diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 875d899340d..2a3a76b644a 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -17,7 +17,7 @@ jobs: - platform: linux-arm64 cuda_platform: linux-sbsa runs_on: - ["runs-on", "cpu=96", "family=m+c", "image=ubuntu22-full-arm64"] + ["runs-on", "cpu=96", "family=m+c", "image=ubuntu22-arm64"] steps: - uses: runs-on/action@v1 From 4b29a0184b9102065c1485fe0837a171465c5cac Mon Sep 17 00:00:00 2001 From: Corentin Kerisit Date: Tue, 21 Apr 2026 10:55:50 +0200 Subject: [PATCH 3/5] test --- .github/workflows/_build.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index 2a3a76b644a..e66f64faa3b 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -13,11 +13,11 @@ jobs: - platform: linux-amd64 cuda_platform: linux-x86_64 runs_on: - ["runs-on", "cpu=128", "family=m+c", "image=ubuntu22-amd64"] + ["runs-on", "cpu=128", "family=m+c", "image=ubuntu22-full-amd64"] - platform: linux-arm64 cuda_platform: linux-sbsa runs_on: - ["runs-on", "cpu=96", "family=m+c", "image=ubuntu22-arm64"] + ["runs-on", "cpu=96", "family=m+c", "image=ubuntu22-full-arm64"] steps: - uses: runs-on/action@v1 From e1197ce06af1359bd9fd3d17556cd6752efb08ae Mon Sep 17 00:00:00 2001 From: Corentin Kerisit Date: Tue, 21 Apr 2026 10:58:45 +0200 Subject: [PATCH 4/5] fix --- .github/workflows/_build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index e66f64faa3b..b752b68311c 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -24,6 +24,8 @@ jobs: - name: Check out repository uses: actions/checkout@v4 + with: + submodules: recursive - uses: bazel-contrib/setup-bazel@0.15.0 with: From 04aacf4b70f3532fe40a716c6d8e719b656a1a7a Mon Sep 17 00:00:00 2001 From: Corentin Kerisit Date: Tue, 21 Apr 2026 11:01:27 +0200 Subject: [PATCH 5/5] ful --- .github/workflows/_build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_build.yaml b/.github/workflows/_build.yaml index b752b68311c..273617509e5 100644 --- a/.github/workflows/_build.yaml +++ b/.github/workflows/_build.yaml @@ -13,7 +13,7 @@ jobs: - platform: linux-amd64 cuda_platform: linux-x86_64 runs_on: - ["runs-on", "cpu=128", "family=m+c", "image=ubuntu22-full-amd64"] + ["runs-on", "cpu=128", "family=m+c", "image=ubuntu22-full-x64"] - platform: linux-arm64 cuda_platform: linux-sbsa runs_on: