diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
new file mode 100644
index 000000000..e818a920d
--- /dev/null
+++ b/.github/workflows/benchmarks.yml
@@ -0,0 +1,150 @@
+# ------------------------------------------------------------------------------
+# Copyright Matt Borland 2026.
+# Distributed under the Boost Software License,
+# Version 1.0. (See accompanying file LICENSE_1_0.txt
+# or copy at http://www.boost.org/LICENSE_1_0.txt)
+# ------------------------------------------------------------------------------
+#
+# Runs the Boost.Decimal benchmarks (test/benchmarks.cpp) in release mode on a
+# spread of native runners so that performance numbers, and any regressions in
+# them, are captured on every run. The benchmark target is a run-fail test that
+# always returns non-zero, so b2 reports success and the timing output is read
+# back from the captured .output file.
+
+name: Run Benchmarks
+
+on:
+  push:
+    branches:
+      - master
+      - develop
+      - feature/**
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+# The jobs only check out sources and upload artifacts, so a read-only
+# token is sufficient.
+permissions:
+  contents: read
+
+# One in-flight run per repository/ref; a newer run cancels the older one.
+concurrency:
+  group: ${{ format('{0}:{1}:benchmarks', github.repository, github.ref) }}
+  cancel-in-progress: true
+
+env:
+  # Parallel fetch count handed to depinst.py through --git_args.
+  GIT_FETCH_JOBS: 8
+
+jobs:
+  # GCC 14 on the hosted Ubuntu runner, one leg per address model.
+  linux:
+    name: Linux GCC ${{ matrix.address_model }}-bit
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        address_model: [ 32, 64 ]
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v6
+      - name: Install packages
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y g++-14 g++-14-multilib
+      - name: Setup Boost
+        run: |
+          # Library name is the repository name without its owner prefix.
+          LIBRARY=${GITHUB_REPOSITORY#*/}
+          echo "LIBRARY=$LIBRARY" >> "$GITHUB_ENV"
+          # Use Boost master only when the target branch is master; every
+          # other ref builds against Boost develop.
+          REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+          REF=${REF#refs/heads/}
+          BOOST_BRANCH=develop && [ "$REF" = "master" ] && BOOST_BRANCH=master || true
+          echo "BOOST_BRANCH: $BOOST_BRANCH"
+          cd ..
+          git clone -b "$BOOST_BRANCH" --depth 1 "https://github.com/boostorg/boost.git" boost-root
+          cd boost-root
+          mkdir -p "libs/$LIBRARY"
+          cp -r "$GITHUB_WORKSPACE"/* "libs/$LIBRARY"
+          git submodule update --init tools/boostdep
+          python tools/boostdep/depinst/depinst.py --git_args "--jobs $GIT_FETCH_JOBS" "$LIBRARY"
+          ./bootstrap.sh
+          ./b2 headers
+          echo "using gcc : : g++-14 ;" > ~/user-config.jam
+      - name: Run benchmarks (release)
+        run: |
+          cd ../boost-root
+          ./b2 -j$(nproc) toolset=gcc cxxstd=20 variant=release address-model=${{ matrix.address_model }} \
+            define=BOOST_DECIMAL_RUN_BENCHMARKS "libs/$LIBRARY/test//benchmarks"
+          echo "==================== Benchmark results (Linux GCC ${{ matrix.address_model }}-bit) ===================="
+          find bin.v2 -path '*release*' -name 'benchmarks.output' -exec cat {} +
+          find bin.v2 -path '*release*' -name 'benchmarks.output' -exec cp {} "$GITHUB_WORKSPACE/benchmarks-linux-gcc-${{ matrix.address_model }}-bit.txt" \;
+      - name: Upload benchmark output
+        if: always()
+        uses: actions/upload-artifact@v6
+        with:
+          name: benchmarks-linux-gcc-${{ matrix.address_model }}-bit
+          path: benchmarks-linux-gcc-${{ matrix.address_model }}-bit.txt
+          if-no-files-found: warn
+
+  # MSVC on the hosted x64 and ARM64 Windows runners.
+  windows:
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # `arch` only names the artifact; `b2_arch` is passed to b2 as
+          # architecture= so each leg explicitly targets its runner's CPU.
+          - name: Windows x64 MSVC
+            os: windows-latest
+            arch: x64
+            b2_arch: x86
+          - name: Windows ARM64 MSVC
+            os: windows-11-arm
+            arch: arm64
+            b2_arch: arm
+    defaults:
+      run:
+        shell: cmd
+    steps:
+      - uses: actions/checkout@v6
+      - name: Setup Boost
+        run: |
+          for /f %%i in ("%GITHUB_REPOSITORY%") do set LIBRARY=%%~nxi
+          echo LIBRARY=%LIBRARY%>>%GITHUB_ENV%
+          if "%GITHUB_BASE_REF%" == "" set GITHUB_BASE_REF=%GITHUB_REF%
+          set BOOST_BRANCH=develop
+          for /f %%i in ("%GITHUB_BASE_REF%") do if "%%~nxi" == "master" set BOOST_BRANCH=master
+          echo BOOST_BRANCH: %BOOST_BRANCH%
+          cd ..
+          git clone -b %BOOST_BRANCH% --depth 1 https://github.com/boostorg/boost.git boost-root
+          cd boost-root
+          xcopy /s /e /q %GITHUB_WORKSPACE% libs\%LIBRARY%\
+          git submodule update --init tools/boostdep
+          python tools/boostdep/depinst/depinst.py --git_args "--jobs %GIT_FETCH_JOBS%" %LIBRARY%
+          cmd /c bootstrap
+          b2 -d0 headers
+      - name: Run benchmarks (release)
+        # A b2 failure fails the step (`|| exit /b 1`); the final `exit /b 0`
+        # forces success after the collection loops whatever their errorlevel.
+        run: |
+          cd ../boost-root
+          b2 -j3 libs/%LIBRARY%/test//benchmarks toolset=msvc cxxstd=latest architecture=${{ matrix.b2_arch }} address-model=64 variant=release define=BOOST_DECIMAL_RUN_BENCHMARKS embed-manifest-via=linker || exit /b 1
+          echo ==================== Benchmark results (%RUNNER_OS% %RUNNER_ARCH%) ====================
+          for /f "delims=" %%f in ('dir /s /b bin.v2\benchmarks.output 2^>nul') do @type "%%f"
+          for /f "delims=" %%f in ('dir /s /b bin.v2\benchmarks.output 2^>nul') do @copy /y "%%f" "%GITHUB_WORKSPACE%\benchmarks-windows-${{ matrix.arch }}.txt" >nul
+          exit /b 0
+      - name: Upload benchmark output
+        if: always()
+        uses: actions/upload-artifact@v6
+        with:
+          name: benchmarks-windows-${{ matrix.arch }}
+          path: benchmarks-windows-${{ matrix.arch }}.txt
+          if-no-files-found: warn
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8c837340..b9fe14c11 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1125,60 +1125,3 @@ jobs: run: | cd ../boost-root/__build__ ctest --output-on-failure --no-tests=error - - cuda-cmake-test: - strategy: - fail-fast: false - - runs-on: ubuntu-24.04 - - steps: - - uses: Jimver/cuda-toolkit@v0.2.30 - id: cuda-toolkit - with: - cuda: '12.8.0' - method: 'network' - sub-packages: '["nvcc"]' - - - name: Output CUDA information - run: | - echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"+ - echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" - nvcc -V - - uses: actions/checkout@v5 - - - name: Install Packages - run: | - sudo apt-get install -y cmake make - - name: Setup Boost - run: | - echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY - 
LIBRARY=${GITHUB_REPOSITORY#*/} - echo LIBRARY: $LIBRARY - echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV - echo GITHUB_BASE_REF: $GITHUB_BASE_REF - echo GITHUB_REF: $GITHUB_REF - REF=${GITHUB_BASE_REF:-$GITHUB_REF} - REF=${REF#refs/heads/} - echo REF: $REF - BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true - echo BOOST_BRANCH: $BOOST_BRANCH - cd .. - git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root - cd boost-root - mkdir -p libs/$LIBRARY - cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY - git submodule update --init tools/boostdep - python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY - # ctest --output-on-failure --no-tests=error should be added between the last cmake and cd when GPU runners are available - - name: Test C++17/20/23 - run: | - for std in 17 20 23; do - echo "======== Testing C++${std} ========" - cd ../boost-root - rm -rf __build__ - mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_DECIMAL_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=${std} .. - cmake --build . --target tests -j $(nproc) - cd $GITHUB_WORKSPACE - done diff --git a/doc/modules/ROOT/pages/benchmarks.adoc b/doc/modules/ROOT/pages/benchmarks.adoc index 52936c83e..488c2c22a 100644 --- a/doc/modules/ROOT/pages/benchmarks.adoc +++ b/doc/modules/ROOT/pages/benchmarks.adoc @@ -62,7 +62,7 @@ These benchmarks are automatically disabled if your compiler does not provide fe [#x64_linux_benchmarks] == x64 Linux -Run using an Intel i9-11900k chipset running Ubuntu 24.04 and Intel oneAPI compiler 2025.2.0 or GCC 13.3.0. +Run using an Intel i9-11900k chipset running Ubuntu 24.04 and Intel oneAPI compiler 2025.3.3 or GCC 13.4.0. 
=== Comparisons @@ -71,38 +71,38 @@ Intel Compiler: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 72,696 -| 0.505 +| 70,801 +| 0.499 | `double` -| 143,924 +| 141,854 | 1.000 | `decimal32_t` -| 1,485,786 -| 10.323 +| 1,322,218 +| 9.321 | `decimal64_t` -| 1,653,991 -| 11.492 +| 1,599,987 +| 11.279 | `decimal128_t` -| 4,662,704 -| 32.397 +| 3,951,400 +| 27.855 | `decimal_fast32_t` -| 619,662 -| 4.305 +| 537,268 +| 3.787 | `decimal_fast64_t` -| 606,382 -| 4.213 +| 556,455 +| 3.923 | `decimal_fast128_t` -| 698,945 -| 4.856 +| 667,681 +| 4.707 | Intel `BID_UINT32` -| 2,411,294 -| 16.754 +| 3,680,493 +| 25.946 | Intel `BID_UINT64` -| 3,158,422 -| 21.945 +| 5,714,482 +| 40.284 | Intel `BID_UINT128` -| 3,389,883 -| 23.553 +| 5,853,596 +| 41.265 |=== GCC: @@ -110,47 +110,47 @@ GCC: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 56,457 -| 0.916 +| 55,070 +| 0.952 | `double` -| 61,615 +| 57,870 | 1.000 | `decimal32_t` -| 1,404,638 -| 22.797 +| 1,103,116 +| 19.062 | `decimal64_t` -| 1,408,074 -| 22.853 +| 1,090,026 +| 18.836 | `decimal128_t` -| 4,974,170 -| 80.730 +| 3,772,309 +| 65.186 | `decimal_fast32_t` -| 546,836 -| 8.875 +| 465,000 +| 8.035 | `decimal_fast64_t` -| 472,387 -| 7.667 +| 513,587 +| 8.875 | `decimal_fast128_t` -| 480,853 -| 7.804 +| 444,145 +| 7.675 | GCC `_Decimal32` -| 816,703 -| 13.255 +| 831,820 +| 14.374 | GCC `_Decimal64` -| 501,479 -| 8.139 +| 475,605 +| 8.219 | GCC `_Decimal128` -| 914,600 -| 14.844 +| 872,026 +| 15.069 | Intel `BID_UINT32` -| 3,718,385 -| 60.348 +| 3,658,360 +| 63.217 | Intel `BID_UINT64` -| 5,721,887 -| 92.865 +| 5,452,325 +| 94.217 | Intel `BID_UINT128` -| 7,090,648 -| 115.080 +| 5,705,342 +| 98.589 |=== === Addition @@ -160,38 +160,38 @@ Intel Compiler: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 118,040 -| 1.303 +| 76,312 +| 1.105 | `double` -| 90,579 +| 69,075 | 1.000 | `decimal32_t` -| 1,712,396 -| 18.905 +| 1,238,019 +| 17.923 | `decimal64_t` -| 1,575,893 -| 17.398 +| 1,087,951 +| 
15.750 | `decimal128_t` -| 3,181,562 -| 35.125 +| 2,381,956 +| 34.484 | `decimal_fast32_t` -| 729,257 -| 8.051 +| 944,208 +| 13.669 | `decimal_fast64_t` -| 1,083,923 -| 11.967 +| 754,414 +| 10.922 | `decimal_fast128_t` -| 1,367,004 -| 15.092 +| 767,338 +| 11.109 | Intel `BID_UINT32` -| 1,242,797 -| 13.721 +| 2,895,250 +| 41.915 | Intel `BID_UINT64` -| 1,689,585 -| 18.653 +| 3,477,761 +| 50.348 | Intel `BID_UINT128` -| 1,958,345 -| 21.620 +| 2,587,966 +| 37.466 |=== GCC: @@ -199,47 +199,47 @@ GCC: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 79,256 -| 1.085 +| 72,939 +| 1.166 | `double` -| 73,017 +| 62,541 | 1.000 | `decimal32_t` -| 1,501,645 -| 20.566 +| 1,225,447 +| 19.594 | `decimal64_t` -| 1,567,250 -| 21.464 +| 1,257,515 +| 20.107 | `decimal128_t` -| 4,609,413 -| 63.128 +| 2,085,250 +| 33.342 | `decimal_fast32_t` -| 735,864 -| 10.078 +| 771,283 +| 12.332 | `decimal_fast64_t` -| 1,002,119 -| 13.724 +| 1,064,025 +| 17.013 | `decimal_fast128_t` -| 1,329,644 -| 18.210 +| 926,772 +| 14.819 | GCC `_Decimal32` -| 2,975,146 -| 40.746 +| 2,894,703 +| 46.285 | GCC `_Decimal64` -| 2,186,565 -| 29.946 +| 2,100,094 +| 33.579 | GCC `_Decimal128` -| 3,368,864 -| 46.138 +| 3,264,420 +| 52.196 | Intel `BID_UINT32` -| 2,838,194 -| 38.879 +| 2,574,559 +| 41.166 | Intel `BID_UINT64` -| 3,297,652 -| 45.163 +| 3,379,104 +| 54.030 | Intel `BID_UINT128` -| 2,796,283 -| 38.296 +| 2,509,923 +| 40.132 |=== === Subtraction @@ -249,38 +249,38 @@ Intel Compiler: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 78,250 -| 1.069 +| 76,299 +| 1.104 | `double` -| 73,193 +| 69,112 | 1.000 | `decimal32_t` -| 1,480,678 -| 20.229 +| 1,274,392 +| 18.440 | `decimal64_t` -| 1,371,677 -| 18.741 +| 1,181,406 +| 17.094 | `decimal128_t` -| 2,768,955 -| 37.831 +| 2,343,671 +| 33.911 | `decimal_fast32_t` -| 1,040,587 -| 14.217 +| 1,003,326 +| 14.517 | `decimal_fast64_t` -| 1,055,980 -| 14.427 +| 823,770 +| 11.919 | `decimal_fast128_t` -| 1,212,405 -| 16.564 +| 945,118 +| 13.675 | 
Intel `BID_UINT32` -| 1,922,108 -| 26.261 +| 3,853,532 +| 55.758 | Intel `BID_UINT64` -| 1,793,879 -| 24.509 +| 3,550,643 +| 51.375 | Intel `BID_UINT128` -| 2,397,372 -| 32.754 +| 3,186,584 +| 46.108 |=== GCC: @@ -288,47 +288,47 @@ GCC: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 275,230 -| 0.936 +| 267,529 +| 0.976 | `double` -| 293,907 +| 274,107 | 1.000 | `decimal32_t` -| 1,451,610 -| 4.939 +| 1,228,320 +| 4.481 | `decimal64_t` -| 1,456,587 -| 4.956 +| 1,313,266 +| 4.791 | `decimal128_t` -| 4,332,644 -| 14.742 +| 2,234,926 +| 8.153 | `decimal_fast32_t` -| 842,910 -| 2.868 +| 790,531 +| 2.884 | `decimal_fast64_t` -| 968,939 -| 3.297 +| 1,166,909 +| 4.257 | `decimal_fast128_t` -| 1,327,411 -| 4.516 +| 1,445,687 +| 5.274 | GCC `_Decimal32` -| 2,045,306 -| 6.959 +| 1,972,600 +| 7.196 | GCC `_Decimal64` -| 1,355,777 -| 4.613 +| 1,313,261 +| 4.791 | GCC `_Decimal128` -| 3,178,891 -| 10.816 +| 3,084,987 +| 11.255 | Intel `BID_UINT32` -| 3,762,566 -| 12.802 +| 3,539,123 +| 12.911 | Intel `BID_UINT64` -| 3,432,814 -| 11.680 +| 3,522,244 +| 12.850 | Intel `BID_UINT128` -| 3,725,534 -| 12.676 +| 3,042,150 +| 11.098 |=== === Multiplication @@ -338,38 +338,38 @@ Intel Compiler: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 78,445 -| 1.078 +| 77,440 +| 1.122 | `double` -| 72,798 +| 68,993 | 1.000 | `decimal32_t` -| 1,735,239 -| 23.836 +| 2,142,304 +| 31.051 | `decimal64_t` -| 2,272,739 -| 31.220 +| 2,378,675 +| 34.477 | `decimal128_t` -| 6,396,750 -| 87.870 +| 6,705,642 +| 97.193 | `decimal_fast32_t` -| 993,256 -| 13.644 +| 1,616,807 +| 23.434 | `decimal_fast64_t` -| 1,670,141 -| 22.942 +| 1,934,743 +| 28.043 | `decimal_fast128_t` -| 5,959,977 -| 81.870 +| 5,788,000 +| 83.893 | Intel `BID_UINT32` -| 1,375,434 -| 18.894 +| 2,719,047 +| 39.410 | Intel `BID_UINT64` -| 2,052,278 -| 28.191 +| 5,078,522 +| 73.609 | Intel `BID_UINT128` -| 5,964,489 -| 81.932 +| 21,265,835 +| 308.232 |=== GCC: @@ -377,47 +377,47 @@ GCC: |=== | Type | Runtime (us) | Ratio 
to `double` | `float` -| 76,238 -| 1.161 +| 72,857 +| 1.165 | `double` -| 65,652 +| 62,531 | 1.000 | `decimal32_t` -| 1,703,365 -| 25.945 +| 2,112,818 +| 33.788 | `decimal64_t` -| 2,564,605 -| 39.063 +| 2,528,577 +| 40.437 | `decimal128_t` -| 7,115,514 -| 108.382 +| 7,966,781 +| 127.405 | `decimal_fast32_t` -| 1,225,047 -| 18.660 +| 1,130,499 +| 18.079 | `decimal_fast64_t` -| 1,904,509 -| 29.009 +| 2,312,590 +| 36.983 | `decimal_fast128_t` -| 6,056,348 -| 92.249 +| 8,116,908 +| 129.806 | GCC `_Decimal32` -| 2,635,531 -| 40.144 +| 2,576,432 +| 41.202 | GCC `_Decimal64` -| 2,545,441 -| 38.772 +| 2,462,813 +| 39.385 | GCC `_Decimal128` -| 7,050,299 -| 107.289 +| 6,683,052 +| 106.876 | Intel `BID_UINT32` -| 2,638,999 -| 40.197 +| 2,635,154 +| 42.142 | Intel `BID_UINT64` -| 4,605,497 -| 70.150 +| 4,973,224 +| 79.532 | Intel `BID_UINT128` -| 13,075,436 -| 199.163 +| 21,352,000 +| 341.463 |=== === Division @@ -427,38 +427,38 @@ Intel Compiler: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 100,799 -| 0.971 +| 99,024 +| 0.943 | `double` -| 103,796 +| 105,042 | 1.000 | `decimal32_t` -| 2,134,312 -| 20.563 +| 2,416,706 +| 23.007 | `decimal64_t` -| 5,399,276 -| 52.018 +| 2,516,984 +| 23.962 | `decimal128_t` -| 10,012,578 -| 96.464 +| 5,995,607 +| 57.078 | `decimal_fast32_t` -| 1,558,774 -| 15.018 +| 1,708,582 +| 16.266 | `decimal_fast64_t` -| 1,597,873 -| 15.394 +| 1,990,538 +| 18.950 | `decimal_fast128_t` -| 8,105,004 -| 78.086 +| 4,335,834 +| 41.277 | Intel `BID_UINT32` -| 1,561,213 -| 15.041 +| 3,600,610 +| 34.278 | Intel `BID_UINT64` -| 3,115,862 -| 30.019 +| 5,739,738 +| 54.642 | Intel `BID_UINT128` -| 7,474,712 -| 72.013 +| 15,019,593 +| 142.987 |=== GCC: @@ -466,47 +466,47 @@ GCC: |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 60,277 -| 0.747 +| 58,777 +| 0.761 | `double` -| 80,676 +| 77,196 | 1.000 | `decimal32_t` -| 2,396,732 -| 29.708 +| 2,079,584 +| 26.939 | `decimal64_t` -| 4,021,720 -| 49.850 +| 2,477,436 +| 32.093 | `decimal128_t` -| 
10,677,625 -| 132.352 +| 5,854,061 +| 75.834 | `decimal_fast32_t` -| 1,083,011 -| 13.424 +| 1,718,838 +| 22.266 | `decimal_fast64_t` -| 1,851,520 -| 22.950 +| 1,724,535 +| 22.340 | `decimal_fast128_t` -| 8,121,160 -| 100.664 +| 4,442,123 +| 57.543 | GCC `_Decimal32` -| 5,082,812 -| 63.002 +| 4,868,534 +| 63.067 | GCC `_Decimal64` -| 3,005,153 -| 37.250 +| 2,904,760 +| 37.628 | GCC `_Decimal128` -| 10,257,437 -| 130.490 +| 9,906,049 +| 128.323 | Intel `BID_UINT32` -| 3,242,695 -| 40.194 +| 3,356,191 +| 43.476 | Intel `BID_UINT64` -| 6,143,554 -| 76.151 +| 5,745,255 +| 74.424 | Intel `BID_UINT128` -| 13,499,022 -| 167.324 +| 14,918,326 +| 193.253 |=== === `from_chars` @@ -1080,36 +1080,36 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. [#m4_mac_benchmarks] == ARM64 macOS -Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homebrew Clang 20.1.8 +Run using a MacBook Pro with M4 Max chipset running macOS Tahoe 26.5.1 and Homebrew Clang 22.1.7. === Comparisons |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 64,639 -| 1.606 +| 67,936 +| 1.536 | `double` -| 40,255 +| 44,239 | 1.000 | `decimal32_t` -| 957,179 -| 23.778 +| 998,053 +| 22.560 | `decimal64_t` -| 897,409 -| 22.293 +| 958,425 +| 21.665 | `decimal128_t` -| 2,131,391 -| 52.947 +| 2,055,981 +| 46.474 | `decimal_fast32_t` -| 380,892 -| 9.462 +| 506,597 +| 11.451 | `decimal_fast64_t` -| 481,455 -| 11.960 +| 473,861 +| 10.711 | `decimal_fast128_t` -| 465,461 -| 11.563 +| 441,387 +| 9.977 |=== === Addition @@ -1117,29 +1117,29 @@ Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homeb |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 11,853 -| 0.964 +| 12,488 +| 0.994 | `double` -| 12,295 +| 12,561 | 1.000 | `decimal32_t` -| 1,338,796 -| 108.889 +| 1,111,870 +| 88.518 | `decimal64_t` -| 1,231,462 -| 100.160 +| 825,194 +| 65.695 | `decimal128_t` -| 2,262,808 -| 184.043 +| 1,439,508 +| 114.601 | `decimal_fast32_t` -| 
608,660 -| 49.505 +| 687,518 +| 54.734 | `decimal_fast64_t` -| 847,512 -| 68.931 +| 610,775 +| 48.625 | `decimal_fast128_t` -| 1,030,662 -| 83.827 +| 693,826 +| 55.237 |=== === Subtraction @@ -1147,29 +1147,29 @@ Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homeb |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 11,939 -| 0.951 +| 12,655 +| 1.001 | `double` -| 12,551 +| 12,645 | 1.000 | `decimal32_t` -| 1,296,430 -| 103.293 +| 1,151,474 +| 91.062 | `decimal64_t` -| 1,180,456 -| 94.053 +| 910,530 +| 72.007 | `decimal128_t` -| 2,078,008 -| 165.565 +| 1,537,309 +| 121.574 | `decimal_fast32_t` -| 817,989 -| 65.173 +| 755,153 +| 59.719 | `decimal_fast64_t` -| 823,569 -| 65.618 +| 702,725 +| 55.573 | `decimal_fast128_t` -| 993,447 -| 79.153 +| 789,072 +| 62.402 |=== === Multiplication @@ -1177,29 +1177,29 @@ Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homeb |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 12,186 -| 0.944 +| 12,866 +| 1.000 | `double` -| 12,914 +| 12,869 | 1.000 | `decimal32_t` -| 1,441,141 -| 111.595 +| 2,104,935 +| 163.566 | `decimal64_t` -| 2,117,061 -| 163.935 +| 2,157,113 +| 167.621 | `decimal128_t` -| 5,376,470 -| 416.329 +| 5,183,487 +| 402.789 | `decimal_fast32_t` -| 923,346 -| 71.500 +| 1,536,569 +| 119.401 | `decimal_fast64_t` -| 1,766,419 -| 136.783 +| 1,934,438 +| 150.318 | `decimal_fast128_t` -| 5,463,675 -| 423.082 +| 4,762,463 +| 370.072 |=== === Division @@ -1207,29 +1207,29 @@ Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homeb |=== | Type | Runtime (us) | Ratio to `double` | `float` -| 12,576 -| 0.722 +| 13,501 +| 0.750 | `double` -| 17,145 +| 18,001 | 1.000 | `decimal32_t` -| 1,732,611 -| 101.056 +| 2,210,859 +| 122.819 | `decimal64_t` -| 3,558,094 -| 207.529 +| 2,495,550 +| 138.634 | `decimal128_t` -| 8,985,521 -| 524.090 +| 8,639,540 +| 479.948 | `decimal_fast32_t` -| 1,075,184 -| 62.711 +| 1,557,445 +| 86.520 | 
`decimal_fast64_t` -| 2,027,533 -| 118.258 +| 2,394,632 +| 133.028 | `decimal_fast128_t` -| 7,583,016 -| 442.287 +| 7,662,160 +| 425.652 |=== === `from_chars` @@ -1415,3 +1415,158 @@ Run using a Macbook pro with M4 Max chipset running macOS Sequoia 15.5 and homeb | 5,300,774 | 1.189 |=== + +[#arm64_windows_benchmarks] +== ARM64 Windows + +Run on the GitHub Actions `windows-11-arm` runner using MSVC in release mode. + +=== Comparisons + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 182,667 +| 1.012 +| `double` +| 180,555 +| 1.000 +| `decimal32_t` +| 1,679,312 +| 9.301 +| `decimal64_t` +| 2,203,320 +| 12.203 +| `decimal128_t` +| 8,833,555 +| 48.924 +| `decimal_fast32_t` +| 872,493 +| 4.832 +| `decimal_fast64_t` +| 891,568 +| 4.938 +| `decimal_fast128_t` +| 907,563 +| 5.027 +|=== + +=== Addition + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 60,279 +| 0.941 +| `double` +| 64,051 +| 1.000 +| `decimal32_t` +| 2,209,244 +| 34.492 +| `decimal64_t` +| 2,231,636 +| 34.842 +| `decimal128_t` +| 5,480,362 +| 85.562 +| `decimal_fast32_t` +| 1,502,684 +| 23.461 +| `decimal_fast64_t` +| 1,982,389 +| 30.950 +| `decimal_fast128_t` +| 2,889,333 +| 45.110 +|=== + +=== Subtraction + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 55,587 +| 0.918 +| `double` +| 60,552 +| 1.000 +| `decimal32_t` +| 2,346,070 +| 38.745 +| `decimal64_t` +| 2,458,995 +| 40.610 +| `decimal128_t` +| 5,268,604 +| 87.010 +| `decimal_fast32_t` +| 1,879,880 +| 31.046 +| `decimal_fast64_t` +| 2,346,222 +| 38.747 +| `decimal_fast128_t` +| 3,237,777 +| 53.471 +|=== + +=== Multiplication + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 56,447 +| 0.914 +| `double` +| 61,774 +| 1.000 +| `decimal32_t` +| 3,276,537 +| 53.041 +| `decimal64_t` +| 4,325,985 +| 70.029 +| `decimal128_t` +| 15,726,575 +| 254.582 +| `decimal_fast32_t` +| 2,313,991 +| 37.459 +| `decimal_fast64_t` +| 3,821,815 +| 61.868 +| `decimal_fast128_t` +| 15,496,341 +| 250.855 +|=== + +=== 
Division + +|=== +| Type | Runtime (us) | Ratio to `double` +| `float` +| 56,701 +| 0.381 +| `double` +| 149,002 +| 1.000 +| `decimal32_t` +| 3,664,152 +| 24.591 +| `decimal64_t` +| 7,224,334 +| 48.485 +| `decimal128_t` +| 16,818,955 +| 112.877 +| `decimal_fast32_t` +| 2,750,313 +| 18.458 +| `decimal_fast64_t` +| 6,758,148 +| 45.356 +| `decimal_fast128_t` +| 15,086,240 +| 101.249 +|===