diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 5fc24d8d3492..08ca26aa0582 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-android.yml', 'examples/llama.android/**' diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index b99e614666e1..97f72c079f7e 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -20,6 +20,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-apple.yml', 'ggml/src/ggml-metal/**' diff --git a/.github/workflows/build-cann.yml b/.github/workflows/build-cann.yml index d39b87637339..b50fd98bfbab 100644 --- a/.github/workflows/build-cann.yml +++ b/.github/workflows/build-cann.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-cann.yml', 'ggml/src/ggml-cann/**' diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 071f7a677ae3..cf1e1fa29a58 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -1,10 +1,13 @@ -name: HRX CI +name: HRX v2 CI on: + push: + branches: + - hrx-v2 pull_request: types: [opened, synchronize, reopened] branches: - - hrx-integration + - hrx-v2 permissions: contents: read @@ -15,81 +18,173 @@ concurrency: cancel-in-progress: true jobs: - hrx-build: - name: ubuntu-latest - runs-on: ubuntu-latest + hrx2-build: + name: ${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - name: gfx1151_strix-halo + gpu_target: gfx1151 + runner: linux-gfx1151-gpu-rocm + rocm_tarball: therock-dist-linux-gfx1151-7.14.0a20260617.tar.gz + # Disable gfx1201_9070 build for now, runner is unstable. + # - name: gfx1201_9070 + # gpu_target: gfx1201 + # runner: linux-gfx120X-gpu-rocm + # rocm_tarball: therock-dist-linux-gfx120X-all-7.14.0a20260617.tar.gz + runs-on: ${{ matrix.runner }} + timeout-minutes: 120 + defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} + container: + image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:fba5f55a122dbb15925e98c51fe65bffe88c36e11ebb25b73daf2bea04202dc3 + # --user 0:0: actions running inside the container need to write to some + # files set up outside the container by the runner agent. In + # June 2026 some runner agents set those files up with + # permissions for a "tester" user with UID/GID 1001, which + # matches the user in no_rocm_image_ubuntu24_04, and some are + # set up as root; accessing a file owned by root with user + # "tester" gives an EACCES. Running as root is the common + # denominator. + # --device kfd/dri: GPU access. + options: >- + --user 0:0 + --device /dev/kfd + --device /dev/dri env: - HRX_WORK_DIR: ${{ github.workspace }} - # Public location for bench tooling (rsuderman/llamacpp_ci and fork - # AaronStGeorge/llamacpp_ci) - #TODO: switch to ROCm/llamacpp-hrx-bench once it is open sourced. - BENCH_REPOSITORY: AaronStGeorge/llamacpp_ci - BENCH_REF: 'main' - BENCH_DIR: ${{ github.workspace }}/bench - LLAMA_SRC_DIR: ${{ github.workspace }}/llama-src - HRX_ARTIFACT_SET: 'core-with-upstream-hip' + DEVWS_REPOSITORY: AaronStGeorge/llamacpp-devws + DEVWS_REF: ci-scripts + DEVWS_DIR: ${{ github.workspace }}/devws-src + ROCM_DIR: /work/rocm + ROCM_TARBALL_BASE_URL: https://rocm.nightlies.amd.com/tarball-multi-arch + ROCM_TARBALL_NAME: ${{ matrix.rocm_tarball }} + LLAMACPP_DEVWS_SKIP_VENV: "1" CCACHE_COMPILERCHECK: content + HSA_FORCE_FINE_GRAIN_PCIE: "1" steps: - - name: Checkout llama.cpp (under test) + - name: Checkout dev workspace tooling uses: actions/checkout@v6 with: - path: llama-src + repository: ${{ env.DEVWS_REPOSITORY }} + ref: ${{ env.DEVWS_REF }} + path: devws-src - - name: Checkout bench tooling + - name: Checkout llama.cpp under test uses: actions/checkout@v6 with: - repository: ${{ env.BENCH_REPOSITORY }} - ref: ${{ env.BENCH_REF }} - path: bench + path: devws-src/sources/llama.cpp - - name: Install ROCm build dependencies - run: "${BENCH_DIR}/scripts/hrx/install-rocm-deps.sh" + - name: Checkout HRX System + uses: actions/checkout@v6 + with: + repository: ROCm/hrx-system + ref: main + path: devws-src/sources/hrx-system + + - name: Runner identity + env: + MATRIX_NAME: ${{ matrix.name }} + MATRIX_GPU_TARGET: ${{ matrix.gpu_target }} + MATRIX_RUNS_ON: ${{ matrix.runner }} + run: "${GITHUB_WORKSPACE}/devws-src/ci/runner-info.sh" + + - name: Install host build dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + ccache \ + cmake \ + curl \ + git \ + glslc \ + libvulkan-dev \ + ninja-build \ + pkg-config \ + python3 \ + tar \ + xz-utils + + - name: Fetch ROCm nightly tarball + run: "${DEVWS_DIR}/ci/fetch-rocm-nightly.sh" - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-latest-${{ env.HRX_ARTIFACT_SET }} + key: hrx-v2-${{ matrix.name }} evict-old-files: 1d save: ${{ github.event_name == 'pull_request' }} - - name: Checkout HRX - run: "${BENCH_DIR}/scripts/hrx/checkout-hrx.sh" - - - name: Fetch ROCm assets - run: "${BENCH_DIR}/scripts/hrx/fetch-rocm-assets.sh" - - name: Build HRX - run: "${BENCH_DIR}/scripts/hrx/build-hrx.sh" - - - name: Validate HRX - run: "${BENCH_DIR}/scripts/hrx/validate-hrx.sh" - - - name: Build llama.cpp with HRX - run: "${BENCH_DIR}/scripts/hrx/build-llama-hrx.sh" + run: | + cd "${DEVWS_DIR}" + source .envrc + python3 skills/bootstrap-hrx-llama-builds/scripts/bootstrap_builds.py \ + --workspace "${DEVWS_DIR}" \ + --action check \ + --action hrx \ + --action loom \ + --action rocm-health \ + --action llama-hrx \ + --skip-source-branch-check \ + --gfx-targets auto \ + --jobs "$(nproc)" - name: Run sample MUL_MAT correctness config on CPU run: | - . "${BENCH_DIR}/scripts/hrx/env.sh" - "${BENCH_DIR}/tools/run-op-test.py" \ - --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \ - --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \ + cd "${DEVWS_DIR}" + source .envrc + mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" + "${DEVWS_DIR}/ci/tools/run-op-test.py" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ + --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend CPU \ - --output benchmark-results/sample-mul-mat-f16-cpu-test.jsonl + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-test.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-test.txt" - name: Run sample MUL_MAT benchmark config on CPU + env: + GGML_TEST_BACKEND_OPS_PERF_MIN_US: "2000000" run: | - . "${BENCH_DIR}/scripts/hrx/env.sh" - "${BENCH_DIR}/tools/run-op-perf.py" \ - --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \ - --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \ + cd "${DEVWS_DIR}" + source .envrc + mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" + "${DEVWS_DIR}/ci/tools/run-op-perf.py" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ + --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend CPU \ - --output benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.txt" - - name: Upload CPU benchmark results + - name: Upload HRX benchmark results uses: actions/upload-artifact@v5 with: - name: cpu-benchmark-results + name: hrx-benchmark-results-${{ matrix.name }} path: benchmark-results/ + if-no-files-found: error + + # TODO: switch to github.event.pull_request.base.sha, github.event.before is for testing + - name: Download previous HRX benchmark results + env: + GH_TOKEN: ${{ github.token }} + PARENT_SHA: ${{ github.event.before || github.event.pull_request.base.sha }} + run: | + python3 "${DEVWS_DIR}/ci/tools/download-parent-artifact.py" \ + "hrx-benchmark-results-${{ matrix.name }}" \ + "${GITHUB_WORKSPACE}/benchmark-results/previous" + + - name: Compare HRX benchmark results + run: | + python3 "${DEVWS_DIR}/ci/tools/compare-op-perf.py" \ + "${GITHUB_WORKSPACE}/benchmark-results/previous/sample-mul-mat-f16-cpu-perf.jsonl" \ + "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl" \ + --max-regression-pct 5 \ + --min-regression-us 5 \ + --require-all-cases \ + --skip-missing-baseline diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml index 9733dbaa7a21..161a3aaaa63e 100644 --- a/.github/workflows/build-riscv.yml +++ b/.github/workflows/build-riscv.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-riscv.yml', 'ggml/src/ggml-cpu/arch/riscv/**' diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index eeea820ba169..05b3ffd27604 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -25,6 +25,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-self-hosted.yml', '**/CMakeLists.txt', diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index de38bb2db6d3..01a7e595e4ba 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -19,6 +19,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-vulkan.yml', 'ggml/src/ggml-vulkan/**' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4ae3675602e..8a8653ca350d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,6 +26,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build.yml', '.github/workflows/build-cmake-pkg.yml', diff --git a/.github/workflows/check-vendor.yml b/.github/workflows/check-vendor.yml index 1671ed7b8bd2..0e2d13786e82 100644 --- a/.github/workflows/check-vendor.yml +++ b/.github/workflows/check-vendor.yml @@ -12,6 +12,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ 'vendor/**', 'scripts/sync_vendor.py' diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml index 6f648bac45b7..93ed12934249 100644 --- a/.github/workflows/copilot-setup-steps.yml +++ b/.github/workflows/copilot-setup-steps.yml @@ -5,9 +5,11 @@ name: "Copilot Setup Steps" on: workflow_dispatch: push: + branches-ignore: ['hrx-v2'] paths: - .github/workflows/copilot-setup-steps.yml pull_request: + branches-ignore: ['hrx-v2'] paths: - .github/workflows/copilot-setup-steps.yml diff --git a/.github/workflows/hip-quality-check.yml b/.github/workflows/hip-quality-check.yml index d00d30ed65ce..13492357f5a0 100644 --- a/.github/workflows/hip-quality-check.yml +++ b/.github/workflows/hip-quality-check.yml @@ -14,6 +14,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/hip-quality-check.yml', '**/*.cu', diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index eab20c68811e..cfa46e9bf77e 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,6 +1,7 @@ name: "Pull Request Labeler" on: -- pull_request_target + pull_request_target: + branches-ignore: ['hrx-v2'] jobs: labeler: diff --git a/.github/workflows/pre-tokenizer-hashes.yml b/.github/workflows/pre-tokenizer-hashes.yml index 7126b62b690b..e28798991ddf 100644 --- a/.github/workflows/pre-tokenizer-hashes.yml +++ b/.github/workflows/pre-tokenizer-hashes.yml @@ -2,10 +2,12 @@ name: Check Pre-Tokenizer Hashes on: push: + branches-ignore: ['hrx-v2'] paths: - 'convert_hf_to_gguf.py' - 'convert_hf_to_gguf_update.py' pull_request: + branches-ignore: ['hrx-v2'] paths: - 'convert_hf_to_gguf.py' - 'convert_hf_to_gguf_update.py' diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml index 1219b8745927..99002aea6af5 100644 --- a/.github/workflows/python-check-requirements.yml +++ b/.github/workflows/python-check-requirements.yml @@ -2,12 +2,14 @@ name: Python check requirements.txt on: push: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-check-requirements.yml' - 'scripts/check-requirements.sh' - 'convert*.py' - '**/requirements*.txt' pull_request: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-check-requirements.yml' - 'scripts/check-requirements.sh' diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 1e5d64c1aee6..c43bcce62c53 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -10,6 +10,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/python-lint.yml', '**/*.py' diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml index dc7aebe24ca2..41405d0c5846 100644 --- a/.github/workflows/python-type-check.yml +++ b/.github/workflows/python-type-check.yml @@ -2,6 +2,7 @@ name: Python Type-Check on: push: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-type-check.yml' - 'ty.toml' @@ -9,6 +10,7 @@ on: - '**/requirements*.txt' # - 'pyrightconfig.json' pull_request: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-type-check.yml' - 'ty.toml' diff --git a/.github/workflows/server-webui.yml b/.github/workflows/server-webui.yml index 492107ffd851..d852fe540f35 100644 --- a/.github/workflows/server-webui.yml +++ b/.github/workflows/server-webui.yml @@ -18,6 +18,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/server-webui.yml', 'tools/server/webui/**.*', diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 750c29f08e5c..8ffe34a3235d 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -29,6 +29,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/server.yml', '**/CMakeLists.txt', diff --git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml index 2ab06eb9811d..5535e5b69786 100644 --- a/.github/workflows/update-ops-docs.yml +++ b/.github/workflows/update-ops-docs.yml @@ -2,11 +2,13 @@ name: Update Operations Documentation on: push: + branches-ignore: ['hrx-v2'] paths: - 'docs/ops.md' - 'docs/ops/**' - 'scripts/create_ops_docs.py' pull_request: + branches-ignore: ['hrx-v2'] paths: - 'docs/ops.md' - 'docs/ops/**' diff --git a/ggml/src/ggml-hrx/ggml-hrx.cpp b/ggml/src/ggml-hrx/ggml-hrx.cpp index 120a422b4d10..7c3ef8152965 100644 --- a/ggml/src/ggml-hrx/ggml-hrx.cpp +++ b/ggml/src/ggml-hrx/ggml-hrx.cpp @@ -2521,16 +2521,16 @@ static bool ggml_backend_hrx_load_catalog_provider( executable, export_ordinal, &export_info)) && export_info.binding_count == entry->binding_count && export_info.parameter_count == entry->parameter_count && - export_info.constant_count * sizeof(uint32_t) == entry->constants_size; + export_info.constant_byte_length == entry->constants_size; if (!ok) { GGML_LOG_WARN( "%s: HRX catalog kernel %s has unsupported ABI " - "(bindings=%u expected=%u constants=%u constants_size=%u parameters=%u expected_parameters=%u workgroup=%ux%ux%u)\n", + "(bindings=%u expected=%u constant_bytes=%u expected_constant_bytes=%u parameters=%u expected_parameters=%u workgroup=%ux%ux%u)\n", __func__, entry->name, export_info.binding_count, entry->binding_count, - export_info.constant_count, + export_info.constant_byte_length, entry->constants_size, export_info.parameter_count, entry->parameter_count, diff --git a/ggml/src/ggml-hrx2/CMakeLists.txt b/ggml/src/ggml-hrx2/CMakeLists.txt index 541c3b239fe3..0f8d0620f268 100644 --- a/ggml/src/ggml-hrx2/CMakeLists.txt +++ b/ggml/src/ggml-hrx2/CMakeLists.txt @@ -2,6 +2,7 @@ message(STATUS "Using HRX2 backend") find_package(Python3 COMPONENTS Interpreter REQUIRED) find_package(hrx CONFIG REQUIRED) +find_package(loomc CONFIG REQUIRED) set(GGML_HRX2_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") set(GGML_HRX2_ARTIFACT_ROOT "${GGML_HRX2_GENERATED_DIR}/catalog") @@ -68,10 +69,9 @@ find_program(GGML_HRX2_LOOM_LINK_EXECUTABLE REQUIRED ) -if(NOT TARGET loom::binding::c::loomc OR NOT TARGET loom::binding::c::target::amdgpu) +if(NOT TARGET loomc::loomc) message(FATAL_ERROR - "HRX2 requires HRX to export Loom C API CMake targets " - "loom::binding::c::loomc and loom::binding::c::target::amdgpu") + "HRX2 requires HRX to export the Loom C API CMake target loomc::loomc") endif() add_custom_command( @@ -146,8 +146,7 @@ ggml_add_backend_library(ggml-hrx2 target_link_libraries(ggml-hrx2 PRIVATE hrx::hrx - loom::binding::c::loomc - loom::binding::c::target::amdgpu + loomc::loomc ) target_include_directories(ggml-hrx2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../vendor