From e698bad17f2642195d064619a4d1a5f6bba67584 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 02:31:05 +0000 Subject: [PATCH 01/14] Updates for new hrx C and CMake APIs --- ggml/src/ggml-hrx/ggml-hrx.cpp | 6 +++--- ggml/src/ggml-hrx2/CMakeLists.txt | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-hrx/ggml-hrx.cpp b/ggml/src/ggml-hrx/ggml-hrx.cpp index 120a422b4d10..7c3ef8152965 100644 --- a/ggml/src/ggml-hrx/ggml-hrx.cpp +++ b/ggml/src/ggml-hrx/ggml-hrx.cpp @@ -2521,16 +2521,16 @@ static bool ggml_backend_hrx_load_catalog_provider( executable, export_ordinal, &export_info)) && export_info.binding_count == entry->binding_count && export_info.parameter_count == entry->parameter_count && - export_info.constant_count * sizeof(uint32_t) == entry->constants_size; + export_info.constant_byte_length == entry->constants_size; if (!ok) { GGML_LOG_WARN( "%s: HRX catalog kernel %s has unsupported ABI " - "(bindings=%u expected=%u constants=%u constants_size=%u parameters=%u expected_parameters=%u workgroup=%ux%ux%u)\n", + "(bindings=%u expected=%u constant_bytes=%u expected_constant_bytes=%u parameters=%u expected_parameters=%u workgroup=%ux%ux%u)\n", __func__, entry->name, export_info.binding_count, entry->binding_count, - export_info.constant_count, + export_info.constant_byte_length, entry->constants_size, export_info.parameter_count, entry->parameter_count, diff --git a/ggml/src/ggml-hrx2/CMakeLists.txt b/ggml/src/ggml-hrx2/CMakeLists.txt index 541c3b239fe3..0f8d0620f268 100644 --- a/ggml/src/ggml-hrx2/CMakeLists.txt +++ b/ggml/src/ggml-hrx2/CMakeLists.txt @@ -2,6 +2,7 @@ message(STATUS "Using HRX2 backend") find_package(Python3 COMPONENTS Interpreter REQUIRED) find_package(hrx CONFIG REQUIRED) +find_package(loomc CONFIG REQUIRED) set(GGML_HRX2_GENERATED_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") set(GGML_HRX2_ARTIFACT_ROOT "${GGML_HRX2_GENERATED_DIR}/catalog") @@ -68,10 +69,9 @@ find_program(GGML_HRX2_LOOM_LINK_EXECUTABLE REQUIRED ) -if(NOT TARGET loom::binding::c::loomc OR NOT TARGET loom::binding::c::target::amdgpu) +if(NOT TARGET loomc::loomc) message(FATAL_ERROR - "HRX2 requires HRX to export Loom C API CMake targets " - "loom::binding::c::loomc and loom::binding::c::target::amdgpu") + "HRX2 requires HRX to export the Loom C API CMake target loomc::loomc") endif() add_custom_command( @@ -146,8 +146,7 @@ ggml_add_backend_library(ggml-hrx2 target_link_libraries(ggml-hrx2 PRIVATE hrx::hrx - loom::binding::c::loomc - loom::binding::c::target::amdgpu + loomc::loomc ) target_include_directories(ggml-hrx2 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../../vendor From 9fc0cdbf9cd99242243677422fbc674da2040510 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 14:37:57 +0000 Subject: [PATCH 02/14] Basic CI for hrx-v2 --- .github/workflows/build-android.yml | 1 + .github/workflows/build-apple.yml | 1 + .github/workflows/build-cann.yml | 1 + .github/workflows/build-hrx.yml | 171 ++++++++++++------ .github/workflows/build-riscv.yml | 1 + .github/workflows/build-self-hosted.yml | 1 + .github/workflows/build-vulkan.yml | 1 + .github/workflows/build.yml | 1 + .github/workflows/check-vendor.yml | 1 + .github/workflows/copilot-setup-steps.yml | 2 + .github/workflows/hip-quality-check.yml | 1 + .github/workflows/labeler.yml | 3 +- .github/workflows/pre-tokenizer-hashes.yml | 2 + .../workflows/python-check-requirements.yml | 2 + .github/workflows/python-lint.yml | 1 + .github/workflows/python-type-check.yml | 2 + .github/workflows/server-webui.yml | 1 + .github/workflows/server.yml | 1 + .github/workflows/update-ops-docs.yml | 2 + 19 files changed, 135 insertions(+), 61 deletions(-) diff --git a/.github/workflows/build-android.yml b/.github/workflows/build-android.yml index 5fc24d8d3492..08ca26aa0582 100644 --- a/.github/workflows/build-android.yml +++ b/.github/workflows/build-android.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-android.yml', 'examples/llama.android/**' diff --git a/.github/workflows/build-apple.yml b/.github/workflows/build-apple.yml index b99e614666e1..97f72c079f7e 100644 --- a/.github/workflows/build-apple.yml +++ b/.github/workflows/build-apple.yml @@ -20,6 +20,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-apple.yml', 'ggml/src/ggml-metal/**' diff --git a/.github/workflows/build-cann.yml b/.github/workflows/build-cann.yml index d39b87637339..b50fd98bfbab 100644 --- a/.github/workflows/build-cann.yml +++ b/.github/workflows/build-cann.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-cann.yml', 'ggml/src/ggml-cann/**' diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 071f7a677ae3..84e4c3105ed3 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -1,10 +1,14 @@ -name: HRX CI +name: HRX v2 CI on: + push: + branches: + - hrx-v2 pull_request: types: [opened, synchronize, reopened] branches: - - hrx-integration + - hrx-v2 + workflow_dispatch: permissions: contents: read @@ -15,81 +19,128 @@ concurrency: cancel-in-progress: true jobs: - hrx-build: - name: ubuntu-latest - runs-on: ubuntu-latest + hrx2-build: + name: ${{ matrix.name }} + strategy: + fail-fast: false + matrix: + include: + - name: gfx1151_strix-halo + runner: linux-gfx1151-gpu-rocm + rocm_tarball: therock-dist-linux-gfx1151-7.14.0a20260617.tar.gz + - name: gfx1201_9070 + runner: linux-gfx120X-gpu-rocm + rocm_tarball: therock-dist-linux-gfx120X-all-7.14.0a20260617.tar.gz + runs-on: ${{ matrix.runner }} + timeout-minutes: 120 + defaults: + run: + shell: bash --noprofile --norc -exo pipefail {0} + container: + image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:fba5f55a122dbb15925e98c51fe65bffe88c36e11ebb25b73daf2bea04202dc3 + options: >- + --user 0:0 + --device /dev/kfd + --device /dev/dri env: - HRX_WORK_DIR: ${{ github.workspace }} - # Public location for bench tooling (rsuderman/llamacpp_ci and fork - # AaronStGeorge/llamacpp_ci) - #TODO: switch to ROCm/llamacpp-hrx-bench once it is open sourced. - BENCH_REPOSITORY: AaronStGeorge/llamacpp_ci - BENCH_REF: 'main' - BENCH_DIR: ${{ github.workspace }}/bench - LLAMA_SRC_DIR: ${{ github.workspace }}/llama-src - HRX_ARTIFACT_SET: 'core-with-upstream-hip' + DEVWS_REPOSITORY: AaronStGeorge/llamacpp-devws + DEVWS_REF: build-hrx-v2 + DEVWS_DIR: /work/llamacpp-devws + ROCM_DIR: /work/rocm + ROCM_TARBALL_BASE_URL: https://rocm.nightlies.amd.com/tarball-multi-arch + ROCM_TARBALL_NAME: ${{ matrix.rocm_tarball }} CCACHE_COMPILERCHECK: content + HSA_FORCE_FINE_GRAIN_PCIE: "1" steps: - - name: Checkout llama.cpp (under test) + - name: Checkout dev workspace tooling + uses: actions/checkout@v6 + with: + repository: ${{ env.DEVWS_REPOSITORY }} + ref: ${{ env.DEVWS_REF }} + path: devws-src + + - name: Checkout llama.cpp under test uses: actions/checkout@v6 with: path: llama-src - - name: Checkout bench tooling + - name: Checkout HRX System uses: actions/checkout@v6 with: - repository: ${{ env.BENCH_REPOSITORY }} - ref: ${{ env.BENCH_REF }} - path: bench + repository: ROCm/hrx-system + ref: main + path: hrx-system-src - - name: Install ROCm build dependencies - run: "${BENCH_DIR}/scripts/hrx/install-rocm-deps.sh" + - name: Runner identity + if: always() + run: | + printf 'RUNNER_NAME=%s\n' "${RUNNER_NAME:-}" + printf 'RUNNER_OS=%s\n' "${RUNNER_OS:-}" + printf 'GITHUB_EVENT_NAME=%s\n' "${GITHUB_EVENT_NAME:-}" + hostname + uname -a + ls -l /dev/kfd /dev/dri || true + printenv | sort | grep -E '^(GITHUB_|RUNNER_|ROCM_|HSA_|HIP_)' || true + + - name: Install host build dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + ccache \ + cmake \ + curl \ + git \ + glslc \ + libvulkan-dev \ + ninja-build \ + pkg-config \ + python3 \ + tar \ + xz-utils + + - name: Stage dev workspace under /work + run: | + rm -rf "${DEVWS_DIR}" + mkdir -p "${DEVWS_DIR}/sources" + cp -a "${GITHUB_WORKSPACE}/devws-src/." "${DEVWS_DIR}/" + cp -a "${GITHUB_WORKSPACE}/llama-src" "${DEVWS_DIR}/sources/llama.cpp" + cp -a "${GITHUB_WORKSPACE}/hrx-system-src" "${DEVWS_DIR}/sources/hrx-system" + git -C "${DEVWS_DIR}/sources/llama.cpp" checkout -B hrx-v2 + git -C "${DEVWS_DIR}/sources/hrx-system" checkout -B main + + - name: Fetch ROCm nightly tarball + run: | + rm -rf "${ROCM_DIR}" /work/rocm-tarball + mkdir -p "${ROCM_DIR}" /work/rocm-tarball + curl -fsSL "${ROCM_TARBALL_BASE_URL}/${ROCM_TARBALL_NAME}" \ + -o "/work/rocm-tarball/${ROCM_TARBALL_NAME}" + tar -xzf "/work/rocm-tarball/${ROCM_TARBALL_NAME}" -C "${ROCM_DIR}" + ln -sfn "${ROCM_DIR}" "${DEVWS_DIR}/rocm" + test -x "${DEVWS_DIR}/rocm/bin/rocminfo" + test -x "${DEVWS_DIR}/rocm/bin/amdclang++" - name: ccache uses: ggml-org/ccache-action@v1.2.21 with: - key: ubuntu-latest-${{ env.HRX_ARTIFACT_SET }} + key: hrx-v2-${{ matrix.name }} evict-old-files: 1d save: ${{ github.event_name == 'pull_request' }} - - name: Checkout HRX - run: "${BENCH_DIR}/scripts/hrx/checkout-hrx.sh" - - - name: Fetch ROCm assets - run: "${BENCH_DIR}/scripts/hrx/fetch-rocm-assets.sh" - - - name: Build HRX - run: "${BENCH_DIR}/scripts/hrx/build-hrx.sh" - - - name: Validate HRX - run: "${BENCH_DIR}/scripts/hrx/validate-hrx.sh" - - - name: Build llama.cpp with HRX - run: "${BENCH_DIR}/scripts/hrx/build-llama-hrx.sh" - - - name: Run sample MUL_MAT correctness config on CPU + - name: Build HRX2 run: | - . "${BENCH_DIR}/scripts/hrx/env.sh" - "${BENCH_DIR}/tools/run-op-test.py" \ - --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \ - --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \ - --op MUL_MAT \ - --backend CPU \ - --output benchmark-results/sample-mul-mat-f16-cpu-test.jsonl - - - name: Run sample MUL_MAT benchmark config on CPU + cd "${DEVWS_DIR}" + python3 skills/bootstrap-hrx-llama-builds/scripts/bootstrap_builds.py \ + --action check \ + --action hrx \ + --action loom \ + --action rocm-health \ + --action llama-hrx2 \ + --gfx-targets auto \ + --jobs "$(nproc)" + + - name: Verify HRX2 build configuration run: | - . "${BENCH_DIR}/scripts/hrx/env.sh" - "${BENCH_DIR}/tools/run-op-perf.py" \ - --test-backend-ops "${LLAMA_BUILD_DIR}/bin/test-backend-ops" \ - --test-file "${BENCH_DIR}/benchmark-configs/test/mul_mat_f16.txt" \ - --op MUL_MAT \ - --backend CPU \ - --output benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl - - - name: Upload CPU benchmark results - uses: actions/upload-artifact@v5 - with: - name: cpu-benchmark-results - path: benchmark-results/ + grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_DIR}/build/llama-hrx2/CMakeCache.txt" diff --git a/.github/workflows/build-riscv.yml b/.github/workflows/build-riscv.yml index 9733dbaa7a21..161a3aaaa63e 100644 --- a/.github/workflows/build-riscv.yml +++ b/.github/workflows/build-riscv.yml @@ -17,6 +17,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-riscv.yml', 'ggml/src/ggml-cpu/arch/riscv/**' diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index eeea820ba169..05b3ffd27604 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -25,6 +25,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-self-hosted.yml', '**/CMakeLists.txt', diff --git a/.github/workflows/build-vulkan.yml b/.github/workflows/build-vulkan.yml index de38bb2db6d3..01a7e595e4ba 100644 --- a/.github/workflows/build-vulkan.yml +++ b/.github/workflows/build-vulkan.yml @@ -19,6 +19,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build-vulkan.yml', 'ggml/src/ggml-vulkan/**' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4ae3675602e..8a8653ca350d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,6 +26,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/build.yml', '.github/workflows/build-cmake-pkg.yml', diff --git a/.github/workflows/check-vendor.yml b/.github/workflows/check-vendor.yml index 1671ed7b8bd2..0e2d13786e82 100644 --- a/.github/workflows/check-vendor.yml +++ b/.github/workflows/check-vendor.yml @@ -12,6 +12,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ 'vendor/**', 'scripts/sync_vendor.py' diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml index 6f648bac45b7..93ed12934249 100644 --- a/.github/workflows/copilot-setup-steps.yml +++ b/.github/workflows/copilot-setup-steps.yml @@ -5,9 +5,11 @@ name: "Copilot Setup Steps" on: workflow_dispatch: push: + branches-ignore: ['hrx-v2'] paths: - .github/workflows/copilot-setup-steps.yml pull_request: + branches-ignore: ['hrx-v2'] paths: - .github/workflows/copilot-setup-steps.yml diff --git a/.github/workflows/hip-quality-check.yml b/.github/workflows/hip-quality-check.yml index d00d30ed65ce..13492357f5a0 100644 --- a/.github/workflows/hip-quality-check.yml +++ b/.github/workflows/hip-quality-check.yml @@ -14,6 +14,7 @@ on: pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/hip-quality-check.yml', '**/*.cu', diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index eab20c68811e..cfa46e9bf77e 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -1,6 +1,7 @@ name: "Pull Request Labeler" on: -- pull_request_target + pull_request_target: + branches-ignore: ['hrx-v2'] jobs: labeler: diff --git a/.github/workflows/pre-tokenizer-hashes.yml b/.github/workflows/pre-tokenizer-hashes.yml index 7126b62b690b..e28798991ddf 100644 --- a/.github/workflows/pre-tokenizer-hashes.yml +++ b/.github/workflows/pre-tokenizer-hashes.yml @@ -2,10 +2,12 @@ name: Check Pre-Tokenizer Hashes on: push: + branches-ignore: ['hrx-v2'] paths: - 'convert_hf_to_gguf.py' - 'convert_hf_to_gguf_update.py' pull_request: + branches-ignore: ['hrx-v2'] paths: - 'convert_hf_to_gguf.py' - 'convert_hf_to_gguf_update.py' diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml index 1219b8745927..99002aea6af5 100644 --- a/.github/workflows/python-check-requirements.yml +++ b/.github/workflows/python-check-requirements.yml @@ -2,12 +2,14 @@ name: Python check requirements.txt on: push: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-check-requirements.yml' - 'scripts/check-requirements.sh' - 'convert*.py' - '**/requirements*.txt' pull_request: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-check-requirements.yml' - 'scripts/check-requirements.sh' diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 1e5d64c1aee6..c43bcce62c53 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -10,6 +10,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/python-lint.yml', '**/*.py' diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml index dc7aebe24ca2..41405d0c5846 100644 --- a/.github/workflows/python-type-check.yml +++ b/.github/workflows/python-type-check.yml @@ -2,6 +2,7 @@ name: Python Type-Check on: push: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-type-check.yml' - 'ty.toml' @@ -9,6 +10,7 @@ on: - '**/requirements*.txt' # - 'pyrightconfig.json' pull_request: + branches-ignore: ['hrx-v2'] paths: - '.github/workflows/python-type-check.yml' - 'ty.toml' diff --git a/.github/workflows/server-webui.yml b/.github/workflows/server-webui.yml index 492107ffd851..d852fe540f35 100644 --- a/.github/workflows/server-webui.yml +++ b/.github/workflows/server-webui.yml @@ -18,6 +18,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/server-webui.yml', 'tools/server/webui/**.*', diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 750c29f08e5c..8ffe34a3235d 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -29,6 +29,7 @@ on: ] pull_request: types: [opened, synchronize, reopened] + branches-ignore: ['hrx-v2'] paths: [ '.github/workflows/server.yml', '**/CMakeLists.txt', diff --git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml index 2ab06eb9811d..5535e5b69786 100644 --- a/.github/workflows/update-ops-docs.yml +++ b/.github/workflows/update-ops-docs.yml @@ -2,11 +2,13 @@ name: Update Operations Documentation on: push: + branches-ignore: ['hrx-v2'] paths: - 'docs/ops.md' - 'docs/ops/**' - 'scripts/create_ops_docs.py' pull_request: + branches-ignore: ['hrx-v2'] paths: - 'docs/ops.md' - 'docs/ops/**' From 012d762d694a34266ec63dabe69461dca861172f Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 15:40:18 +0000 Subject: [PATCH 03/14] ci: use devws HRX scripts --- .github/workflows/build-hrx.yml | 66 ++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 84e4c3105ed3..d8d994110d81 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -26,9 +26,11 @@ jobs: matrix: include: - name: gfx1151_strix-halo + gpu_target: gfx1151 runner: linux-gfx1151-gpu-rocm rocm_tarball: therock-dist-linux-gfx1151-7.14.0a20260617.tar.gz - name: gfx1201_9070 + gpu_target: gfx1201 runner: linux-gfx120X-gpu-rocm rocm_tarball: therock-dist-linux-gfx120X-all-7.14.0a20260617.tar.gz runs-on: ${{ matrix.runner }} @@ -44,7 +46,7 @@ jobs: --device /dev/dri env: DEVWS_REPOSITORY: AaronStGeorge/llamacpp-devws - DEVWS_REF: build-hrx-v2 + DEVWS_REF: ci-scripts DEVWS_DIR: /work/llamacpp-devws ROCM_DIR: /work/rocm ROCM_TARBALL_BASE_URL: https://rocm.nightlies.amd.com/tarball-multi-arch @@ -74,14 +76,11 @@ jobs: - name: Runner identity if: always() - run: | - printf 'RUNNER_NAME=%s\n' "${RUNNER_NAME:-}" - printf 'RUNNER_OS=%s\n' "${RUNNER_OS:-}" - printf 'GITHUB_EVENT_NAME=%s\n' "${GITHUB_EVENT_NAME:-}" - hostname - uname -a - ls -l /dev/kfd /dev/dri || true - printenv | sort | grep -E '^(GITHUB_|RUNNER_|ROCM_|HSA_|HIP_)' || true + env: + MATRIX_NAME: ${{ matrix.name }} + MATRIX_GPU_TARGET: ${{ matrix.gpu_target }} + MATRIX_RUNS_ON: ${{ matrix.runner }} + run: "${GITHUB_WORKSPACE}/devws-src/ci/runner-info.sh" - name: Install host build dependencies run: | @@ -104,12 +103,10 @@ jobs: - name: Stage dev workspace under /work run: | rm -rf "${DEVWS_DIR}" - mkdir -p "${DEVWS_DIR}/sources" + mkdir -p "${DEVWS_DIR}" cp -a "${GITHUB_WORKSPACE}/devws-src/." "${DEVWS_DIR}/" - cp -a "${GITHUB_WORKSPACE}/llama-src" "${DEVWS_DIR}/sources/llama.cpp" - cp -a "${GITHUB_WORKSPACE}/hrx-system-src" "${DEVWS_DIR}/sources/hrx-system" - git -C "${DEVWS_DIR}/sources/llama.cpp" checkout -B hrx-v2 - git -C "${DEVWS_DIR}/sources/hrx-system" checkout -B main + git -C "${GITHUB_WORKSPACE}/llama-src" checkout -B hrx-v2 + git -C "${GITHUB_WORKSPACE}/hrx-system-src" checkout -B main - name: Fetch ROCm nightly tarball run: | @@ -138,9 +135,50 @@ jobs: --action loom \ --action rocm-health \ --action llama-hrx2 \ + --hrx-source "${GITHUB_WORKSPACE}/hrx-system-src" \ + --llama-source "${GITHUB_WORKSPACE}/llama-src" \ --gfx-targets auto \ --jobs "$(nproc)" - name: Verify HRX2 build configuration run: | grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_DIR}/build/llama-hrx2/CMakeCache.txt" + + - name: Run sample MUL_MAT correctness config on HRX2 + run: | + export ROCM_PATH="${DEVWS_DIR}/rocm" + export GGML_HRX_ROCM_PATH="${DEVWS_DIR}/rocm" + export PATH="${DEVWS_DIR}/rocm/bin:${DEVWS_DIR}/rocm/lib/llvm/bin:${PATH}" + export LD_LIBRARY_PATH="${DEVWS_DIR}/build/llama-hrx2/bin:${DEVWS_DIR}/build/hrx-install/lib:${DEVWS_DIR}/build/hrx-install/lib64:${DEVWS_DIR}/rocm/lib:${DEVWS_DIR}/rocm/lib64:${DEVWS_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_DIR}/rocm/lib/libhsa-runtime64.so" + mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" + "${DEVWS_DIR}/ci/tools/run-op-test.py" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ + --op MUL_MAT \ + --backend HRX20 \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-test.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-test.txt" + + - name: Run sample MUL_MAT benchmark config on HRX2 + run: | + export ROCM_PATH="${DEVWS_DIR}/rocm" + export GGML_HRX_ROCM_PATH="${DEVWS_DIR}/rocm" + export PATH="${DEVWS_DIR}/rocm/bin:${DEVWS_DIR}/rocm/lib/llvm/bin:${PATH}" + export LD_LIBRARY_PATH="${DEVWS_DIR}/build/llama-hrx2/bin:${DEVWS_DIR}/build/hrx-install/lib:${DEVWS_DIR}/build/hrx-install/lib64:${DEVWS_DIR}/rocm/lib:${DEVWS_DIR}/rocm/lib64:${DEVWS_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_DIR}/rocm/lib/libhsa-runtime64.so" + mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" + "${DEVWS_DIR}/ci/tools/run-op-perf.py" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ + --op MUL_MAT \ + --backend HRX20 \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-perf.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-perf.txt" + + - name: Upload HRX2 benchmark results + uses: actions/upload-artifact@v5 + with: + name: hrx2-benchmark-results-${{ matrix.name }} + path: benchmark-results/ + if-no-files-found: error From bae804467e2ecd2000f0a91ffa88534b05ca964a Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 16:03:24 +0000 Subject: [PATCH 04/14] ci: stop staging dev workspace --- .github/workflows/build-hrx.yml | 48 +++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index d8d994110d81..9e5723196c42 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -47,7 +47,8 @@ jobs: env: DEVWS_REPOSITORY: AaronStGeorge/llamacpp-devws DEVWS_REF: ci-scripts - DEVWS_DIR: /work/llamacpp-devws + DEVWS_DIR: ${{ github.workspace }}/devws-src + DEVWS_BUILD_DIR: /work/llamacpp-devws-build ROCM_DIR: /work/rocm ROCM_TARBALL_BASE_URL: https://rocm.nightlies.amd.com/tarball-multi-arch ROCM_TARBALL_NAME: ${{ matrix.rocm_tarball }} @@ -100,24 +101,17 @@ jobs: tar \ xz-utils - - name: Stage dev workspace under /work - run: | - rm -rf "${DEVWS_DIR}" - mkdir -p "${DEVWS_DIR}" - cp -a "${GITHUB_WORKSPACE}/devws-src/." "${DEVWS_DIR}/" - git -C "${GITHUB_WORKSPACE}/llama-src" checkout -B hrx-v2 - git -C "${GITHUB_WORKSPACE}/hrx-system-src" checkout -B main - - name: Fetch ROCm nightly tarball run: | - rm -rf "${ROCM_DIR}" /work/rocm-tarball + rm -rf "${DEVWS_BUILD_DIR}" "${ROCM_DIR}" /work/rocm-tarball + mkdir -p "${DEVWS_BUILD_DIR}" mkdir -p "${ROCM_DIR}" /work/rocm-tarball curl -fsSL "${ROCM_TARBALL_BASE_URL}/${ROCM_TARBALL_NAME}" \ -o "/work/rocm-tarball/${ROCM_TARBALL_NAME}" tar -xzf "/work/rocm-tarball/${ROCM_TARBALL_NAME}" -C "${ROCM_DIR}" - ln -sfn "${ROCM_DIR}" "${DEVWS_DIR}/rocm" - test -x "${DEVWS_DIR}/rocm/bin/rocminfo" - test -x "${DEVWS_DIR}/rocm/bin/amdclang++" + ln -sfn "${ROCM_DIR}" "${DEVWS_BUILD_DIR}/rocm" + test -x "${DEVWS_BUILD_DIR}/rocm/bin/rocminfo" + test -x "${DEVWS_BUILD_DIR}/rocm/bin/amdclang++" - name: ccache uses: ggml-org/ccache-action@v1.2.21 @@ -130,6 +124,7 @@ jobs: run: | cd "${DEVWS_DIR}" python3 skills/bootstrap-hrx-llama-builds/scripts/bootstrap_builds.py \ + --workspace "${DEVWS_BUILD_DIR}" \ --action check \ --action hrx \ --action loom \ @@ -137,23 +132,24 @@ jobs: --action llama-hrx2 \ --hrx-source "${GITHUB_WORKSPACE}/hrx-system-src" \ --llama-source "${GITHUB_WORKSPACE}/llama-src" \ + --skip-source-branch-check \ --gfx-targets auto \ --jobs "$(nproc)" - name: Verify HRX2 build configuration run: | - grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_DIR}/build/llama-hrx2/CMakeCache.txt" + grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_BUILD_DIR}/build/llama-hrx2/CMakeCache.txt" - name: Run sample MUL_MAT correctness config on HRX2 run: | - export ROCM_PATH="${DEVWS_DIR}/rocm" - export GGML_HRX_ROCM_PATH="${DEVWS_DIR}/rocm" - export PATH="${DEVWS_DIR}/rocm/bin:${DEVWS_DIR}/rocm/lib/llvm/bin:${PATH}" - export LD_LIBRARY_PATH="${DEVWS_DIR}/build/llama-hrx2/bin:${DEVWS_DIR}/build/hrx-install/lib:${DEVWS_DIR}/build/hrx-install/lib64:${DEVWS_DIR}/rocm/lib:${DEVWS_DIR}/rocm/lib64:${DEVWS_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" - export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_DIR}/rocm/lib/libhsa-runtime64.so" + export ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" + export GGML_HRX_ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" + export PATH="${DEVWS_BUILD_DIR}/rocm/bin:${DEVWS_BUILD_DIR}/rocm/lib/llvm/bin:${PATH}" + export LD_LIBRARY_PATH="${DEVWS_BUILD_DIR}/build/llama-hrx2/bin:${DEVWS_BUILD_DIR}/build/hrx-install/lib:${DEVWS_BUILD_DIR}/build/hrx-install/lib64:${DEVWS_BUILD_DIR}/rocm/lib:${DEVWS_BUILD_DIR}/rocm/lib64:${DEVWS_BUILD_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_BUILD_DIR}/rocm/lib/libhsa-runtime64.so" mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-test.py" \ - --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_BUILD_DIR}/build/llama-hrx2/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend HRX20 \ @@ -162,14 +158,14 @@ jobs: - name: Run sample MUL_MAT benchmark config on HRX2 run: | - export ROCM_PATH="${DEVWS_DIR}/rocm" - export GGML_HRX_ROCM_PATH="${DEVWS_DIR}/rocm" - export PATH="${DEVWS_DIR}/rocm/bin:${DEVWS_DIR}/rocm/lib/llvm/bin:${PATH}" - export LD_LIBRARY_PATH="${DEVWS_DIR}/build/llama-hrx2/bin:${DEVWS_DIR}/build/hrx-install/lib:${DEVWS_DIR}/build/hrx-install/lib64:${DEVWS_DIR}/rocm/lib:${DEVWS_DIR}/rocm/lib64:${DEVWS_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" - export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_DIR}/rocm/lib/libhsa-runtime64.so" + export ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" + export GGML_HRX_ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" + export PATH="${DEVWS_BUILD_DIR}/rocm/bin:${DEVWS_BUILD_DIR}/rocm/lib/llvm/bin:${PATH}" + export LD_LIBRARY_PATH="${DEVWS_BUILD_DIR}/build/llama-hrx2/bin:${DEVWS_BUILD_DIR}/build/hrx-install/lib:${DEVWS_BUILD_DIR}/build/hrx-install/lib64:${DEVWS_BUILD_DIR}/rocm/lib:${DEVWS_BUILD_DIR}/rocm/lib64:${DEVWS_BUILD_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" + export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_BUILD_DIR}/rocm/lib/libhsa-runtime64.so" mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-perf.py" \ - --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_BUILD_DIR}/build/llama-hrx2/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend HRX20 \ From 02e1ac30c0d5b556baaa5d77aba91579cec7d3d7 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 16:19:38 +0000 Subject: [PATCH 05/14] ci: use ROCm nightly fetch script --- .github/workflows/build-hrx.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 9e5723196c42..5dac076fca1e 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -102,16 +102,7 @@ jobs: xz-utils - name: Fetch ROCm nightly tarball - run: | - rm -rf "${DEVWS_BUILD_DIR}" "${ROCM_DIR}" /work/rocm-tarball - mkdir -p "${DEVWS_BUILD_DIR}" - mkdir -p "${ROCM_DIR}" /work/rocm-tarball - curl -fsSL "${ROCM_TARBALL_BASE_URL}/${ROCM_TARBALL_NAME}" \ - -o "/work/rocm-tarball/${ROCM_TARBALL_NAME}" - tar -xzf "/work/rocm-tarball/${ROCM_TARBALL_NAME}" -C "${ROCM_DIR}" - ln -sfn "${ROCM_DIR}" "${DEVWS_BUILD_DIR}/rocm" - test -x "${DEVWS_BUILD_DIR}/rocm/bin/rocminfo" - test -x "${DEVWS_BUILD_DIR}/rocm/bin/amdclang++" + run: "${DEVWS_DIR}/ci/fetch-rocm-nightly.sh" - name: ccache uses: ggml-org/ccache-action@v1.2.21 From 94097a4dd15511071c1c48e6901d9e7cfa135d99 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 16:59:58 +0000 Subject: [PATCH 06/14] ci: source dev workspace env --- .github/workflows/build-hrx.yml | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 5dac076fca1e..cbc75411241d 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -48,10 +48,10 @@ jobs: DEVWS_REPOSITORY: AaronStGeorge/llamacpp-devws DEVWS_REF: ci-scripts DEVWS_DIR: ${{ github.workspace }}/devws-src - DEVWS_BUILD_DIR: /work/llamacpp-devws-build ROCM_DIR: /work/rocm ROCM_TARBALL_BASE_URL: https://rocm.nightlies.amd.com/tarball-multi-arch ROCM_TARBALL_NAME: ${{ matrix.rocm_tarball }} + LLAMACPP_DEVWS_SKIP_VENV: "1" CCACHE_COMPILERCHECK: content HSA_FORCE_FINE_GRAIN_PCIE: "1" @@ -66,14 +66,14 @@ jobs: - name: Checkout llama.cpp under test uses: actions/checkout@v6 with: - path: llama-src + path: devws-src/sources/llama.cpp - name: Checkout HRX System uses: actions/checkout@v6 with: repository: ROCm/hrx-system ref: main - path: hrx-system-src + path: devws-src/sources/hrx-system - name: Runner identity if: always() @@ -114,33 +114,29 @@ jobs: - name: Build HRX2 run: | cd "${DEVWS_DIR}" + source .envrc python3 skills/bootstrap-hrx-llama-builds/scripts/bootstrap_builds.py \ - --workspace "${DEVWS_BUILD_DIR}" \ + --workspace "${DEVWS_DIR}" \ --action check \ --action hrx \ --action loom \ --action rocm-health \ --action llama-hrx2 \ - --hrx-source "${GITHUB_WORKSPACE}/hrx-system-src" \ - --llama-source "${GITHUB_WORKSPACE}/llama-src" \ --skip-source-branch-check \ --gfx-targets auto \ --jobs "$(nproc)" - name: Verify HRX2 build configuration run: | - grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_BUILD_DIR}/build/llama-hrx2/CMakeCache.txt" + grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_DIR}/build/llama-hrx2/CMakeCache.txt" - name: Run sample MUL_MAT correctness config on HRX2 run: | - export ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" - export GGML_HRX_ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" - export PATH="${DEVWS_BUILD_DIR}/rocm/bin:${DEVWS_BUILD_DIR}/rocm/lib/llvm/bin:${PATH}" - export LD_LIBRARY_PATH="${DEVWS_BUILD_DIR}/build/llama-hrx2/bin:${DEVWS_BUILD_DIR}/build/hrx-install/lib:${DEVWS_BUILD_DIR}/build/hrx-install/lib64:${DEVWS_BUILD_DIR}/rocm/lib:${DEVWS_BUILD_DIR}/rocm/lib64:${DEVWS_BUILD_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" - export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_BUILD_DIR}/rocm/lib/libhsa-runtime64.so" + cd "${DEVWS_DIR}" + source .envrc mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-test.py" \ - --test-backend-ops "${DEVWS_BUILD_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend HRX20 \ @@ -149,14 +145,11 @@ jobs: - name: Run sample MUL_MAT benchmark config on HRX2 run: | - export ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" - export GGML_HRX_ROCM_PATH="${DEVWS_BUILD_DIR}/rocm" - export PATH="${DEVWS_BUILD_DIR}/rocm/bin:${DEVWS_BUILD_DIR}/rocm/lib/llvm/bin:${PATH}" - export LD_LIBRARY_PATH="${DEVWS_BUILD_DIR}/build/llama-hrx2/bin:${DEVWS_BUILD_DIR}/build/hrx-install/lib:${DEVWS_BUILD_DIR}/build/hrx-install/lib64:${DEVWS_BUILD_DIR}/rocm/lib:${DEVWS_BUILD_DIR}/rocm/lib64:${DEVWS_BUILD_DIR}/rocm/lib/rocm_sysdeps/lib:${LD_LIBRARY_PATH:-}" - export IREE_HAL_AMDGPU_LIBHSA_PATH="${DEVWS_BUILD_DIR}/rocm/lib/libhsa-runtime64.so" + cd "${DEVWS_DIR}" + source .envrc mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-perf.py" \ - --test-backend-ops "${DEVWS_BUILD_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ --backend HRX20 \ From 2cd6d1ffb4c28db722526beef399ca6a418f1d26 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 21:35:22 +0000 Subject: [PATCH 07/14] ci: use HRX for sample op gates --- .github/workflows/build-hrx.yml | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index cbc75411241d..7b4634b3c44e 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -111,7 +111,7 @@ jobs: evict-old-files: 1d save: ${{ github.event_name == 'pull_request' }} - - name: Build HRX2 + - name: Build HRX run: | cd "${DEVWS_DIR}" source .envrc @@ -121,44 +121,40 @@ jobs: --action hrx \ --action loom \ --action rocm-health \ - --action llama-hrx2 \ + --action llama-hrx \ --skip-source-branch-check \ --gfx-targets auto \ --jobs "$(nproc)" - - name: Verify HRX2 build configuration - run: | - grep -F 'GGML_HRX2:BOOL=ON' "${DEVWS_DIR}/build/llama-hrx2/CMakeCache.txt" - - - name: Run sample MUL_MAT correctness config on HRX2 + - name: Run sample MUL_MAT correctness config on HRX run: | cd "${DEVWS_DIR}" source .envrc mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-test.py" \ - --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ - --backend HRX20 \ - --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-test.jsonl" \ - --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-test.txt" + --backend HRX0 \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-test.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-test.txt" - - name: Run sample MUL_MAT benchmark config on HRX2 + - name: Run sample MUL_MAT benchmark config on HRX run: | cd "${DEVWS_DIR}" source .envrc mkdir -p "${GITHUB_WORKSPACE}/benchmark-results" "${DEVWS_DIR}/ci/tools/run-op-perf.py" \ - --test-backend-ops "${DEVWS_DIR}/build/llama-hrx2/bin/test-backend-ops" \ + --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ - --backend HRX20 \ - --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-perf.jsonl" \ - --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx2-perf.txt" + --backend HRX0 \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-perf.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-perf.txt" - - name: Upload HRX2 benchmark results + - name: Upload HRX benchmark results uses: actions/upload-artifact@v5 with: - name: hrx2-benchmark-results-${{ matrix.name }} + name: hrx-benchmark-results-${{ matrix.name }} path: benchmark-results/ if-no-files-found: error From af5a85acb2e4e653ecc677f8a3c346cabf7be9eb Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 21:36:08 +0000 Subject: [PATCH 08/14] cleanup --- .github/workflows/build-hrx.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 7b4634b3c44e..a483885976c1 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -76,7 +76,6 @@ jobs: path: devws-src/sources/hrx-system - name: Runner identity - if: always() env: MATRIX_NAME: ${{ matrix.name }} MATRIX_GPU_TARGET: ${{ matrix.gpu_target }} From ee9f539bb0c042506be6daa8d74d0aea8424cf93 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 22:52:56 +0000 Subject: [PATCH 09/14] Re-add comment --- .github/workflows/build-hrx.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index a483885976c1..540d65db8c83 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -40,6 +40,15 @@ jobs: shell: bash --noprofile --norc -exo pipefail {0} container: image: ghcr.io/rocm/no_rocm_image_ubuntu24_04@sha256:fba5f55a122dbb15925e98c51fe65bffe88c36e11ebb25b73daf2bea04202dc3 + # --user 0:0: actions running inside the container need to write to some + # files set up outside the container by the runner agent. In + # June 2026 some runner agents set those files up with + # permissions for a "tester" user with UID/GID 1001, which + # matches the user in no_rocm_image_ubuntu24_04, and some are + # set up as root; accessing a file owned by root with user + # "tester" gives an EACCES. Running as root is the common + # denominator. + # --device kfd/dri: GPU access. options: >- --user 0:0 --device /dev/kfd From ac9a459a8cee32d663adef6ffb975f0f701466fc Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Thu, 18 Jun 2026 23:04:24 +0000 Subject: [PATCH 10/14] ci: run sample op gates on CPU --- .github/workflows/build-hrx.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index 540d65db8c83..c2bbb95f9565 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -134,7 +134,7 @@ jobs: --gfx-targets auto \ --jobs "$(nproc)" - - name: Run sample MUL_MAT correctness config on HRX + - name: Run sample MUL_MAT correctness config on CPU run: | cd "${DEVWS_DIR}" source .envrc @@ -143,11 +143,11 @@ jobs: --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ - --backend HRX0 \ - --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-test.jsonl" \ - --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-test.txt" + --backend CPU \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-test.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-test.txt" - - name: Run sample MUL_MAT benchmark config on HRX + - name: Run sample MUL_MAT benchmark config on CPU run: | cd "${DEVWS_DIR}" source .envrc @@ -156,9 +156,9 @@ jobs: --test-backend-ops "${DEVWS_DIR}/build/llama-hrx/bin/test-backend-ops" \ --test-file "${DEVWS_DIR}/ci/benchmark-configs/test/mul_mat_f16.txt" \ --op MUL_MAT \ - --backend HRX0 \ - --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-perf.jsonl" \ - --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-hrx-perf.txt" + --backend CPU \ + --output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl" \ + --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.txt" - name: Upload HRX benchmark results uses: actions/upload-artifact@v5 From 2f0c456628882cbf2d528da4a2165255cb3316d2 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Mon, 22 Jun 2026 14:23:27 +0000 Subject: [PATCH 11/14] Compare perf with previous run --- .github/workflows/build-hrx.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index c2bbb95f9565..c8a630b150e5 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -8,7 +8,6 @@ on: types: [opened, synchronize, reopened] branches: - hrx-v2 - workflow_dispatch: permissions: contents: read @@ -166,3 +165,22 @@ jobs: name: hrx-benchmark-results-${{ matrix.name }} path: benchmark-results/ if-no-files-found: error + + # TODO: switch to github.event.pull_request.base.sha, github.event.before is for testing + - name: Download previous HRX benchmark results + env: + GH_TOKEN: ${{ github.token }} + PARENT_SHA: ${{ github.event.before || github.event.pull_request.base.sha }} + run: | + python3 "${DEVWS_DIR}/ci/tools/download-parent-artifact.py" \ + "hrx-benchmark-results-${{ matrix.name }}" \ + "${GITHUB_WORKSPACE}/benchmark-results/previous" + + - name: Compare HRX benchmark results + run: | + python3 "${DEVWS_DIR}/ci/tools/compare-op-perf.py" \ + "${GITHUB_WORKSPACE}/benchmark-results/previous/sample-mul-mat-f16-cpu-perf.jsonl" \ + "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl" \ + --max-regression-pct 5 \ + --require-all-cases \ + --skip-missing-baseline From 1a677e7ed87b03cfe8de72bc363204fffe2083ab Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Mon, 22 Jun 2026 14:49:20 +0000 Subject: [PATCH 12/14] Disable 9070 for now --- .github/workflows/build-hrx.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index c8a630b150e5..e8004cf38134 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -28,10 +28,11 @@ jobs: gpu_target: gfx1151 runner: linux-gfx1151-gpu-rocm rocm_tarball: therock-dist-linux-gfx1151-7.14.0a20260617.tar.gz - - name: gfx1201_9070 - gpu_target: gfx1201 - runner: linux-gfx120X-gpu-rocm - rocm_tarball: therock-dist-linux-gfx120X-all-7.14.0a20260617.tar.gz + # Disable gfx1201_9070 build for now, runner is unstable. + # - name: gfx1201_9070 + # gpu_target: gfx1201 + # runner: linux-gfx120X-gpu-rocm + # rocm_tarball: therock-dist-linux-gfx120X-all-7.14.0a20260617.tar.gz runs-on: ${{ matrix.runner }} timeout-minutes: 120 defaults: From 37ba2ecbee06467d0ea292ad29f4e421e32b759c Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Mon, 22 Jun 2026 15:30:04 +0000 Subject: [PATCH 13/14] ci: trigger benchmark comparison From f64f4d04432d387247ef003575b9a5c532bd1235 Mon Sep 17 00:00:00 2001 From: AaronStGeorge Date: Mon, 22 Jun 2026 15:59:32 +0000 Subject: [PATCH 14/14] Small de-noising pass on op level benchmarks. --- .github/workflows/build-hrx.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-hrx.yml b/.github/workflows/build-hrx.yml index e8004cf38134..cf1e1fa29a58 100644 --- a/.github/workflows/build-hrx.yml +++ b/.github/workflows/build-hrx.yml @@ -148,6 +148,8 @@ jobs: --raw-output "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-test.txt" - name: Run sample MUL_MAT benchmark config on CPU + env: + GGML_TEST_BACKEND_OPS_PERF_MIN_US: "2000000" run: | cd "${DEVWS_DIR}" source .envrc @@ -183,5 +185,6 @@ jobs: "${GITHUB_WORKSPACE}/benchmark-results/previous/sample-mul-mat-f16-cpu-perf.jsonl" \ "${GITHUB_WORKSPACE}/benchmark-results/sample-mul-mat-f16-cpu-perf.jsonl" \ --max-regression-pct 5 \ + --min-regression-us 5 \ --require-all-cases \ --skip-missing-baseline