From 0091489424cb6792ffed83418934438d2195f65b Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Thu, 11 Jun 2026 17:08:21 -0230 Subject: [PATCH 01/23] ci: shard race detector unit tests across runners The three unit-race jobs each ran the full test suite on a single runner, taking 60-67 minutes and sitting on the critical path of every CI run. Add a unit-race-parallel make target backed by a new scripts/unit_race_part.sh, which splits the package list round-robin into a configurable number of tranches. Known-heavy packages are listed first so they land in different tranches. Each tranche runs on its own runner: the workflow matrix becomes three flavors (plain, test_db_sqlite, test_db_postgres) times four tranches, bringing the expected per-job time down to roughly a quarter of the previous duration. --- .github/workflows/main.yml | 62 ++++++++++++++++++++-- Makefile | 7 +++ scripts/unit_race_part.sh | 103 +++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 3 deletions(-) create mode 100755 scripts/unit_race_part.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 85553fca2a6..4da96992714 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,6 +43,11 @@ env: # `make lint` to see where else it needs to be updated as well). GO_VERSION: 1.26.4 + # NUM_UNIT_RACE_TRANCHES defines the number of tranches the race detector + # unit tests are split into, each tranche running on its own runner. Must + # match the tranche list in the unit-race matrix. 
+ NUM_UNIT_RACE_TRANCHES: 4 + jobs: static-checks: name: Static Checks @@ -212,9 +217,6 @@ jobs: - unit tags="kvdb_sqlite" - unit tags="test_db_sqlite" - unit tags="test_db_postgres" - - unit-race - - unit-race tags="test_db_sqlite" - - unit-race tags="test_db_postgres" - unit-module steps: @@ -266,6 +268,60 @@ jobs: parallel: true + ######################## + # run unit tests in race detector mode + ######################## + unit-race: + name: Unit race (${{ matrix.flavor.name }}, ${{ matrix.tranche }}) + runs-on: ubuntu-latest + strategy: + # Allow other tests in the matrix to continue if one fails. + fail-fast: false + matrix: + # This list must contain NUM_UNIT_RACE_TRANCHES entries. + tranche: [0, 1, 2, 3] + flavor: + - name: unit-race + args: '' + - name: unit-race-sqlite + args: tags="test_db_sqlite" + - name: unit-race-postgres + args: tags="test_db_postgres" + + steps: + - name: Git checkout + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Clean up runner space + uses: ./.github/actions/cleanup-space + + - name: Fetch and rebase on ${{ github.base_ref }} + if: github.event_name == 'pull_request' + uses: ./.github/actions/rebase + + - name: Git checkout fuzzing seeds + uses: actions/checkout@v5 + with: + repository: lightninglabs/lnd-fuzz + path: lnd-fuzz + + - name: Rsync fuzzing seeds + run: rsync -a --ignore-existing lnd-fuzz/ ./ + + - name: Setup go ${{ env.GO_VERSION }} + uses: ./.github/actions/setup-go + with: + go-version: '${{ env.GO_VERSION }}' + key-prefix: unit-test + + - name: Install bitcoind + run: ./scripts/install_bitcoind.sh $BITCOIN_VERSION + + - name: Run ${{ matrix.flavor.name }} tranche ${{ matrix.tranche }} + run: make unit-race-parallel tranche=${{ matrix.tranche }} tranches=${{ env.NUM_UNIT_RACE_TRANCHES }} ${{ matrix.flavor.args }} + ######################## # run integration tests with TRANCHES ######################## diff --git a/Makefile b/Makefile index 91213847e84..87efdd0b788 100644 --- a/Makefile +++ 
b/Makefile @@ -300,6 +300,13 @@ unit-race: $(BTCD_BIN) @$(call print, "Running unit race tests.") env CGO_ENABLED=1 GORACE="history_size=7 halt_on_errors=1" $(UNIT_RACE) +#? unit-race-parallel: Run one tranche of the unit tests in race detector mode (tranche=<index> tranches=<count>) +unit-race-parallel: $(BTCD_BIN) + @$(call print, "Running unit race tests tranche ${tranche} of ${tranches}.") + PKG="$(PKG)" DEV_TAGS="$(DEV_TAGS)" \ + scripts/unit_race_part.sh $(tranche) $(tranches) \ + -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS)" $(TEST_FLAGS) + #? unit-bench: Run benchmark tests unit-bench: $(BTCD_BIN) @$(call print, "Running benchmark tests.") diff --git a/scripts/unit_race_part.sh b/scripts/unit_race_part.sh new file mode 100755 index 00000000000..19db401a909 --- /dev/null +++ b/scripts/unit_race_part.sh @@ -0,0 +1,103 @@ +#!/bin/bash + +# unit_race_part.sh runs one tranche of the unit tests in race detector +# mode. The full package list is split round-robin into num_tranches +# tranches, allowing the race detector tests to be spread across +# multiple machines. + +set -euo pipefail + +TRANCHE=${1:-} +NUM_TRANCHES=${2:-} + +if [[ -z "${TRANCHE}" || -z "${NUM_TRANCHES}" ]]; then + echo "Usage: $0 <tranche> <num_tranches> [go test flags...]" >&2 + exit 1 +fi + +if ! [[ "${TRANCHE}" =~ ^[0-9]+$ && "${NUM_TRANCHES}" =~ ^[0-9]+$ ]]; then + echo "tranche and num_tranches must be non-negative integers" >&2 + exit 1 +fi + +if (( NUM_TRANCHES <= 0 )); then + echo "num_tranches must be greater than 0" >&2 + exit 1 +fi + +if (( TRANCHE < 0 || TRANCHE >= NUM_TRANCHES )); then + echo "tranche must be in range [0, num_tranches)" >&2 + exit 1 +fi + +shift 2 + +PKG_PREFIX=${PKG:-github.com/lightningnetwork/lnd} +DEV_TAGS=${DEV_TAGS:-dev} + +# Heavy packages listed first so the round-robin split distributes them +# across different tranches. Ordered by approximate descending test +# duration. Update periodically if the profile shifts. 
+HEAVY_PKGS=( + "${PKG_PREFIX}/lnwallet" + "${PKG_PREFIX}/htlcswitch" + "${PKG_PREFIX}/chainntnfs" + "${PKG_PREFIX}/channeldb" + "${PKG_PREFIX}/contractcourt" + "${PKG_PREFIX}/routing" + "${PKG_PREFIX}/graph/db" + "${PKG_PREFIX}/invoices" + "${PKG_PREFIX}/watchtower/wtclient" + "${PKG_PREFIX}/peer" +) + +all_pkgs=() +while IFS= read -r pkg; do + all_pkgs+=("${pkg}") +done < <(go list -tags="${DEV_TAGS}" -deps "${PKG_PREFIX}/..." | \ + grep "${PKG_PREFIX}" | grep -v "/vendor/") + +# Only treat heavy packages that actually appear in the package list as +# heavy, so a stale entry above cannot select a nonexistent package. +heavy=() +for pkg in "${HEAVY_PKGS[@]}"; do + if printf '%s\n' "${all_pkgs[@]}" | grep -qxF "${pkg}"; then + heavy+=("${pkg}") + fi +done + +remaining=() +while IFS= read -r pkg; do + remaining+=("${pkg}") +done < <(printf '%s\n' "${all_pkgs[@]}" | \ + grep -vxF "$(printf '%s\n' "${heavy[@]}")") + +ordered=("${heavy[@]}" "${remaining[@]}") + +selected=() +for i in "${!ordered[@]}"; do + if (( (i % NUM_TRANCHES) == TRANCHE )); then + selected+=("${ordered[$i]}") + fi +done + +if (( ${#selected[@]} == 0 )); then + echo "No packages assigned to tranche ${TRANCHE} of ${NUM_TRANCHES}" >&2 + exit 0 +fi + +exit_code=0 +for pkg in "${selected[@]}"; do + echo "Running race unit tests for ${pkg}" + if ! env CGO_ENABLED=1 GORACE="history_size=7 halt_on_errors=1" \ + go test -race "$@" "${pkg}"; then + exit_code=1 + fi +done + +if (( exit_code != 0 )); then + echo "One or more packages failed in tranche ${TRANCHE} of" \ + "${NUM_TRANCHES}" >&2 +fi + +exit ${exit_code} From 76c50d0528d788260baa8c343bf8750aebc327d5 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Thu, 11 Jun 2026 17:10:04 -0230 Subject: [PATCH 02/23] makefile+scripts: support running an offset subset of itest tranches Add a trancheoffset make variable and a new itest-only-parallel target that runs tranches without re-building the itest binaries. 
Together with the existing tranches and parallel variables, this allows a single itest configuration to be spread over multiple machines: each machine runs ITEST_PARALLELISM tranches starting at its own offset, against the same total tranche count and shuffle seed. itest-parallel is unchanged in behavior and is now expressed as build-itest followed by itest-only-parallel. --- Makefile | 9 ++++++--- make/testing_flags.mk | 7 +++++++ scripts/itest_parallel.sh | 6 +++++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 87efdd0b788..8c636084c1c 100644 --- a/Makefile +++ b/Makefile @@ -263,13 +263,16 @@ itest: build-itest itest-only #? itest-race: Build and run integration tests in race detector mode itest-race: build-itest-race itest-only -#? itest-parallel: Build and run integration tests in parallel mode, running up to ITEST_PARALLELISM test tranches in parallel (default 4) -itest-parallel: clean-itest-logs build-itest db-instance +#? itest-only-parallel: Run integration tests in parallel mode without re-building binaries, running up to ITEST_PARALLELISM test tranches in parallel starting at the tranche given by trancheoffset (default 0) +itest-only-parallel: clean-itest-logs db-instance @$(call print, "Running tests") date - EXEC_SUFFIX=$(EXEC_SUFFIX) scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS) + EXEC_SUFFIX=$(EXEC_SUFFIX) ITEST_TRANCHE_OFFSET=$(ITEST_TRANCHE_OFFSET) scripts/itest_parallel.sh $(ITEST_PARALLELISM) $(NUM_ITEST_TRANCHES) $(SHUFFLE_SEED) $(TEST_FLAGS) $(ITEST_FLAGS) $(COLLECT_ITEST_COVERAGE) +#? itest-parallel: Build and run integration tests in parallel mode, running up to ITEST_PARALLELISM test tranches in parallel (default 4) +itest-parallel: build-itest itest-only-parallel + #? 
itest-clean: Kill all running itest processes itest-clean: @$(call print, "Cleaning old itest processes") diff --git a/make/testing_flags.mk b/make/testing_flags.mk index 4f7bd42d568..27256c9c746 100644 --- a/make/testing_flags.mk +++ b/make/testing_flags.mk @@ -10,6 +10,7 @@ COVER_PKG = $$($(GOCC) list -deps -tags="$(DEV_TAGS)" ./... | grep '$(PKG)') COVER_FLAGS = -coverprofile=coverage.txt -covermode=atomic -coverpkg=$(PKG)/... NUM_ITEST_TRANCHES = 4 ITEST_PARALLELISM = $(NUM_ITEST_TRANCHES) +ITEST_TRANCHE_OFFSET = 0 POSTGRES_START_DELAY = 5 SHUFFLE_SEED = 0 @@ -29,6 +30,12 @@ ifneq ($(parallel),) ITEST_PARALLELISM = $(parallel) endif +# Run only a subset of the total tranches, starting at this offset. This +# allows a single itest run to be spread over multiple machines. +ifneq ($(trancheoffset),) +ITEST_TRANCHE_OFFSET = $(trancheoffset) +endif + # Set the seed for shuffling the test cases. ifneq ($(shuffleseed),) SHUFFLE_SEED = $(shuffleseed) diff --git a/scripts/itest_parallel.sh b/scripts/itest_parallel.sh index ab1b3efa282..e430c745cf5 100755 --- a/scripts/itest_parallel.sh +++ b/scripts/itest_parallel.sh @@ -8,12 +8,16 @@ SHUFFLE_SEED=$3 # Here we also shift 3 times and get the rest of our flags to pass on in $@. shift 3 +# The tranche offset allows running only a subset of the total tranches, +# spreading a single itest run over multiple machines. +OFFSET=${ITEST_TRANCHE_OFFSET:-0} + # Create a variable to hold the final exit code. exit_code=0 # Run commands using xargs in parallel and capture their PIDs pids=() -for ((i=0; i Date: Thu, 11 Jun 2026 19:07:47 -0230 Subject: [PATCH 03/23] ci: split postgres itests across runners with prebuilt binaries The three postgres itest configurations were the longest jobs in the workflow at 65-81 minutes each: every job built the itest binaries from scratch and then ran all 8 tranches in parallel on a single 4-core runner. 
Following the approach used in lightninglabs/taproot-assets, build the itest binaries once per distinct build-tag set (plain postgres, and postgres with test_native_sql) in a dedicated job that uploads them as artifacts. The test jobs download the binaries and each runs only SPLIT_ITEST_PARALLELISM (2) of the SMALL_TRANCHES (8) tranches, spreading every postgres config over 4 runners. The shuffle seed is fixed per configuration rather than per job so that all tranche groups of a config agree on the test partition. --- .github/workflows/main.yml | 145 +++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4da96992714..c202b96d0b2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,6 +39,12 @@ env: # TODO(yy): remove this value and use TRANCHES. SMALL_TRANCHES: 8 + # SPLIT_ITEST_PARALLELISM defines the number of tranches each runner job + # of a tranche-split itest configuration runs in parallel. Split configs + # run SMALL_TRANCHES tranches in total, spread over + # SMALL_TRANCHES / SPLIT_ITEST_PARALLELISM runner jobs. + SPLIT_ITEST_PARALLELISM: 2 + # If you change this please also update GO_VERSION in Makefile (then run # `make lint` to see where else it needs to be updated as well). 
GO_VERSION: 1.26.4 @@ -427,12 +433,6 @@ jobs: args: backend=bitcoind dbbackend=sqlite nativesql=true - name: bitcoind-sqlite=nativesql-experiment args: backend=bitcoind dbbackend=sqlite nativesql=true tags=test_native_sql - - name: bitcoind-postgres - args: backend=bitcoind dbbackend=postgres - - name: bitcoind-postgres-nativesql - args: backend=bitcoind dbbackend=postgres nativesql=true - - name: bitcoind-postgres-nativesql-experiment - args: backend=bitcoind dbbackend=postgres nativesql=true tags=test_native_sql steps: - name: Git checkout uses: actions/checkout@v5 @@ -497,6 +497,139 @@ jobs: retention-days: 5 + ######################## + # build binaries for tranche-split integration tests + ######################## + build-split-itest: + name: Build split itest binaries + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - name: bitcoind-postgres + args: backend=bitcoind dbbackend=postgres + - name: bitcoind-postgres-experiment + args: backend=bitcoind dbbackend=postgres tags=test_native_sql + steps: + - name: Git checkout + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Check for no-itest label + id: check-label + uses: ./.github/actions/check-label + with: + label: 'no-itest' + skip-message: "Tests auto-passed due to 'no-itest' label" + + - name: Clean up runner space + if: steps.check-label.outputs.skip != 'true' + uses: ./.github/actions/cleanup-space + + - name: Fetch and rebase on ${{ github.base_ref }} + if: github.event_name == 'pull_request' && steps.check-label.outputs.skip != 'true' + uses: ./.github/actions/rebase + + - name: Setup go ${{ env.GO_VERSION }} + if: steps.check-label.outputs.skip != 'true' + uses: ./.github/actions/setup-go + with: + go-version: '${{ env.GO_VERSION }}' + key-prefix: integration-test + + - name: Build itest binaries for ${{ matrix.name }} + if: steps.check-label.outputs.skip != 'true' + run: make build-itest ${{ matrix.args }} + + - name: Upload itest binaries + if: 
steps.check-label.outputs.skip != 'true' + uses: actions/upload-artifact@v4 + with: + name: itest-binaries-${{ matrix.name }} + path: | + itest/itest.test + itest/lnd-itest + itest/btcd-itest + retention-days: 1 + + ######################## + # run tranche-split integration tests + ######################## + split-integration-test: + name: Split itests (${{ matrix.config.name }}, ${{ matrix.tranche_group }}) + runs-on: ubuntu-latest + needs: build-split-itest + strategy: + # Allow other tests in the matrix to continue if one fails. + fail-fast: false + matrix: + # Each tranche group runs SPLIT_ITEST_PARALLELISM tranches, so + # this list must contain SMALL_TRANCHES / SPLIT_ITEST_PARALLELISM + # entries. + tranche_group: [0, 1, 2, 3] + config: + - name: bitcoind-postgres + binaries: bitcoind-postgres + seed: 1 + args: backend=bitcoind dbbackend=postgres + - name: bitcoind-postgres-nativesql + binaries: bitcoind-postgres + seed: 2 + args: backend=bitcoind dbbackend=postgres nativesql=true + - name: bitcoind-postgres-nativesql-experiment + binaries: bitcoind-postgres-experiment + seed: 3 + args: backend=bitcoind dbbackend=postgres nativesql=true tags=test_native_sql + steps: + - name: Git checkout + uses: actions/checkout@v5 + + - name: Check for no-itest label + id: check-label + uses: ./.github/actions/check-label + with: + label: 'no-itest' + skip-message: "Tests auto-passed due to 'no-itest' label" + + - name: Download itest binaries + if: steps.check-label.outputs.skip != 'true' + uses: actions/download-artifact@v4 + with: + name: itest-binaries-${{ matrix.config.binaries }} + path: itest + + - name: Make itest binaries executable + if: steps.check-label.outputs.skip != 'true' + run: chmod +x itest/itest.test itest/lnd-itest itest/btcd-itest + + - name: Install bitcoind + if: steps.check-label.outputs.skip != 'true' + run: ./scripts/install_bitcoind.sh $BITCOIN_VERSION + + - name: Run ${{ matrix.config.name }} tranche group ${{ matrix.tranche_group }} + if: 
steps.check-label.outputs.skip != 'true' + run: | + OFFSET=$((${{ matrix.tranche_group }} * ${{ env.SPLIT_ITEST_PARALLELISM }})) + make itest-only-parallel tranches=${{ env.SMALL_TRANCHES }} \ + parallel=${{ env.SPLIT_ITEST_PARALLELISM }} trancheoffset=$OFFSET \ + ${{ matrix.config.args }} \ + shuffleseed=${{ github.run_id }}${{ matrix.config.seed }} + + - name: Zip log files on failure + if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} + timeout-minutes: 5 # time out after 5 minutes + run: 7z a logs-itest-${{ matrix.config.name }}-${{ matrix.tranche_group }}.zip itest/**/*.log + + - name: Upload log files on failure + uses: actions/upload-artifact@v4 + if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} + with: + name: logs-itest-${{ matrix.config.name }}-${{ matrix.tranche_group }} + path: logs-itest-${{ matrix.config.name }}-${{ matrix.tranche_group }}.zip + retention-days: 5 + ######################## # run windows integration test ######################## From e7cf8dafe430a917ecd43d59c4c3ab1310bfa34b Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Thu, 11 Jun 2026 19:27:09 -0230 Subject: [PATCH 04/23] scripts: fail unit_race_part.sh loudly if go list fails Reading the package list through process substitution meant a go list failure was invisible to set -e: every tranche would select zero packages and exit zero, letting a broken tree pass the race jobs vacuously. Capture the list via command substitution instead, which propagates the pipeline failure, and reject an empty result. 
--- scripts/unit_race_part.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/unit_race_part.sh b/scripts/unit_race_part.sh index 19db401a909..37de5c4a391 100755 --- a/scripts/unit_race_part.sh +++ b/scripts/unit_race_part.sh @@ -51,11 +51,21 @@ HEAVY_PKGS=( "${PKG_PREFIX}/peer" ) +# Capture the package list via command substitution so a go list +# failure aborts the script (set -e -o pipefail) instead of silently +# yielding an empty list and a vacuously green test run. +pkg_list=$(go list -tags="${DEV_TAGS}" -deps "${PKG_PREFIX}/..." | \ + grep "${PKG_PREFIX}" | grep -v "/vendor/") + all_pkgs=() while IFS= read -r pkg; do all_pkgs+=("${pkg}") -done < <(go list -tags="${DEV_TAGS}" -deps "${PKG_PREFIX}/..." | \ - grep "${PKG_PREFIX}" | grep -v "/vendor/") +done <<< "${pkg_list}" + +if (( ${#all_pkgs[@]} == 0 )); then + echo "go list produced no packages" >&2 + exit 1 +fi # Only treat heavy packages that actually appear in the package list as # heavy, so a stale entry above cannot select a nonexistent package. From cf2ff047d47bfa13836a72b6bc2c2e8575777d12 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 07:59:32 -0230 Subject: [PATCH 05/23] ci: run split itests with finer tranches Measurement of the first split run (33-48 min per job at 8 tranches, 2 per runner) showed per-tranche wall time dominates: the itests are wait-bound rather than CPU-bound, so larger tranches set a high floor regardless of runner load. The basic configs already run 16 tranches on a single runner in 17-25 minutes total. Move the split configs to 16 tranches with 4 per runner job. The job count is unchanged; per-runner load stays below the old single runner setup (4 tranches and a dedicated postgres instance versus 8 tranches sharing one), and expected per-job time drops to roughly 15-20 minutes. 
--- .github/workflows/main.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c202b96d0b2..ac9dc1887b4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,11 +39,14 @@ env: # TODO(yy): remove this value and use TRANCHES. SMALL_TRANCHES: 8 + # SPLIT_TRANCHES defines the total number of tranches used by the + # tranche-split itest configurations, spread over + # SPLIT_TRANCHES / SPLIT_ITEST_PARALLELISM runner jobs. + SPLIT_TRANCHES: 16 + # SPLIT_ITEST_PARALLELISM defines the number of tranches each runner job - # of a tranche-split itest configuration runs in parallel. Split configs - # run SMALL_TRANCHES tranches in total, spread over - # SMALL_TRANCHES / SPLIT_ITEST_PARALLELISM runner jobs. - SPLIT_ITEST_PARALLELISM: 2 + # of a tranche-split itest configuration runs in parallel. + SPLIT_ITEST_PARALLELISM: 4 # If you change this please also update GO_VERSION in Makefile (then run # `make lint` to see where else it needs to be updated as well). @@ -566,7 +569,7 @@ jobs: fail-fast: false matrix: # Each tranche group runs SPLIT_ITEST_PARALLELISM tranches, so - # this list must contain SMALL_TRANCHES / SPLIT_ITEST_PARALLELISM + # this list must contain SPLIT_TRANCHES / SPLIT_ITEST_PARALLELISM # entries. 
tranche_group: [0, 1, 2, 3] config: @@ -612,7 +615,7 @@ jobs: if: steps.check-label.outputs.skip != 'true' run: | OFFSET=$((${{ matrix.tranche_group }} * ${{ env.SPLIT_ITEST_PARALLELISM }})) - make itest-only-parallel tranches=${{ env.SMALL_TRANCHES }} \ + make itest-only-parallel tranches=${{ env.SPLIT_TRANCHES }} \ parallel=${{ env.SPLIT_ITEST_PARALLELISM }} trancheoffset=$OFFSET \ ${{ matrix.config.args }} \ shuffleseed=${{ github.run_id }}${{ matrix.config.seed }} From 57619bc9a5939f2a3b3b9b7319c615594a8118b6 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 08:17:35 -0230 Subject: [PATCH 06/23] ci: run DB-variant unit and race tests on DB packages only The kvdb_* and test_db_* build tags work by swapping the backend used by kvdb.GetTestBackend, channeldb.MakeTestDB and the sqldb test constructors. Only the packages that construct test stores through those helpers behave differently under the tags; the remaining ~160 packages of the suite run byte-for-byte identical tests in every variant. Each DB-variant job nonetheless ran the full suite, costing 27-35 minutes per unit job and four runners per race flavor. Teach the pkg make variable to accept a list of packages, plumb an optional package filter through unit_race_part.sh, and run the variant jobs only over the DB-touching package closure (defined as DB_UNIT_PKGS in the workflow). The plain unit and race runs still cover the full suite. The DB race flavors drop from four tranches to two, removing four runner jobs. --- .github/workflows/main.yml | 69 ++++++++++++++++++++++++++++---------- Makefile | 2 +- make/testing_flags.mk | 8 ++--- scripts/unit_race_part.sh | 26 +++++++++----- 4 files changed, 74 insertions(+), 31 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac9dc1887b4..ec0e0612944 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -52,10 +52,18 @@ env: # `make lint` to see where else it needs to be updated as well). 
GO_VERSION: 1.26.4 - # NUM_UNIT_RACE_TRANCHES defines the number of tranches the race detector - # unit tests are split into, each tranche running on its own runner. Must - # match the tranche list in the unit-race matrix. - NUM_UNIT_RACE_TRANCHES: 4 + # DB_UNIT_PKGS is the list of packages whose unit tests exercise the + # configurable database backends, i.e. the packages whose tests behave + # differently under the kvdb_* and test_db_* build tags (they construct + # test stores via kvdb.GetTestBackend, channeldb.MakeTestDB, the sqldb + # test constructors or their wrappers). The DB-variant unit and race + # jobs run only these packages; all other packages run identically in + # the plain unit jobs. + DB_UNIT_PKGS: >- + autopilot batch chainparams channeldb cluster contractcourt graph + graph/db graph/db/migration1 invoices kvdb kvdb/etcd kvdb/postgres + kvdb/sqlbase kvdb/sqlite payments/db payments/db/migration1 routing + sqldb sweep jobs: static-checks: @@ -221,11 +229,11 @@ jobs: matrix: unit_type: - unit-cover - - unit tags="kvdb_etcd" - - unit tags="kvdb_postgres" - - unit tags="kvdb_sqlite" - - unit tags="test_db_sqlite" - - unit tags="test_db_postgres" + - unit tags="kvdb_etcd" pkg="$DB_UNIT_PKGS" + - unit tags="kvdb_postgres" pkg="$DB_UNIT_PKGS" + - unit tags="kvdb_sqlite" pkg="$DB_UNIT_PKGS" + - unit tags="test_db_sqlite" pkg="$DB_UNIT_PKGS" + - unit tags="test_db_postgres" pkg="$DB_UNIT_PKGS" - unit-module steps: @@ -281,21 +289,48 @@ jobs: # run unit tests in race detector mode ######################## unit-race: - name: Unit race (${{ matrix.flavor.name }}, ${{ matrix.tranche }}) + name: Unit race (${{ matrix.name }}, ${{ matrix.tranche }}) runs-on: ubuntu-latest strategy: # Allow other tests in the matrix to continue if one fails. fail-fast: false matrix: - # This list must contain NUM_UNIT_RACE_TRANCHES entries. - tranche: [0, 1, 2, 3] - flavor: + # The plain flavor runs the full suite split into four tranches. 
+ # The DB flavors only run the DB-touching packages (see + # DB_UNIT_PKGS), split into two tranches each. + include: + - name: unit-race + args: '' + tranche: 0 + tranches: 4 + - name: unit-race + args: '' + tranche: 1 + tranches: 4 - name: unit-race args: '' + tranche: 2 + tranches: 4 + - name: unit-race + args: '' + tranche: 3 + tranches: 4 - name: unit-race-sqlite - args: tags="test_db_sqlite" + args: tags="test_db_sqlite" pkg="$DB_UNIT_PKGS" + tranche: 0 + tranches: 2 + - name: unit-race-sqlite + args: tags="test_db_sqlite" pkg="$DB_UNIT_PKGS" + tranche: 1 + tranches: 2 + - name: unit-race-postgres + args: tags="test_db_postgres" pkg="$DB_UNIT_PKGS" + tranche: 0 + tranches: 2 - name: unit-race-postgres - args: tags="test_db_postgres" + args: tags="test_db_postgres" pkg="$DB_UNIT_PKGS" + tranche: 1 + tranches: 2 steps: - name: Git checkout @@ -328,8 +363,8 @@ jobs: - name: Install bitcoind run: ./scripts/install_bitcoind.sh $BITCOIN_VERSION - - name: Run ${{ matrix.flavor.name }} tranche ${{ matrix.tranche }} - run: make unit-race-parallel tranche=${{ matrix.tranche }} tranches=${{ env.NUM_UNIT_RACE_TRANCHES }} ${{ matrix.flavor.args }} + - name: Run ${{ matrix.name }} tranche ${{ matrix.tranche }} + run: make unit-race-parallel tranche=${{ matrix.tranche }} tranches=${{ matrix.tranches }} ${{ matrix.args }} ######################## # run integration tests with TRANCHES diff --git a/Makefile b/Makefile index 8c636084c1c..fe1c55c7edd 100644 --- a/Makefile +++ b/Makefile @@ -306,7 +306,7 @@ unit-race: $(BTCD_BIN) #? 
unit-race-parallel: Run one tranche of the unit tests in race detector mode (tranche=<index> tranches=<count>) unit-race-parallel: $(BTCD_BIN) @$(call print, "Running unit race tests tranche ${tranche} of ${tranches}.") - PKG="$(PKG)" DEV_TAGS="$(DEV_TAGS)" \ + PKG="$(PKG)" DEV_TAGS="$(DEV_TAGS)" UNIT_RACE_PKGS="$(pkg)" \ scripts/unit_race_part.sh $(tranche) $(tranches) \ -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS)" $(TEST_FLAGS) diff --git a/make/testing_flags.mk b/make/testing_flags.mk index 27256c9c746..da510c282d1 100644 --- a/make/testing_flags.mk +++ b/make/testing_flags.mk @@ -52,12 +52,12 @@ ifneq ($(windows),) EXEC_SUFFIX = .exe endif -# If specific package is being unit tested, construct the full name of the -# subpackage. +# If specific packages are being unit tested, construct the full names of +# the subpackages. ifneq ($(pkg),) -UNITPKG := $(PKG)/$(pkg) +UNITPKG := $(addprefix $(PKG)/,$(pkg)) UNIT_TARGETED = yes -COVER_PKG = $(PKG)/$(pkg) +COVER_PKG = $(addprefix $(PKG)/,$(pkg)) endif # If a specific unit test case is being target, construct test.run filter. diff --git a/scripts/unit_race_part.sh b/scripts/unit_race_part.sh index 37de5c4a391..b73dcfa4b50 100755 --- a/scripts/unit_race_part.sh +++ b/scripts/unit_race_part.sh @@ -51,16 +51,24 @@ HEAVY_PKGS=( "${PKG_PREFIX}/peer" ) -# Capture the package list via command substitution so a go list -# failure aborts the script (set -e -o pipefail) instead of silently -# yielding an empty list and a vacuously green test run. -pkg_list=$(go list -tags="${DEV_TAGS}" -deps "${PKG_PREFIX}/..." | \ - grep "${PKG_PREFIX}" | grep -v "/vendor/") - all_pkgs=() -while IFS= read -r pkg; do - all_pkgs+=("${pkg}") -done <<< "${pkg_list}" +if [[ -n "${UNIT_RACE_PKGS:-}" ]]; then + # An explicit package list (relative to PKG) restricts the run, e.g. + # to the database-touching packages for the DB-variant race jobs. 
+ for pkg in ${UNIT_RACE_PKGS}; do + all_pkgs+=("${PKG_PREFIX}/${pkg}") + done +else + # Capture the package list via command substitution so a go list + # failure aborts the script (set -e -o pipefail) instead of silently + # yielding an empty list and a vacuously green test run. + pkg_list=$(go list -tags="${DEV_TAGS}" -deps "${PKG_PREFIX}/..." | \ + grep "${PKG_PREFIX}" | grep -v "/vendor/") + + while IFS= read -r pkg; do + all_pkgs+=("${pkg}") + done <<< "${pkg_list}" +fi if (( ${#all_pkgs[@]} == 0 )); then echo "go list produced no packages" >&2 From 3de948791898500f3c2d5dd454605ea58fad2c78 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 08:18:06 -0230 Subject: [PATCH 07/23] ci: fold dep-pin and milestone checks into static checks Both jobs cost a full runner slot and per-job setup to run a grep on go.mod and a 20-line shell script respectively. With runner-pool queueing being the dominant contributor to CI wall time, runner slots matter more than step duration. Run both as steps of the static checks job instead, dropping three jobs per run. --- .github/workflows/main.yml | 78 ++++++++++++++------------------------ 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ec0e0612944..d0a325be68a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -129,6 +129,35 @@ jobs: - name: Check mobile specific code run: go build --tags="mobile" ./mobile + ######################## + # check pinned dependencies + ######################## + - name: Check pinned dependencies + run: | + while read -r dep; do + if ! grep -q "$dep" go.mod; then + echo "dependency $dep should not be altered" + exit 1 + fi + done < Date: Fri, 12 Jun 2026 08:38:15 -0230 Subject: [PATCH 08/23] ci: remove the auto cache cleanup job The job deleted all caches not accessed within 12 hours on every pull request from a write-access author. 
At current activity levels the caches produced within any 12-hour window already exceed the 10GB repository quota (currently 11GB across 15 entries), so the job only ever deletes entries that GitHub's LRU eviction would discard anyway. It also has a harmful failure mode: after any quiet 12-hour stretch the next PR wipes every cache, including the hot master branch ones, forcing a fleet-wide cold build. LRU eviction never does that, since master caches are kept fresh by restore-keys hits from PR jobs. Dropping the job also lets the workflow's actions: write permission fall back to read-only. --- .github/workflows/main.yml | 42 -------------------------------------- 1 file changed, 42 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d0a325be68a..d0948d179b2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,8 +12,6 @@ on: - "master" permissions: - # Required to manage and delete caches. - actions: write # Default permission for checking out code. contents: read @@ -820,46 +818,6 @@ jobs: path: scripts/bw-compatibility-test/logs/ retention-days: 7 - ######################################### - # Auto Cache Cleanup on Pull Requests - ######################################### - auto-cleanup-cache: - name: Cache Cleanup - runs-on: ubuntu-latest - - # This condition checks for pull requests from authors with write access. - if: >- - contains('OWNER, MEMBER, COLLABORATOR', github.event.pull_request.author_association) - - steps: - - name: Checkout repository - uses: actions/checkout@v5 - - - name: Delete caches older than 12 hours - continue-on-error: true - env: - # GITHUB_TOKEN is required for the gh CLI. - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - echo "Finding caches not used in the last 12 hours..." - - # Get the current time and the cutoff time (12 hours ago) in Unix - # timestamp format. - cutoff_timestamp=$(date -d "12 hours ago" +%s) - - # Use gh and jq to parse caches. 
Delete any cache last accessed - # before the cutoff time. - gh cache list --json key,lastAccessedAt | jq -r '.[] | - select(.lastAccessedAt != null) | "\(.lastAccessedAt) \(.key)"' | - while read -r last_accessed_at key; do - last_accessed_timestamp=$(date -d "$last_accessed_at" +%s) - - if (( last_accessed_timestamp < cutoff_timestamp )); then - echo "Deleting old cache. Key: $key, Last Used: $last_accessed_at" - gh cache delete "$key" - fi - done - # Notify about the completion of all coverage collecting jobs. finish: name: Send coverage report From c537e58155cb171923eadcbd0099c04f36ec8fc1 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 08:42:17 -0230 Subject: [PATCH 09/23] ci: drop the runner space cleanup step from the main workflow The cleanup-space action dates from when hosted runners shipped with roughly 14GB of free disk. Current ubuntu runners have 145GB disks with 88GB free before cleanup (measured in run 27379687191), far more than any job here uses, so the step only adds about two minutes of latency to the start of every job that carries it - roughly 60 runner-minutes per workflow run across its nine usages. The release workflow keeps its usage since it runs rarely and off the PR path. --- .github/workflows/main.yml | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d0948d179b2..051c7f201c6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -74,9 +74,6 @@ jobs: # Needed for some checks. 
fetch-depth: 0 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: Setup Go ${{ env.GO_VERSION }} uses: ./.github/actions/setup-go with: @@ -169,9 +166,6 @@ jobs: with: fetch-depth: 0 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: setup go ${{ env.GO_VERSION }} uses: ./.github/actions/setup-go with: @@ -197,9 +191,6 @@ jobs: with: fetch-depth: 0 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: setup go ${{ env.GO_VERSION }} uses: ./.github/actions/setup-go with: @@ -231,9 +222,6 @@ jobs: - name: Git checkout uses: actions/checkout@v5 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: Setup go ${{ env.GO_VERSION }} uses: ./.github/actions/setup-go with: @@ -269,9 +257,6 @@ jobs: with: fetch-depth: 0 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: Fetch and rebase on ${{ github.base_ref }} if: github.event_name == 'pull_request' uses: ./.github/actions/rebase @@ -365,9 +350,6 @@ jobs: with: fetch-depth: 0 - - name: Clean up runner space - uses: ./.github/actions/cleanup-space - - name: Fetch and rebase on ${{ github.base_ref }} if: github.event_name == 'pull_request' uses: ./.github/actions/rebase @@ -427,10 +409,6 @@ jobs: label: 'no-itest' skip-message: "Tests auto-passed due to 'no-itest' label" - - name: Clean up runner space - if: steps.check-label.outputs.skip != 'true' - uses: ./.github/actions/cleanup-space - - name: Fetch and rebase on ${{ github.base_ref }} if: github.event_name == 'pull_request' && steps.check-label.outputs.skip != 'true' uses: ./.github/actions/rebase @@ -511,10 +489,6 @@ jobs: label: 'no-itest' skip-message: "Tests auto-passed due to 'no-itest' label" - - name: Clean up runner space - if: steps.check-label.outputs.skip != 'true' - uses: ./.github/actions/cleanup-space - - name: Fetch and rebase on ${{ github.base_ref }} if: github.event_name == 'pull_request' && 
steps.check-label.outputs.skip != 'true' uses: ./.github/actions/rebase @@ -589,10 +563,6 @@ jobs: label: 'no-itest' skip-message: "Tests auto-passed due to 'no-itest' label" - - name: Clean up runner space - if: steps.check-label.outputs.skip != 'true' - uses: ./.github/actions/cleanup-space - - name: Fetch and rebase on ${{ github.base_ref }} if: github.event_name == 'pull_request' && steps.check-label.outputs.skip != 'true' uses: ./.github/actions/rebase From 2cb75f670f5783f70358c52345ca5916ff5c3bd8 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 09:03:13 -0230 Subject: [PATCH 10/23] ci: trim low-value itest configurations Remove six itest configurations from per-PR CI: - bitcoind-sqlite-nativesql and bitcoind-postgres-nativesql: the test_native_sql build tag only splices in the dev migrations from sqldb/migrations_dev.go, which is empty by design now that all migrations live in the main line. The -experiment flavors are therefore identical today and remain a strict superset whenever a dev migration is in flight, so the plain nativesql flavors add no coverage. - bitcoind-miner: the miner is the lntest harness's block generator; lnd never connects to it, so this config varies no production code. Breakage in lntest's bitcoind miner support is a test-framework concern, not a per-PR lnd one. - bitcoind-notxindex and bitcoind-rpcpolling: each differs from the main bitcoind run only in a thin chain-access adapter (the txindex-less lookup fallback and the RPC polling client), both rarely touched and partially unit-covered, while the rest of the run is a duplicate. - bitcoind-etcd: niche clustered deployment shape; the etcd storage layer itself is still unit-tested on every PR via the targeted kvdb_etcd job. This removes nine runner jobs and roughly 230 runner-minutes per run, which translates directly to less queueing under the org-level concurrent-job cap. 
--- .github/workflows/main.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 051c7f201c6..68d8b0677f7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -390,10 +390,6 @@ jobs: args: backend=btcd cover=1 - name: bitcoind args: backend=bitcoind cover=1 - - name: bitcoind-miner - args: backend=bitcoind minerbackend=bitcoind cover=1 - - name: bitcoind-notxindex - args: backend="bitcoind notxindex" - name: neutrino args: backend=neutrino cover=1 steps: @@ -466,14 +462,8 @@ jobs: fail-fast: false matrix: include: - - name: bitcoind-rpcpolling - args: backend="bitcoind rpcpolling" - - name: bitcoind-etcd - args: backend=bitcoind dbbackend=etcd - name: bitcoind-sqlite args: backend=bitcoind dbbackend=sqlite - - name: bitcoind-sqlite-nativesql - args: backend=bitcoind dbbackend=sqlite nativesql=true - name: bitcoind-sqlite=nativesql-experiment args: backend=bitcoind dbbackend=sqlite nativesql=true tags=test_native_sql steps: @@ -609,10 +599,6 @@ jobs: binaries: bitcoind-postgres seed: 1 args: backend=bitcoind dbbackend=postgres - - name: bitcoind-postgres-nativesql - binaries: bitcoind-postgres - seed: 2 - args: backend=bitcoind dbbackend=postgres nativesql=true - name: bitcoind-postgres-nativesql-experiment binaries: bitcoind-postgres-experiment seed: 3 From b08dc19fe1f5f6f34c8116d52fe4ca3478404090 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 09:13:19 -0230 Subject: [PATCH 11/23] chainntnfs: cover no-txindex fallbacks in the interface tests The bitcoind interface test variants both ran with a transaction index available, so the notifier's manual historical lookup paths (confDetailsManually, historicalSpendDetails) were only exercised incidentally, via script-dispatch requests and tx-not-found races. End-to-end coverage of the no-txindex deployment shape came from the bitcoind-notxindex itest configuration, which was removed from per-PR CI. 
Add a bitcoind-no-txindex variant to the interface suite. It runs against the same registered bitcoind driver but with txindex disabled on the backend, forcing every historical confirmation and spend lookup through the manual scan fallbacks. This covers the differing surface of the removed itest configuration more systematically and at far lower cost, since it runs inside the existing unit test jobs. --- chainntnfs/test/bitcoind/bitcoind_test.go | 14 +++++++++- chainntnfs/test/test_interface.go | 33 +++++++++++++++-------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/chainntnfs/test/bitcoind/bitcoind_test.go b/chainntnfs/test/bitcoind/bitcoind_test.go index b5a2e6626c7..6c27e1a6566 100644 --- a/chainntnfs/test/bitcoind/bitcoind_test.go +++ b/chainntnfs/test/bitcoind/bitcoind_test.go @@ -21,8 +21,20 @@ func TestInterfaces(t *testing.T) { return } - t.Run("bitcoind rpc polling", func(st *testing.T) { + success = t.Run("bitcoind rpc polling", func(st *testing.T) { st.Parallel() chainntnfstest.TestInterfaces(st, "bitcoind-rpc-polling") }) + + if !success { + return + } + + // Run the suite against a bitcoind backend without a transaction + // index, which forces the notifier through its manual historical + // confirmation and spend lookup fallbacks. + t.Run("bitcoind no txindex", func(st *testing.T) { + st.Parallel() + chainntnfstest.TestInterfaces(st, "bitcoind-no-txindex") + }) } diff --git a/chainntnfs/test/test_interface.go b/chainntnfs/test/test_interface.go index 7536d24c503..3dfd5b06801 100644 --- a/chainntnfs/test/test_interface.go +++ b/chainntnfs/test/test_interface.go @@ -1902,9 +1902,20 @@ func TestInterfaces(t *testing.T, targetBackEnd string) { log.Printf("Running %v ChainNotifier interface tests", 2*len(txNtfnTests)+len(blockNtfnTests)+len(blockCatchupTests)) + // The bitcoind-no-txindex variant runs against the same registered + // bitcoind driver as the plain bitcoind variant, but with the + // backend's transaction index disabled. 
This exercises the manual + // historical confirmation/spend lookup fallbacks in the notifier. + driverType := targetBackEnd + txindex := true + if targetBackEnd == "bitcoind-no-txindex" { + driverType = "bitcoind" + txindex = false + } + for _, notifierDriver := range chainntnfs.RegisteredNotifiers() { notifierType := notifierDriver.NotifierType - if notifierType != targetBackEnd { + if notifierType != driverType { continue } @@ -1928,11 +1939,11 @@ func TestInterfaces(t *testing.T, targetBackEnd string) { newNotifier func() (chainntnfs.TestChainNotifier, error) ) - switch notifierType { - case "bitcoind": + switch targetBackEnd { + case "bitcoind", "bitcoind-no-txindex": var bitcoindConn *chain.BitcoindConn bitcoindConn = unittest.NewBitcoindBackend( - t, unittest.NetParams, miner, true, false, + t, unittest.NetParams, miner, txindex, false, ) newNotifier = func() (chainntnfs.TestChainNotifier, error) { return bitcoindnotify.New( @@ -1976,21 +1987,21 @@ func TestInterfaces(t *testing.T, targetBackEnd string) { } log.Printf("Running ChainNotifier interface tests for: %v", - notifierType) + targetBackEnd) notifier, err := newNotifier() if err != nil { t.Fatalf("unable to create %v notifier: %v", - notifierType, err) + targetBackEnd, err) } if err := notifier.Start(); err != nil { t.Fatalf("unable to start notifier %v: %v", - notifierType, err) + targetBackEnd, err) } for _, txNtfnTest := range txNtfnTests { for _, scriptDispatch := range []bool{false, true} { - testName := fmt.Sprintf("%v %v", notifierType, + testName := fmt.Sprintf("%v %v", targetBackEnd, txNtfnTest.name) if scriptDispatch { testName += " with script dispatch" @@ -2008,7 +2019,7 @@ func TestInterfaces(t *testing.T, targetBackEnd string) { } for _, blockNtfnTest := range blockNtfnTests { - testName := fmt.Sprintf("%v %v", notifierType, + testName := fmt.Sprintf("%v %v", targetBackEnd, blockNtfnTest.name) success := t.Run(testName, func(t *testing.T) { blockNtfnTest.test(miner, notifier, t) @@ 
-2026,10 +2037,10 @@ func TestInterfaces(t *testing.T, targetBackEnd string) { notifier, err = newNotifier() if err != nil { t.Fatalf("unable to create %v notifier: %v", - notifierType, err) + targetBackEnd, err) } - testName := fmt.Sprintf("%v %v", notifierType, + testName := fmt.Sprintf("%v %v", targetBackEnd, blockCatchupTest.name) success := t.Run(testName, func(t *testing.T) { From aa7ba7a82c5c2ce9fe0b157e2b8b323c4ee42732 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 09:13:57 -0230 Subject: [PATCH 12/23] ci: run bitcoind-miner itests only when lntest changes The bitcoind-miner itest configuration was removed from per-PR CI because it varies no production lnd code: the miner is the lntest harness's block generator. Its real purpose is validating lntest's bitcoind miner support, which downstream projects consume. Restore that coverage with a dedicated path-filtered workflow that runs the configuration only when lntest itself (or the workflow) changes, on PRs and on pushes to master. This keeps the validation where it is relevant without spending a runner slot on every push. Note that the new check must not be marked required in branch protection, since it does not run on most PRs. --- .github/workflows/lntest.yml | 79 ++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 .github/workflows/lntest.yml diff --git a/.github/workflows/lntest.yml b/.github/workflows/lntest.yml new file mode 100644 index 00000000000..00547639050 --- /dev/null +++ b/.github/workflows/lntest.yml @@ -0,0 +1,79 @@ +name: lntest + +# The bitcoind-miner itest configuration exercises the lntest +# harness's bitcoind miner support rather than any production lnd +# code, so it only runs when the harness itself (or this workflow) +# changes. 
+on: + push: + branches: + - "master" + paths: + - "lntest/**" + pull_request: + branches: + - "*" + paths: + - "lntest/**" + - ".github/workflows/lntest.yml" + +permissions: + contents: read + +concurrency: + group: lntest-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +defaults: + run: + shell: bash + +env: + # Accepts either a major image tag like "30" or a patch tag like "29.1". + BITCOIN_VERSION: "31" + + # TRANCHES defines the number of tranches used in the itests. + TRANCHES: 16 + + # If you change this please also update GO_VERSION in Makefile (then run + # `make lint` to see where else it needs to be updated as well). + GO_VERSION: 1.26.4 + +jobs: + bitcoind-miner-itest: + name: Run bitcoind-miner itests + runs-on: ubuntu-latest + steps: + - name: Git checkout + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Fetch and rebase on ${{ github.base_ref }} + if: github.event_name == 'pull_request' + uses: ./.github/actions/rebase + + - name: Setup go ${{ env.GO_VERSION }} + uses: ./.github/actions/setup-go + with: + go-version: '${{ env.GO_VERSION }}' + key-prefix: integration-test + + - name: Install bitcoind + run: ./scripts/install_bitcoind.sh $BITCOIN_VERSION + + - name: Run bitcoind-miner itests + run: make itest-parallel tranches=${{ env.TRANCHES }} backend=bitcoind minerbackend=bitcoind shuffleseed=${{ github.run_id }} + + - name: Zip log files on failure + if: ${{ failure() }} + timeout-minutes: 5 # timeout after 5 minutes + run: 7z a logs-itest-bitcoind-miner.zip itest/**/*.log + + - name: Upload log files on failure + uses: actions/upload-artifact@v4 + if: ${{ failure() }} + with: + name: logs-itest-bitcoind-miner + path: logs-itest-bitcoind-miner.zip + retention-days: 5 From e7efbb906f6222cff226515bda9515ac6886a23d Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 10:38:19 -0230 Subject: [PATCH 13/23] makefile: serialize test binaries for multi-package unit runs The targeted DB-variant
unit jobs run their package list through a single go test invocation, which executes package test binaries in parallel. The untargeted path serializes packages via xargs -L 1, and some test fixtures depend on that: the kvdb postgres fixture starts an embedded postgres bound to the fixed port 9876, so under the kvdb_postgres tag concurrent package binaries race for the port (seen as 'process already listening on port 9876' failures in CI). Pass -p 1 to go test when more than one package is targeted, restoring the serial execution the suite has always assumed. Single-package invocations are unaffected and keep parallel builds. --- make/testing_flags.mk | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/make/testing_flags.mk b/make/testing_flags.mk index da510c282d1..86691a91561 100644 --- a/make/testing_flags.mk +++ b/make/testing_flags.mk @@ -58,6 +58,15 @@ ifneq ($(pkg),) UNITPKG := $(addprefix $(PKG)/,$(pkg)) UNIT_TARGETED = yes COVER_PKG = $(addprefix $(PKG)/,$(pkg)) + +# When more than one package is targeted, run their test binaries +# serially. The untargeted path serializes packages via xargs -L 1, and +# some test fixtures bind fixed ports (e.g. the kvdb postgres fixture's +# embedded postgres on port 9876), so package binaries collide when go +# test runs them in parallel. +ifneq ($(word 2,$(pkg)),) +UNIT_PKG_FLAGS = -p 1 +endif endif # If a specific unit test case is being target, construct test.run filter. @@ -148,9 +157,9 @@ UNIT_TARGETED ?= no # If a specific package/test case was requested, run the unit test for the # targeted case. Otherwise, default to running all tests. 
ifeq ($(UNIT_TARGETED), yes) -UNIT := $(GOTEST) -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS)" $(TEST_FLAGS) $(UNITPKG) +UNIT := $(GOTEST) $(UNIT_PKG_FLAGS) -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS)" $(TEST_FLAGS) $(UNITPKG) UNIT_DEBUG := $(GOTEST) -v -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS)" $(TEST_FLAGS) $(UNITPKG) -UNIT_RACE := $(GOTEST) -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS) lowscrypt" $(TEST_FLAGS) -race $(UNITPKG) +UNIT_RACE := $(GOTEST) $(UNIT_PKG_FLAGS) -tags="$(DEV_TAGS) $(RPC_TAGS) $(LOG_TAGS) lowscrypt" $(TEST_FLAGS) -race $(UNITPKG) # NONE is a special value which selects no other tests but only executes the benchmark tests here. UNIT_BENCH := $(GOTEST) -tags="$(DEV_TAGS) $(LOG_TAGS)" -test.bench=. -test.run=NONE $(UNITPKG) endif From 5bdfde36b1b7e716ef76536c06e5d5481c50362a Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 11:10:35 -0230 Subject: [PATCH 14/23] ci: cross-compile windows itest binaries on linux The windows itest job spent roughly a quarter of its 43 minutes compiling the itest binaries on the (much slower) windows runner. All itest binaries are built with CGO_ENABLED=0, so they cross-compile cleanly: build them with GOOS=windows in the existing itest binary build job and have the windows job download the artifact and run itest-only-parallel instead. This also drops the Go toolchain setup and cache download from the windows runner entirely. 
--- .github/workflows/main.yml | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 68d8b0677f7..d7a90693491 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -527,10 +527,10 @@ jobs: ######################## - # build binaries for tranche-split integration tests + # build binaries for itest jobs that run prebuilt binaries ######################## build-split-itest: - name: Build split itest binaries + name: Build itest binaries runs-on: ubuntu-latest strategy: fail-fast: false @@ -540,6 +540,13 @@ jobs: args: backend=bitcoind dbbackend=postgres - name: bitcoind-postgres-experiment args: backend=bitcoind dbbackend=postgres tags=test_native_sql + # The windows binaries are cross-compiled here since building + # them on the (much slower) windows runner dominated that + # job's running time. All itest binaries are built with + # CGO_ENABLED=0, so cross-compilation is safe. 
+ - name: windows + args: windows=1 + goos: windows steps: - name: Git checkout uses: actions/checkout@v5 @@ -566,6 +573,8 @@ jobs: - name: Build itest binaries for ${{ matrix.name }} if: steps.check-label.outputs.skip != 'true' + env: + GOOS: ${{ matrix.goos }} run: make build-itest ${{ matrix.args }} - name: Upload itest binaries @@ -574,9 +583,9 @@ jobs: with: name: itest-binaries-${{ matrix.name }} path: | - itest/itest.test - itest/lnd-itest - itest/btcd-itest + itest/itest.test* + itest/lnd-itest* + itest/btcd-itest* retention-days: 1 ######################## @@ -657,11 +666,10 @@ jobs: windows-integration-test: name: Run windows itest runs-on: windows-latest + needs: build-split-itest steps: - name: Git checkout uses: actions/checkout@v5 - with: - fetch-depth: 0 - name: Check for no-itest label id: check-label @@ -670,20 +678,16 @@ jobs: label: 'no-itest' skip-message: "Tests auto-passed due to 'no-itest' label" - - name: Fetch and rebase on ${{ github.base_ref }} - if: github.event_name == 'pull_request' && steps.check-label.outputs.skip != 'true' - uses: ./.github/actions/rebase - - - name: Setup go ${{ env.GO_VERSION }} + - name: Download itest binaries if: steps.check-label.outputs.skip != 'true' - uses: ./.github/actions/setup-go + uses: actions/download-artifact@v4 with: - go-version: '${{ env.GO_VERSION }}' - key-prefix: integration-test + name: itest-binaries-windows + path: itest - name: Run itest if: steps.check-label.outputs.skip != 'true' - run: make itest-parallel tranches=${{ env.SMALL_TRANCHES }} windows=1 shuffleseed=${{ github.run_id }} + run: make itest-only-parallel tranches=${{ env.SMALL_TRANCHES }} windows=1 shuffleseed=${{ github.run_id }} - name: Kill any remaining lnd processes if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} From 845bbd98795f6f9d7e540d409db0bf69b3b89a3d Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 11:10:35 -0230 Subject: [PATCH 15/23] ci: run sqlite itests with 16 tranches The two 
sqlite itest configurations were the slowest single-runner jobs left at 31-32 minutes, still running SMALL_TRANCHES (8). The basic configurations demonstrate that a single runner handles 16 tranches in about 20 minutes, since the itests are wait-bound rather than CPU-bound and benefit from finer-grained tranches. Switch the job to TRANCHES (16), leaving SMALL_TRANCHES to the windows and macOS jobs. --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d7a90693491..7907a9c347b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -496,7 +496,7 @@ jobs: - name: Run ${{ matrix.name }} if: steps.check-label.outputs.skip != 'true' - run: make itest-parallel tranches=${{ env.SMALL_TRANCHES }} ${{ matrix.args }} shuffleseed=${{ github.run_id }}${{ strategy.job-index }} + run: make itest-parallel tranches=${{ env.TRANCHES }} ${{ matrix.args }} shuffleseed=${{ github.run_id }}${{ strategy.job-index }} - name: Clean coverage run: grep -Ev '(\.pb\.go|\.pb\.json\.go|\.pb\.gw\.go)' coverage.txt > coverage-norpc.txt From cde980d158c9e3282111de0e2249c3b6326fb780 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 11:10:35 -0230 Subject: [PATCH 16/23] scripts: rebalance unit race tranche assignment Two CI runs of data show the tranche holding both channeldb and invoices is consistently the slowest at 24-28 minutes against 12-17 for its siblings, with channeldb the heaviest package by a wide margin. Reorder HEAVY_PKGS so that under the four-tranche CI split channeldb shares a tranche only with light packages and invoices moves next to chainntnfs. The partition remains disjoint and covering (verified across all four tranches). 
--- scripts/unit_race_part.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/unit_race_part.sh b/scripts/unit_race_part.sh index b73dcfa4b50..3757267efe1 100755 --- a/scripts/unit_race_part.sh +++ b/scripts/unit_race_part.sh @@ -36,19 +36,21 @@ PKG_PREFIX=${PKG:-github.com/lightningnetwork/lnd} DEV_TAGS=${DEV_TAGS:-dev} # Heavy packages listed first so the round-robin split distributes them -# across different tranches. Ordered by approximate descending test -# duration. Update periodically if the profile shifts. +# across different tranches. The order is chosen so that the heaviest +# packages (by measured CI duration; channeldb leads by a wide margin) +# do not share a tranche under the four-tranche split used in CI. +# Update periodically if the profile shifts. HEAVY_PKGS=( + "${PKG_PREFIX}/channeldb" "${PKG_PREFIX}/lnwallet" "${PKG_PREFIX}/htlcswitch" - "${PKG_PREFIX}/chainntnfs" - "${PKG_PREFIX}/channeldb" + "${PKG_PREFIX}/invoices" + "${PKG_PREFIX}/graph/db" "${PKG_PREFIX}/contractcourt" "${PKG_PREFIX}/routing" - "${PKG_PREFIX}/graph/db" - "${PKG_PREFIX}/invoices" - "${PKG_PREFIX}/watchtower/wtclient" + "${PKG_PREFIX}/chainntnfs" "${PKG_PREFIX}/peer" + "${PKG_PREFIX}/watchtower/wtclient" ) all_pkgs=() From 92ee23a25e08852ab8fd7ce3695f79b986fa0965 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 12:17:02 -0230 Subject: [PATCH 17/23] ci: build representative cross-compile subset (experimental) The three cross-compile jobs built all 18 release targets on every push, costing roughly 45 runner-minutes per run. Platform-specific compile errors come in a small number of classes (OS family, word size, ARM), so build one job with one target per class instead. The full target list is still built by the release workflow. Experimental: revert this commit to restore the full per-PR matrix. 
--- .github/workflows/main.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7907a9c347b..55457c261aa 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -207,17 +207,6 @@ jobs: cross-compile: name: Cross compilation runs-on: ubuntu-latest - strategy: - fail-fast: true - matrix: - # Please keep this list in sync with make/release_flags.mk! - include: - - name: i386 - sys: freebsd-386 linux-386 windows-386 - - name: amd64 - sys: darwin-amd64 freebsd-amd64 linux-amd64 netbsd-amd64 openbsd-amd64 windows-amd64 - - name: arm - sys: darwin-arm64 freebsd-arm linux-armv6 linux-armv7 linux-arm64 windows-arm64 steps: - name: Git checkout uses: actions/checkout@v5 @@ -229,8 +218,13 @@ jobs: key-prefix: cross-compile use-build-cache: 'no' - - name: Build release for all architectures - run: make release sys="${{ matrix.sys }}" + # Build a representative subset of the release targets: one per + # OS family plus a 32-bit and an ARM target, which together cover + # the distinct classes of platform-specific compile errors. The + # full target list from make/release_flags.mk is still built by + # the release workflow. + - name: Build release for representative architectures + run: make release sys="linux-386 linux-amd64 linux-armv6 darwin-arm64 freebsd-amd64 windows-amd64" ######################## # run unit tests From 151901b0c201dc71075eadafcc8cb9201269796b Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 12:17:27 -0230 Subject: [PATCH 18/23] ci: drop the DB-flavor race jobs (experimental) The plain unit-race tranches already race-test every package on the default backend. The sqlite and postgres race flavors existed to catch races in backend-specific concurrency handling, but that surface is exercised for correctness by the non-race test_db_* unit variants on every PR, and the four flavor jobs cost roughly 50 runner-minutes per run. 
Experimental: revert this commit to restore the DB race flavors. --- .github/workflows/main.yml | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 55457c261aa..c54892f2657 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,9 +54,9 @@ env: # configurable database backends, i.e. the packages whose tests behave # differently under the kvdb_* and test_db_* build tags (they construct # test stores via kvdb.GetTestBackend, channeldb.MakeTestDB, the sqldb - # test constructors or their wrappers). The DB-variant unit and race - # jobs run only these packages; all other packages run identically in - # the plain unit jobs. + # test constructors or their wrappers). The DB-variant unit jobs run + # only these packages; all other packages run identically in the + # plain unit jobs. DB_UNIT_PKGS: >- autopilot batch chainparams channeldb cluster contractcourt graph graph/db graph/db/migration1 invoices kvdb kvdb/etcd kvdb/postgres @@ -301,9 +301,8 @@ jobs: # Allow other tests in the matrix to continue if one fails. fail-fast: false matrix: - # The plain flavor runs the full suite split into four tranches. - # The DB flavors only run the DB-touching packages (see - # DB_UNIT_PKGS), split into two tranches each. + # The full suite is split into four tranches, each running on + # its own runner. 
include: - name: unit-race args: '' @@ -321,22 +320,6 @@ jobs: args: '' tranche: 3 tranches: 4 - - name: unit-race-sqlite - args: tags="test_db_sqlite" pkg="$DB_UNIT_PKGS" - tranche: 0 - tranches: 2 - - name: unit-race-sqlite - args: tags="test_db_sqlite" pkg="$DB_UNIT_PKGS" - tranche: 1 - tranches: 2 - - name: unit-race-postgres - args: tags="test_db_postgres" pkg="$DB_UNIT_PKGS" - tranche: 0 - tranches: 2 - - name: unit-race-postgres - args: tags="test_db_postgres" pkg="$DB_UNIT_PKGS" - tranche: 1 - tranches: 2 steps: - name: Git checkout From 482e24b50f0c0866d32df854d447e614eff3e6a2 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 12:17:52 -0230 Subject: [PATCH 19/23] ci: drop the btcd basic itest configuration (experimental) The btcd configuration differs from the bitcoind one only in the chain-access layer (BtcdNotifier and the rpcclient paths), which the chainntnfs interface test suite exercises against a real btcd node in the unit jobs on every PR. btcd itself also runs as the miner in every other itest configuration. This is the same reasoning that removed the rpcpolling and notxindex configurations. Experimental: revert this commit to restore the configuration. --- .github/workflows/main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c54892f2657..b5a2f0031c5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -363,8 +363,6 @@ jobs: fail-fast: false matrix: include: - - name: btcd - args: backend=btcd cover=1 - name: bitcoind args: backend=bitcoind cover=1 - name: neutrino From 0be05d670d6706a233690d7be2e43a1a1f61c60c Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 12:19:02 -0230 Subject: [PATCH 20/23] ci: reduce the SQL itest matrix to its diagonal (experimental) The SQL itests formed a 2x2 product of engine (sqlite, postgres) and store mode (kv shim, native SQL experiment). 
Two cells cover all the main effects: postgres-native-experiment, where active native-SQL development and the hardest concurrency semantics live, and sqlite-shim, which covers the sqlite engine and the (stable, engine-generic) shim layer. The dropped interactions keep unit-level coverage on every PR: shim-on-postgres via the kvdb_postgres unit variant and native-on-sqlite via the test_db_sqlite unit variant, which also includes any in-flight dev migrations. What is lost is only the whole-daemon run of those two cells, at a saving of roughly 105 runner-minutes per run. Experimental: revert this commit to restore the full matrix. --- .github/workflows/main.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b5a2f0031c5..4349f57771e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -439,8 +439,6 @@ jobs: include: - name: bitcoind-sqlite args: backend=bitcoind dbbackend=sqlite - - name: bitcoind-sqlite=nativesql-experiment - args: backend=bitcoind dbbackend=sqlite nativesql=true tags=test_native_sql steps: - name: Git checkout uses: actions/checkout@v5 @@ -511,8 +509,6 @@ jobs: fail-fast: false matrix: include: - - name: bitcoind-postgres - args: backend=bitcoind dbbackend=postgres - name: bitcoind-postgres-experiment args: backend=bitcoind dbbackend=postgres tags=test_native_sql # The windows binaries are cross-compiled here since building @@ -579,10 +575,6 @@ jobs: # entries. 
tranche_group: [0, 1, 2, 3] config: - - name: bitcoind-postgres - binaries: bitcoind-postgres - seed: 1 - args: backend=bitcoind dbbackend=postgres - name: bitcoind-postgres-nativesql-experiment binaries: bitcoind-postgres-experiment seed: 3 From eee438a16b1bdc056d8f3e86c9b9f794b4adc7f1 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 13:46:13 -0230 Subject: [PATCH 21/23] scripts: balance race tranches by measured package duration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The heavy-package list driving the round-robin tranche split was based on received wisdom about which packages are slow, and the CI logs show that wisdom is badly stale: channeldb runs in 6 seconds under race while invoices takes 691, with lnwire (328s, fuzz seed corpus), internal/musig2v040 (246s) and the lnwallet/test suites (420s combined) following — none of which were in the list. The rebalancing attempts were rearranging seconds-sized packages around an 11.5-minute boulder, which is why the slowest tranche stayed at 27 minutes. Replace the heavy-first round-robin with a greedy longest-processing-time assignment driven by a table of measured durations (unlisted packages get a small default). Each tranche job computes the same deterministic assignment and runs its own bucket. The estimated per-tranche weights come out at 700-702 seconds, so the slowest race job should drop from 27 to roughly 19 minutes. --- scripts/unit_race_part.sh | 109 +++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/scripts/unit_race_part.sh b/scripts/unit_race_part.sh index 3757267efe1..d60b6befdbd 100755 --- a/scripts/unit_race_part.sh +++ b/scripts/unit_race_part.sh @@ -1,9 +1,10 @@ #!/bin/bash # unit_race_part.sh runs one tranche of the unit tests in race detector -# mode. 
The full package list is split round-robin into num_tranches -# tranches, allowing the race detector tests to be spread across -# multiple machines. +# mode. Packages are distributed over num_tranches tranches using a +# greedy longest-processing-time assignment driven by measured package +# durations, allowing the race detector tests to be spread across +# multiple machines with roughly equal runtimes. set -euo pipefail @@ -35,28 +36,41 @@ shift 2 PKG_PREFIX=${PKG:-github.com/lightningnetwork/lnd} DEV_TAGS=${DEV_TAGS:-dev} -# Heavy packages listed first so the round-robin split distributes them -# across different tranches. The order is chosen so that the heaviest -# packages (by measured CI duration; channeldb leads by a wide margin) -# do not share a tranche under the four-tranche split used in CI. -# Update periodically if the profile shifts. -HEAVY_PKGS=( - "${PKG_PREFIX}/channeldb" - "${PKG_PREFIX}/lnwallet" - "${PKG_PREFIX}/htlcswitch" - "${PKG_PREFIX}/invoices" - "${PKG_PREFIX}/graph/db" - "${PKG_PREFIX}/contractcourt" - "${PKG_PREFIX}/routing" - "${PKG_PREFIX}/chainntnfs" - "${PKG_PREFIX}/peer" - "${PKG_PREFIX}/watchtower/wtclient" -) +# pkg_weight echoes the approximate race-mode test duration of a +# package in seconds, measured from CI logs (the per-package "ok" +# lines of the unit race jobs). Unlisted packages get a small default +# weight. The exact values only matter relative to each other; refresh +# them occasionally if the tranches drift out of balance. 
+pkg_weight() { + case "${1#"${PKG_PREFIX}"/}" in + invoices) echo 691 ;; + lnwire) echo 328 ;; + internal/musig2v040) echo 246 ;; + lnwallet/test/neutrino) echo 171 ;; + lnwallet/test/bitcoind) echo 170 ;; + lnwallet/btcwallet) echo 103 ;; + htlcswitch) echo 84 ;; + lnwallet/test/btcd) echo 82 ;; + routing/chainview) echo 66 ;; + watchtower/wtclient) echo 54 ;; + chainntnfs/test/bitcoind) echo 46 ;; + lnwallet) echo 45 ;; + chainntnfs/test/btcd) echo 45 ;; + chainntnfs/test/neutrino) echo 41 ;; + contractcourt) echo 38 ;; + channeldb/migration30) echo 26 ;; + sqldb) echo 24 ;; + chainntnfs) echo 21 ;; + discovery) echo 21 ;; + funding) echo 17 ;; + *) echo 3 ;; + esac +} all_pkgs=() if [[ -n "${UNIT_RACE_PKGS:-}" ]]; then # An explicit package list (relative to PKG) restricts the run, e.g. - # to the database-touching packages for the DB-variant race jobs. + # to the database-touching packages. for pkg in ${UNIT_RACE_PKGS}; do all_pkgs+=("${PKG_PREFIX}/${pkg}") done @@ -70,42 +84,49 @@ else while IFS= read -r pkg; do all_pkgs+=("${pkg}") done <<< "${pkg_list}" -fi -if (( ${#all_pkgs[@]} == 0 )); then - echo "go list produced no packages" >&2 - exit 1 + if (( ${#all_pkgs[@]} == 0 )); then + echo "go list produced no packages" >&2 + exit 1 + fi fi -# Only treat heavy packages that actually appear in the package list as -# heavy, so a stale entry above cannot select a nonexistent package. -heavy=() -for pkg in "${HEAVY_PKGS[@]}"; do - if printf '%s\n' "${all_pkgs[@]}" | grep -qxF "${pkg}"; then - heavy+=("${pkg}") - fi +# Sort packages by descending weight (ties broken by name for +# determinism across tranche jobs) and greedily assign each to the +# currently lightest tranche. Every tranche job computes the same +# assignment and runs only its own bucket. 
+weighted=$(for pkg in "${all_pkgs[@]}"; do + echo "$(pkg_weight "${pkg}") ${pkg}" +done | LC_ALL=C sort -k1,1rn -k2,2) + +sums=() +for ((i=0; i&2 exit 0 fi +echo "Tranche ${TRANCHE} of ${NUM_TRANCHES}:" \ + "${#selected[@]} packages, estimated weight ${sums[TRANCHE]}s" + exit_code=0 for pkg in "${selected[@]}"; do echo "Running race unit tests for ${pkg}" From 73c05cf55ec90d6d1ea59916f3b845683a12b30c Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 13:46:35 -0230 Subject: [PATCH 22/23] ci: run windows itests with 16 tranches (experimental) Prebuilding the windows itest binaries on linux barely moved the job's duration: the Run itest step is 44 of its 45 minutes, so test execution dominates, not compilation. The same wait-bound dynamics that took the sqlite itests from 31 to 21 minutes when moving from 8 to 16 tranches should apply here, since the windows runner has the same four cores. Experimental: revert to SMALL_TRANCHES if the windows runner proves unable to handle 16 concurrent tranches. 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4349f57771e..28075c9989d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -654,7 +654,7 @@ jobs: - name: Run itest if: steps.check-label.outputs.skip != 'true' - run: make itest-only-parallel tranches=${{ env.SMALL_TRANCHES }} windows=1 shuffleseed=${{ github.run_id }} + run: make itest-only-parallel tranches=${{ env.TRANCHES }} windows=1 shuffleseed=${{ github.run_id }} - name: Kill any remaining lnd processes if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} From 1d564a0c91042cf11f4ff0003675591a63a4ca27 Mon Sep 17 00:00:00 2001 From: Jared Tobin Date: Fri, 12 Jun 2026 15:27:03 -0230 Subject: [PATCH 23/23] ci: split windows itests across two runners (experimental) Neither prebuilding the binaries nor doubling the tranche count moved the windows itest job below 41 minutes: its Run itest step is machine-bound at roughly 35 minutes of work, since process creation and file I/O are far more expensive on the windows runner than on linux, and a saturated machine gains nothing from finer tranches. More machines do help, so spread the 16 tranches over two windows runners with 8 apiece, reusing the prebuilt binaries and the tranche offset machinery. Expected per-job time is roughly 20 minutes. Experimental: revert this commit to restore the single windows job. 
--- .github/workflows/main.yml | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 28075c9989d..45e448955df 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -631,9 +631,17 @@ jobs: # run windows integration test ######################## windows-integration-test: - name: Run windows itest + name: Run windows itest (${{ matrix.tranche_group }}) runs-on: windows-latest needs: build-split-itest + strategy: + # Allow the other group to continue if one fails. + fail-fast: false + matrix: + # The windows runner is machine-bound rather than wait-bound, + # so the suite is spread over two runners with half of the + # TRANCHES (16) tranches each. + tranche_group: [0, 1] steps: - name: Git checkout uses: actions/checkout@v5 @@ -652,10 +660,14 @@ jobs: name: itest-binaries-windows path: itest - - name: Run itest + - name: Run itest tranche group ${{ matrix.tranche_group }} if: steps.check-label.outputs.skip != 'true' - run: make itest-only-parallel tranches=${{ env.TRANCHES }} windows=1 shuffleseed=${{ github.run_id }} - + run: | + OFFSET=$((${{ matrix.tranche_group }} * 8)) + make itest-only-parallel tranches=${{ env.TRANCHES }} \ + parallel=8 trancheoffset=$OFFSET windows=1 \ + shuffleseed=${{ github.run_id }} + - name: Kill any remaining lnd processes if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} shell: powershell @@ -664,14 +676,14 @@ jobs: - name: Zip log files on failure if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} timeout-minutes: 5 # timeout after 5 minute - run: 7z a logs-itest-windows.zip itest/**/*.log + run: 7z a logs-itest-windows-${{ matrix.tranche_group }}.zip itest/**/*.log - name: Upload log files on failure uses: actions/upload-artifact@v4 if: ${{ failure() && steps.check-label.outputs.skip != 'true' }} with: - name: logs-itest-windows - path: logs-itest-windows.zip + name: 
logs-itest-windows-${{ matrix.tranche_group }} + path: logs-itest-windows-${{ matrix.tranche_group }}.zip retention-days: 5 ########################