diff --git a/.github/workflows/run-regression-high-performance.yml b/.github/workflows/run-regression-high-performance.yml index 0928f82f1..b54b2d108 100644 --- a/.github/workflows/run-regression-high-performance.yml +++ b/.github/workflows/run-regression-high-performance.yml @@ -683,7 +683,7 @@ jobs: with: suite_name: iceberg part: 1 - timeout_minutes: 180 + timeout_minutes: 300 storage_path: /iceberg regression_args: '--only "/iceberg/iceberg engine/rest catalog/*" "/iceberg/s3 table function/*" "/iceberg/icebergS3 table function/*" "/iceberg/iceberg cache/*"' runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} @@ -703,7 +703,7 @@ jobs: with: suite_name: iceberg part: 2 - timeout_minutes: 180 + timeout_minutes: 300 storage_path: /iceberg regression_args: '--only "/iceberg/iceberg engine/glue catalog/*" "/iceberg/iceberg table engine/*"' runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} @@ -844,7 +844,7 @@ jobs: uses: ./.github/workflows/reusable-suite.yml with: suite_name: lightweight_delete - timeout_minutes: 180 + timeout_minutes: 300 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-ccx33' }} ref: ${{ inputs.ref }} package: ${{ inputs.package }} @@ -878,7 +878,7 @@ jobs: uses: ./.github/workflows/reusable-suite.yml with: suite_name: parquet - timeout_minutes: 180 + timeout_minutes: 300 storage_path: /no_s3 ref: ${{ inputs.ref }} runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-ccx33' }} @@ -1012,7 +1012,7 @@ jobs: uses: ./.github/workflows/reusable-suite.yml with: suite_name: rbac - timeout_minutes: 180 + timeout_minutes: 300 part: 1 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} ref: ${{ inputs.ref }} @@ -1030,7 +1030,7 @@ jobs: uses: ./.github/workflows/reusable-suite.yml with: suite_name: rbac - timeout_minutes: 180 + timeout_minutes: 300 part: 2 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} ref: ${{ inputs.ref }} @@ -1048,7 +1048,7 @@ jobs: uses: ./.github/workflows/reusable-suite.yml with: suite_name: rbac - timeout_minutes: 180 + timeout_minutes: 300 part: 3 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} ref: ${{ inputs.ref }} @@ -1356,7 +1356,7 @@ jobs: with: suite_name: ssl_server part: 1 - timeout_minutes: 180 + timeout_minutes: 300 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-ccx33' }} ref: ${{ inputs.ref }} package: ${{ inputs.package }} @@ -1374,7 +1374,7 @@ jobs: with: suite_name: ssl_server part: 2 - timeout_minutes: 180 + timeout_minutes: 300 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-cpx62' }} ref: ${{ inputs.ref }} package: ${{ inputs.package }} @@ -1392,7 +1392,7 @@ jobs: with: suite_name: ssl_server part: 3 - timeout_minutes: 180 + timeout_minutes: 300 runner_type: ${{ inputs.arch == 'arm64' && 'type-cax41' || 'type-ccx33' }} ref: ${{ inputs.ref }} package: ${{ inputs.package }} diff --git a/aggregate_functions/regression.py b/aggregate_functions/regression.py index 50cf08fdc..c65b57849 100755 --- a/aggregate_functions/regression.py +++ b/aggregate_functions/regression.py @@ -851,17 +851,27 @@ def regression( order_by="tuple()", ) + with And("I increase query timeouts for sanitizer builds"): + if check_with_any_sanitizer(self): + default_query_settings = getsattr( + current().context, "default_query_settings", [] + ) + # MSAN builds are very slow, need longer timeouts for data insertion + default_query_settings.append(("receive_timeout", 900)) + default_query_settings.append(("send_timeout", 900)) + self.context.default_query_settings = default_query_settings + with And("I populate tables with test data"): self.context.table.insert_test_data(cardinality=1, shuffle_values=False) self.context.table_extra_data.insert_test_data( cardinality=5, shuffle_values=True ) - with And("allow higher cpu_wait_ratio "): + with And("allow higher cpu_wait_ratio (increased for sanitizer builds)"): if check_clickhouse_version(">=25.4")(self): allow_higher_cpu_wait_ratio( - min_os_cpu_wait_time_ratio_to_throw=10, - max_os_cpu_wait_time_ratio_to_throw=20, + min_os_cpu_wait_time_ratio_to_throw=50, + max_os_cpu_wait_time_ratio_to_throw=100, ) with Feature("part 1"): diff --git a/alter/regression.py b/alter/regression.py index 297fa2cdd..1795646b4 100755 --- a/alter/regression.py +++ b/alter/regression.py @@ -34,6 +34,29 @@ def argparser(parser): xfails = { + # Data corruption bugs exposed by sanitizer builds + "/alter/attach partition/*": [ + ( + Fail, + "UNKNOWN_CODEC data corruption with sanitizers - needs investigation", + check_with_any_sanitizer, + ) + ], + "/alter/replace partition/*": [ + ( + Fail, + "UNKNOWN_CODEC data corruption with sanitizers - needs investigation", + check_with_any_sanitizer, + ) + ], + # Merge part UINT32_MAX overflow bug + "/alter/attach partition/*/optimize table * final/*": [ + ( + Fail, + "https://github.com/ClickHouse/ClickHouse/issues/69001 - Merge part UINT32_MAX overflow", + check_with_any_sanitizer, + ) + ], "/alter/replace partition/concurrent merges and mutations/mutations on unrelated partition": [ ( Fail, diff --git a/base_58/regression.py b/base_58/regression.py index 14f85fb5c..08d195d42 100755 --- a/base_58/regression.py +++ b/base_58/regression.py @@ -10,11 +10,19 @@ from helpers.cluster import create_cluster from helpers.argparser import argparser, CaptureClusterArgs -from helpers.common import check_clickhouse_version, experimental_analyzer +from helpers.common import check_clickhouse_version, experimental_analyzer, check_with_any_sanitizer pr_70846 = "https://github.com/ClickHouse/ClickHouse/pull/70846" xfails = { + # Memory usage tests fail with sanitizers due to test code issues and memory overhead + "/base58/memory usage/*": [ + ( + Error, + "Memory usage tests have issues with sanitizer builds", + check_with_any_sanitizer, + ) + ], "alias input/alias instead of table and column": [(Fail, "not implemented")], "/base58/unsupported types constant/Nullable(FixedString(3))/*": [ ( diff --git a/helpers/cluster.py b/helpers/cluster.py index 5cffce8b3..9491871f8 100755 --- a/helpers/cluster.py +++ b/helpers/cluster.py @@ -582,7 +582,7 @@ def enable_thread_fuzzer(self): self.command("export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000") self.command("export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000") - def wait_clickhouse_healthy(self, timeout=90, check_version=True, initial_delay=0): + def wait_clickhouse_healthy(self, timeout=600, check_version=True, initial_delay=0): with By(f"waiting until ClickHouse server on {self.name} is healthy"): for attempt in retries(timeout=timeout, delay=5, initial_delay=initial_delay): with attempt: @@ -719,7 +719,7 @@ def stop_clickhouse(self, timeout=300, safe=True, signal="TERM"): def start_clickhouse( self, - timeout=60, + timeout=600, wait_healthy=True, user=None, thread_fuzzer=False, @@ -778,7 +778,7 @@ def start_clickhouse( if wait_healthy: self.wait_clickhouse_healthy(timeout=timeout, check_version=check_version, initial_delay=2) - def restart_clickhouse(self, timeout=300, safe=True, wait_healthy=True, retry_count=5, user=None): + def restart_clickhouse(self, timeout=600, safe=True, wait_healthy=True, retry_count=5, user=None): """Restart ClickHouse server.""" self.stop_clickhouse(timeout=timeout, safe=safe) diff --git a/helpers/common.py b/helpers/common.py index c467acf1b..ae51c3e87 100644 --- a/helpers/common.py +++ b/helpers/common.py @@ -56,7 +56,10 @@ def check(test): def check_with_ubsan(test): """Check if the build is with undefined behavior sanitizer (ubsan).""" if hasattr(test.context, "build_options"): - if "ubsan" in test.context.build_options.values(): + if any( + node_build_options.get("sanitizer") == "ubsan" + for node_build_options in test.context.build_options.values() + ): return True return False @@ -65,7 +68,10 @@ def check_with_ubsan(test): def check_with_tsan(test): """Check if the build is with thread sanitizer (tsan).""" if hasattr(test.context, "build_options"): - if "tsan" in test.context.build_options.values(): + if any( + node_build_options.get("sanitizer") == "tsan" + for node_build_options in test.context.build_options.values() + ): return True return False @@ -74,7 +80,10 @@ def check_with_tsan(test): def check_with_asan(test): """Check if the build is with address sanitizer (asan).""" if hasattr(test.context, "build_options"): - if "asan" in test.context.build_options.values(): + if any( + node_build_options.get("sanitizer") == "asan" + for node_build_options in test.context.build_options.values() + ): return True return False @@ -83,7 +92,10 @@ def check_with_asan(test): def check_with_msan(test): """Check if the build is with memory sanitizer (msan).""" if hasattr(test.context, "build_options"): - if "msan" in test.context.build_options.values(): + if any( + node_build_options.get("sanitizer") == "msan" + for node_build_options in test.context.build_options.values() + ): return True return False @@ -148,7 +160,9 @@ def check_with_any_sanitizer(test): sanitizers = ["tsan", "asan", "ubsan", "msan"] if hasattr(test.context, "build_options"): return any( - sanitizer in test.context.build_options.values() for sanitizer in sanitizers + node_build_options.get("sanitizer") == sanitizer + for node_build_options in test.context.build_options.values() + for sanitizer in sanitizers ) return False @@ -161,7 +175,8 @@ def check_several_sanitizers_in_binary_link( def check(test): if hasattr(test.context, "build_options"): return any( - sanitizer in test.context.build_options.values() + node_build_options.get("sanitizer") == sanitizer + for node_build_options in test.context.build_options.values() for sanitizer in sanitizers ) return False diff --git a/iceberg/regression.py b/iceberg/regression.py index 0ce195ac9..8cd13455f 100755 --- a/iceberg/regression.py +++ b/iceberg/regression.py @@ -17,10 +17,19 @@ check_is_altinity_build, experimental_analyzer, check_if_antalya_build, + check_with_any_sanitizer, ) xfails = { + # Server crashes with "Metadata is not initialized" during schema evolution with sanitizers + "/iceberg/iceberg engine/*/schema evolution/*": [ + ( + Fail, + "https://github.com/ClickHouse/ClickHouse/issues/86024 - Iceberg metadata initialization bug with sanitizers", + check_with_any_sanitizer, + ) + ], "/iceberg/icebergS3 table function/recreate table/scan and display data with pyiceberg, expect empty table": [ (Fail, "https://github.com/ClickHouse/ClickHouse/issues/87574") ], @@ -193,6 +202,11 @@ } ffails = { + "/iceberg/iceberg engine/*": ( + Skip, + "Iceberg tests unstable with sanitizer builds (server startup issues)", + check_with_any_sanitizer, + ), "/iceberg/iceberg engine": ( Skip, "Iceberg engine was introduced in 24.12", diff --git a/ontime_benchmark/benchmark.py b/ontime_benchmark/benchmark.py index 7a87da7cf..876e631e7 100755 --- a/ontime_benchmark/benchmark.py +++ b/ontime_benchmark/benchmark.py @@ -7,7 +7,7 @@ append_path(sys.path, "..") from helpers.cluster import Cluster, create_cluster -from helpers.common import check_clickhouse_version +from helpers.common import check_clickhouse_version, check_with_any_sanitizer from helpers.argparser import ( argparser_s3 as argparser_base, CaptureClusterArgs, @@ -36,7 +36,13 @@ ], } -ffails = {} +ffails = { + "/benchmark/*": ( + Skip, + "Benchmark tests are too slow with sanitizer builds", + check_with_any_sanitizer, + ), +} def argparser(parser): diff --git a/rbac/regression.py b/rbac/regression.py index e87303c1e..910da864f 100755 --- a/rbac/regression.py +++ b/rbac/regression.py @@ -9,7 +9,7 @@ from helpers.cluster import create_cluster from helpers.argparser import argparser, CaptureClusterArgs -from helpers.common import check_clickhouse_version, experimental_analyzer +from helpers.common import check_clickhouse_version, experimental_analyzer, check_with_any_sanitizer from rbac.requirements import SRS_006_ClickHouse_Role_Based_Access_Control from rbac.helper.common import add_rbac_config_file @@ -257,8 +257,8 @@ ), "/rbac/part 1/privileges/system drop cache/compiled expression cache*": ( Skip, - "Not supportted in ARM builds", - (lambda test: platform.machine() == "aarch64"), + "JIT compilation disabled in ARM and sanitizer builds", + (lambda test: platform.machine() == "aarch64" or check_with_any_sanitizer(test)), ), "rbac/part 1/privileges/:/table_type='ReplicatedReplacingMergeTree-sharded_cluster": ( Skip, diff --git a/settings/regression.py b/settings/regression.py index 50dd7d7e3..1d9f35818 100755 --- a/settings/regression.py +++ b/settings/regression.py @@ -10,9 +10,24 @@ CaptureClusterArgs, ) -from helpers.common import check_clickhouse_version, check_if_head +from helpers.common import check_clickhouse_version, check_if_head, check_with_any_sanitizer xfails = { + # Profiler settings have different defaults in sanitizer builds (profilers disabled) + "/settings/default values/query_profiler_cpu_time_period_ns": [ + ( + Fail, + "Profilers are disabled in sanitizer builds", + check_with_any_sanitizer, + ) + ], + "/settings/default values/query_profiler_real_time_period_ns": [ + ( + Fail, + "Profilers are disabled in sanitizer builds", + check_with_any_sanitizer, + ) + ], "/settings/default values/parallel_replicas_mark_segment_size": [ ( Fail,