diff --git a/.gitignore b/.gitignore index 259888fdff..151f6cb25b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,8 @@ log.txt load_out.txt server.log server.pid +server.endpoint +server.endpoint.tmp arc_token.txt data-size.txt .doris_home diff --git a/hyper-parquet/benchmark.sh b/hyper-parquet/benchmark.sh index fea177f34d..12a00b626c 100755 --- a/hyper-parquet/benchmark.sh +++ b/hyper-parquet/benchmark.sh @@ -1,10 +1,4 @@ #!/bin/bash # Thin shim — actual flow is in lib/benchmark-common.sh. export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned" -export BENCH_RESTARTABLE=no -# Single-process engine: each query forks a fresh full-machine process with no -# shared scheduler across connections, so the concurrent-QPS test only -# oversubscribes RAM rather than measuring throughput. Skip it by default; -# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946. -export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" exec ../lib/benchmark-common.sh diff --git a/hyper-parquet/check b/hyper-parquet/check index 23ad27458a..f3fdc4a1f4 100755 --- a/hyper-parquet/check +++ b/hyper-parquet/check @@ -1,12 +1,24 @@ #!/bin/bash +# Readiness probe: connect to the persistent Hyper server (via the descriptor +# ./start published to server.endpoint) and run SELECT 1. Non-zero exit means +# "not up yet" — the benchmark driver polls this in a loop after ./start and +# uses its transition to failing as the "server is really stopped" signal in +# the cold cycle. set -e # shellcheck disable=SC1091 source myenv/bin/activate +# No endpoint published => server isn't up. 
+[ -s server.endpoint ] || exit 1 + python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_list_query("SELECT 1") +from tableauhyperapi import Connection, Endpoint + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_list_query("SELECT 1") PY diff --git a/hyper-parquet/query b/hyper-parquet/query index 36e4c2efb9..6308e2d926 100755 --- a/hyper-parquet/query +++ b/hyper-parquet/query @@ -1,9 +1,16 @@ #!/bin/bash -# Reads a SQL query from stdin, runs it via tableau hyperapi against the -# partitioned parquet files (registered as a temp external table from -# create.sql). +# Reads a SQL query from stdin, runs it once against the partitioned parquet +# files on the PERSISTENT Hyper server started by ./start (descriptor in +# server.endpoint). The temp external table is (re)created from create.sql +# before the timer starts, so its setup is not counted. # Stdout: query result. # Stderr: query runtime in fractional seconds on the last line. +# +# The benchmark driver calls this once per try (BENCH_TRIES). Because every +# call connects to the SAME long-lived server (and the parquet files stay in +# the OS page cache between tries), try 1 (right after the driver's +# stop/drop_caches/start cold cycle) is cold and tries 2..N are genuinely hot. +# See issue #936. 
set -e # shellcheck disable=SC1091 @@ -18,17 +25,20 @@ cat > "$query_file" python3 - "$query_file" <<'PY' import sys import timeit -from tableauhyperapi import HyperProcess, Telemetry, Connection +from tableauhyperapi import Connection, Endpoint with open(sys.argv[1]) as f: query = f.read() -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_command(open("create.sql").read()) - start = timeit.default_timer() - rows = connection.execute_list_query(query) - end = timeit.default_timer() +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_command(open("create.sql").read()) + start = timeit.default_timer() + rows = connection.execute_list_query(query) + end = timeit.default_timer() for r in rows: print(r) diff --git a/hyper-parquet/results/20260630/c6a.2xlarge.json b/hyper-parquet/results/20260630/c6a.2xlarge.json new file mode 100644 index 0000000000..90e50a6061 --- /dev/null +++ b/hyper-parquet/results/20260630/c6a.2xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c6a.2xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 9, + "data_size": 14737666736, + "result": [ + [0.847, 0.367, 0.317], + [1.126, 0.391, 0.347], + [1.4, 0.616, 0.636], + [2.35, 0.543, 0.509], + [3.176, 1.523, 1.487], + [3.219, 1.595, 1.509], + [1.071, 0.525, 0.483], + [0.869, 0.411, 0.355], + [3.477, 1.806, 1.803], + [4.339, 2.352, 2.312], + [1.779, 0.525, 0.468], + [1.783, 0.569, 0.549], + [2.293, 1.486, 1.422], + [4.487, 2.536, 2.471], + [2.368, 1.555, 1.555], + [2.353, 1.546, 1.57], + [4.085, 2.61, 2.593], + [4.524, 2.384, 2.433], + [6.829, 5.147, 5.073], + [2.395, 0.382, 
0.352], + [10.528, 2.095, 2.07], + [12.01, 2.275, 2.238], + [20.442, 6.167, 6.241], + [48.226, 6.163, 6.642], + [3.793, 1.127, 1.109], + [1.998, 0.982, 1.014], + [3.781, 1.112, 1.156], + [10.704, 2.51, 2.425], + [18.011, 17.04, 16.781], + [9.618, 9.05, 9.074], + [3.916, 1.703, 1.67], + [7.593, 2.016, 1.99], + [9.679, 7.514, 7.472], + [11.325, 3.903, 3.914], + [11.336, 3.95, 3.941], + [2.285, 1.433, 1.462], + [0.889, 0.429, 0.372], + [1.503, 0.417, 0.371], + [0.942, 0.37, 0.33], + [1.131, 0.495, 0.497], + [0.969, 0.311, 0.346], + [0.988, 0.315, 0.346], + [0.969, 0.331, 0.341] +] +} diff --git a/hyper-parquet/results/20260630/c6a.4xlarge.json b/hyper-parquet/results/20260630/c6a.4xlarge.json new file mode 100644 index 0000000000..c2be4f61ae --- /dev/null +++ b/hyper-parquet/results/20260630/c6a.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 21, + "data_size": 14737666736, + "result": [ + [0.657, 0.292, 0.294], + [0.772, 0.265, 0.269], + [1.258, 0.425, 0.437], + [2.182, 0.363, 0.362], + [2.787, 1.005, 0.979], + [2.889, 0.938, 0.932], + [0.778, 0.362, 0.343], + [0.751, 0.291, 0.284], + [3.047, 1.128, 1.146], + [3.739, 1.374, 1.35], + [2.394, 0.395, 0.333], + [2.247, 0.413, 0.407], + [2.242, 0.884, 0.857], + [3.97, 1.393, 1.432], + [2.289, 0.904, 0.901], + [2.059, 0.992, 1.024], + [3.829, 1.582, 1.59], + [3.711, 1.517, 1.465], + [6.15, 2.973, 3.022], + [1.346, 0.32, 0.304], + [10.512, 1.212, 1.226], + [12.017, 1.314, 1.3], + [20.395, 3.316, 3.333], + [48.211, 4.209, 4.199], + [3.763, 0.719, 0.704], + [2.004, 0.623, 0.626], + [3.763, 0.73, 0.696], + [10.7, 1.407, 1.41], + [9.505, 7.625, 7.817], + [5.284, 4.757, 4.786], + [3.812, 0.991, 0.979], + [7.393, 1.185, 1.178], + [7.914, 4.482, 4.515], + [11.074, 2.51, 2.471], + [11.77, 2.459, 2.464], + 
[1.69, 0.915, 0.921], + [0.771, 0.339, 0.335], + [0.73, 0.335, 0.348], + [0.767, 0.282, 0.308], + [1.112, 0.433, 0.449], + [0.833, 0.296, 0.25], + [0.863, 0.283, 0.274], + [0.741, 0.287, 0.28] +] +} diff --git a/hyper-parquet/results/20260630/c6a.large.json b/hyper-parquet/results/20260630/c6a.large.json new file mode 100644 index 0000000000..aceb389cb0 --- /dev/null +++ b/hyper-parquet/results/20260630/c6a.large.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c6a.large", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 1, + "data_size": 14737666736, + "result": [ + [2.169, 0.85, 0.832], + [2.334, 0.964, 0.941], + [3.583, 1.921, 1.892], + [3.737, 1.536, 1.511], + [7.31, 4.716, null], + [7.577, 5.311, 5.346], + [2.784, 1.424, 1.393], + [2.297, 0.992, 0.954], + [8.149, 5.721, 5.623], + [10.376, 7.372, 7.44], + [4.182, 1.48, 1.472], + [4.163, 1.659, 1.661], + [7.066, 5.025, 5.035], + [36.164, 37.906, 49.875], + [7.427, 5.155, 5.177], + [7.282, 4.856, 4.848], + [72.059, 71.407, 69.517], + [72.527, 35.932, 32.879], + [141.378, 148.188, 144.954], + [3.038, 0.904, 0.914], + [12.314, 7.582, 7.528], + [14.006, 9.733, 8.838], + [33.304, 34.997, 36.306], + [66.348, 71.636, 73.609], + [6.795, 3.814, 3.784], + [5.15, 3.358, 3.324], + [6.958, 3.852, 3.783], + [13.887, 8.938, 9.148], + [69.928, 131.586, 83.562], + [36.88, 35.969, 35.892], + [8.842, 5.472, 5.526], + [12.767, 6.441, 6.679], + [195.523, 196.508, 195.204], + [null, null, null], + [null, null, null], + [6.517, 4.35, 4.288], + [2.104, 0.811, 0.761], + [2.155, 0.812, 0.78], + [1.958, 0.722, 0.703], + [2.348, 0.923, 0.927], + [2.112, 0.709, 0.678], + [2.259, 0.726, 0.685], + [1.922, 0.704, 0.733] +] +} diff --git a/hyper-parquet/results/20260630/c6a.metal.json b/hyper-parquet/results/20260630/c6a.metal.json new file mode 100644 index 0000000000..87bae2ad6a --- 
/dev/null +++ b/hyper-parquet/results/20260630/c6a.metal.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c6a.metal", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 64, + "data_size": 14737666736, + "result": [ + [0.68, 0.238, 0.24], + [0.792, 0.245, 0.25], + [1.219, 0.284, 0.281], + [1.914, 0.273, 0.273], + [2.037, 0.389, 0.378], + [2.448, 0.446, 0.441], + [0.786, 0.263, 0.264], + [0.804, 0.268, 0.266], + [2.263, 0.428, 0.425], + [3.151, 0.543, 0.546], + [2.12, 0.328, 0.321], + [1.744, 0.313, 0.313], + [2.04, 0.468, 0.442], + [3.4, 0.58, 0.581], + [2.09, 0.482, 0.444], + [1.696, 0.42, 0.417], + [3.406, 0.619, 0.636], + [3.326, 0.566, 0.53], + [5.37, 0.831, 0.844], + [1.34, 0.299, 0.268], + [10.515, 0.501, 0.491], + [11.984, 0.518, 0.541], + [20.361, 0.889, 0.84], + [48.146, 1.71, 1.549], + [3.738, 0.369, 0.385], + [1.954, 0.357, 0.356], + [3.741, 0.376, 0.401], + [10.65, 0.586, 0.598], + [9.313, 2.29, 2.318], + [1.259, 0.916, 0.835], + [3.551, 0.465, 0.452], + [7.075, 0.543, 0.541], + [6.055, 1.129, 1.133], + [10.864, 0.915, 0.884], + [10.896, 0.937, 0.972], + [1.341, 0.381, 0.361], + [0.771, 0.341, 0.335], + [0.718, 0.346, 0.322], + [0.876, 0.282, 0.265], + [1.118, 0.444, 0.402], + [0.662, 0.275, 0.251], + [0.803, 0.259, 0.273], + [0.671, 0.275, 0.291] +] +} diff --git a/hyper-parquet/results/20260630/c6a.xlarge.json b/hyper-parquet/results/20260630/c6a.xlarge.json new file mode 100644 index 0000000000..3598a8090c --- /dev/null +++ b/hyper-parquet/results/20260630/c6a.xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c6a.xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 4, + "data_size": 14737666736, + "result": [ + [1.17, 0.49, 0.488], + 
[1.312, 0.551, 0.524], + [1.866, 1.036, 1.031], + [2.497, 0.826, 0.847], + [3.844, 2.371, 2.526], + [3.891, 2.785, 2.731], + [1.644, 0.775, 0.744], + [1.466, 0.556, 0.545], + [4.258, 3.015, 3.015], + [5.38, 3.958, 3.986], + [2.242, 0.797, 0.778], + [2.387, 0.902, 0.898], + [3.732, 2.553, 2.565], + [6.118, 4.393, 4.339], + [3.907, 2.664, 2.712], + [3.917, 2.607, 2.553], + [6.242, 4.579, 4.591], + [5.958, 4.311, 4.306], + [94.259, 89.972, 94.724], + [1.768, 0.525, 0.522], + [10.538, 3.855, 3.87], + [12.004, 4.12, 4.132], + [20.44, 11.925, 11.879], + [53.704, 53.236, 54.728], + [3.784, 1.978, 1.978], + [2.839, 1.759, 1.719], + [3.773, 2.004, 1.996], + [10.705, 4.559, 4.576], + [34.637, 33.404, 33.052], + [18.629, 17.956, 17.862], + [4.697, 2.935, 2.901], + [7.94, 3.469, 3.405], + [173.433, 172.276, 174.015], + [null, null, null], + [null, null, null], + [3.533, 2.335, 2.442], + [1.369, 0.498, 0.494], + [1.285, 0.541, 0.493], + [1.226, 0.47, 0.456], + [1.485, 0.616, 0.609], + [1.203, 0.431, 0.41], + [1.34, 0.43, 0.445], + [1.206, 0.455, 0.454] +] +} diff --git a/hyper-parquet/results/20260630/c7a.metal-48xl.json b/hyper-parquet/results/20260630/c7a.metal-48xl.json new file mode 100644 index 0000000000..b5365ac58a --- /dev/null +++ b/hyper-parquet/results/20260630/c7a.metal-48xl.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "c7a.metal-48xl", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 64, + "data_size": 14737666736, + "result": [ + [0.622, 0.213, 0.212], + [0.684, 0.229, 0.227], + [1.099, 0.242, 0.243], + [1.836, 0.249, 0.238], + [1.903, 0.325, 0.304], + [2.3, 0.356, 0.352], + [0.698, 0.247, 0.24], + [0.695, 0.227, 0.224], + [2.131, 0.353, 0.342], + [2.926, 0.396, 0.394], + [2.006, 0.261, 0.251], + [2.06, 0.264, 0.252], + [2.277, 0.346, 0.339], + [3.654, 0.436, 0.434], + [2.329, 0.366, 0.345], + [1.922, 0.347, 
0.34], + [3.645, 0.462, 0.462], + [3.643, 0.423, 0.395], + [5.709, 0.648, 0.624], + [1.617, 0.232, 0.234], + [10.931, 0.389, 0.397], + [12.435, 0.417, 0.395], + [20.37, 0.626, 0.617], + [48.156, 1.008, 1.005], + [3.758, 0.308, 0.312], + [1.964, 0.296, 0.309], + [3.756, 0.315, 0.319], + [10.68, 0.429, 0.42], + [9.223, 2.198, 1.946], + [0.966, 0.526, 0.513], + [3.548, 0.387, 0.362], + [7.042, 0.414, 0.415], + [6.004, 0.839, 0.814], + [10.716, 0.643, 0.619], + [10.749, 0.64, 0.635], + [1.34, 0.334, 0.325], + [0.734, 0.304, 0.301], + [0.687, 0.283, 0.292], + [0.694, 0.24, 0.242], + [0.892, 0.386, 0.387], + [0.636, 0.23, 0.227], + [0.785, 0.233, 0.229], + [0.707, 0.252, 0.254] +] +} diff --git a/hyper-parquet/results/20260630/t3a.small.json b/hyper-parquet/results/20260630/t3a.small.json new file mode 100644 index 0000000000..e28c974624 --- /dev/null +++ b/hyper-parquet/results/20260630/t3a.small.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper (Parquet)", + "date": "2026-06-30", + "machine": "t3a.small", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","stateless"], + "load_time": 0, + "data_size": 14737666736, + "result": [ + [4.065, 2.412, 2.364], + [5.003, 2.61, 2.394], + [6.57, 3.919, 4.117], + [6.553, 3.288, 3.451], + [29.336, 29.931, 28.981], + [null, null, null], + [5.996, 3.241, 3.062], + [4.881, 2.587, 2.406], + [39.113, 38.896, 41.564], + [47.997, 49.905, 46.69], + [7.112, 3.545, 3.684], + [7.142, 4, 3.867], + [null, null, null], + [null, null, null], + [null, null, null], + [50.365, 51.845, 50.455], + [null, null, null], + [null, null, null], + [null, null, null], + [5.971, 2.536, 2.52], + [20.095, 22.041, 22.857], + [22.991, 25.905, 26.5], + [55.34, 56.72, 59.667], + [null, null, null], + [11.33, 6.874, 6.907], + [9.382, 6.193, 6.126], + [11.051, 7.032, 6.768], + [24.572, 26.673, 28.496], + [null, null, null], + [118.46, 115.895, 118.548], + [25.877, 18.005, 35.221], + [72.397, 
73.285, 68.595], + [305.252, 305.445, 306.043], + [null, null, null], + [null, null, null], + [37.294, 37.787, 38.207], + [4.974, 2.23, 2.35], + [4.548, 2.197, 2.067], + [4.435, 2.19, 2.138], + [4.937, 2.6, 2.521], + [4.375, 2.215, 2.181], + [4.952, 2.022, 2.217], + [4.307, 2.266, 2.233] +] +} diff --git a/hyper-parquet/start b/hyper-parquet/start index 06bd986563..ce066f41e3 100755 --- a/hyper-parquet/start +++ b/hyper-parquet/start @@ -1,2 +1,67 @@ #!/bin/bash -exit 0 +# Launch ONE long-lived Hyper server (hyperd) and publish its connection +# descriptor to server.endpoint. Every ./query invocation then connects to +# this single persistent process instead of spawning its own. +# +# Unlike hyper/, there is no hits.hyper to keep attached: the data is external +# parquet read through the OS page cache (which the driver preserves between +# tries), and the temp external table is connection-scoped so each ./query +# recreates it untimed. +set -e + +# shellcheck disable=SC1091 +source myenv/bin/activate + +# Already running? The pidfile + a live process is authoritative. +if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then + exit 0 +fi + +# Clean up stale artifacts from a previous (possibly crashed) server. +rm -f server.pid server.endpoint + +# Background a supervisor that opens HyperProcess, writes the descriptor, then +# blocks until ./stop signals it. nohup so it survives this script exiting; +# $! is the supervisor PID we kill in ./stop. +nohup python3 - >server.log 2>&1 <<'PY' & +import os +import signal +import sys +from tableauhyperapi import HyperProcess, Telemetry + + +def _terminate(*_): + # Raise SystemExit so the `with HyperProcess` block exits cleanly and + # hyperd is shut down with us (it is terminated when its controlling + # process exits). 
+ sys.exit(0) + + +signal.signal(signal.SIGTERM, _terminate) +signal.signal(signal.SIGINT, _terminate) + +with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: + # Publish the descriptor atomically so ./check and ./query never read a + # half-written file. + with open("server.endpoint.tmp", "w") as f: + f.write(hyper.endpoint.connection_descriptor) + os.replace("server.endpoint.tmp", "server.endpoint") + # Block until a signal arrives; loop so a stray signal can't tear the + # server down (only the handler's sys.exit does). + while True: + signal.pause() +PY +echo $! > server.pid + +# Give the supervisor a moment to publish the endpoint. The benchmark driver +# also runs ./check in a loop afterwards, so this is just a fast-path / clean +# error rather than the authoritative readiness gate. +for _ in $(seq 1 60); do + if [ -s server.endpoint ]; then + exit 0 + fi + sleep 1 +done + +echo "hyper-parquet: server did not publish server.endpoint within 60s" >&2 +exit 1 diff --git a/hyper-parquet/stop b/hyper-parquet/stop index 06bd986563..cb2bbedc17 100755 --- a/hyper-parquet/stop +++ b/hyper-parquet/stop @@ -1,2 +1,24 @@ #!/bin/bash +# Stop the persistent Hyper server started by ./start. SIGTERM the supervisor +# (see ./start); its handler exits the `with HyperProcess` block, which shuts +# down hyperd. Idempotent: a missing/stale pidfile is not an error. +set -e + +if [ -f server.pid ]; then + pid="$(cat server.pid 2>/dev/null || true)" + if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then + kill "$pid" 2>/dev/null || true + # Wait for the supervisor (and thus hyperd) to actually exit so the + # benchmark driver's drop_caches isn't defeated by pages still pinned + # by a live mmap. + for _ in $(seq 1 60); do + kill -0 "$pid" 2>/dev/null || break + sleep 1 + done + # Still alive after 60s? Force it. 
+ kill -9 "$pid" 2>/dev/null || true + fi +fi + +rm -f server.pid server.endpoint exit 0 diff --git a/hyper/benchmark.sh b/hyper/benchmark.sh index 1b692a2155..7d4ef95c38 100755 --- a/hyper/benchmark.sh +++ b/hyper/benchmark.sh @@ -1,10 +1,4 @@ #!/bin/bash # Thin shim — actual flow is in lib/benchmark-common.sh. export BENCH_DOWNLOAD_SCRIPT="download-hits-csv" -export BENCH_RESTARTABLE=no -# Single-process engine: each query forks a fresh full-machine process with no -# shared scheduler across connections, so the concurrent-QPS test only -# oversubscribes RAM rather than measuring throughput. Skip it by default; -# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946. -export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}" exec ../lib/benchmark-common.sh diff --git a/hyper/check b/hyper/check index 23ad27458a..f3fdc4a1f4 100755 --- a/hyper/check +++ b/hyper/check @@ -1,12 +1,24 @@ #!/bin/bash +# Readiness probe: connect to the persistent Hyper server (via the descriptor +# ./start published to server.endpoint) and run SELECT 1. Non-zero exit means +# "not up yet" — the benchmark driver polls this in a loop after ./start and +# uses its transition to failing as the "server is really stopped" signal in +# the cold cycle. set -e # shellcheck disable=SC1091 source myenv/bin/activate +# No endpoint published => server isn't up. 
+[ -s server.endpoint ] || exit 1 + python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint) as connection: - connection.execute_list_query("SELECT 1") +from tableauhyperapi import Connection, Endpoint + +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint) as connection: + connection.execute_list_query("SELECT 1") PY diff --git a/hyper/load b/hyper/load index a4a1d58cb0..9c34e7d839 100755 --- a/hyper/load +++ b/hyper/load @@ -1,4 +1,8 @@ #!/bin/bash +# Create hits.hyper and COPY hits.csv into it, using the PERSISTENT Hyper +# server started by ./start (descriptor in server.endpoint). Loading through +# the already-running server avoids briefly running two hyperd instances +# (each of which would try to claim up to 80% of RAM) during the heavy COPY. 
set -e # shellcheck disable=SC1091 @@ -8,12 +12,15 @@ source myenv/bin/activate rm -f hits.hyper python3 - <<'PY' -from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode +from tableauhyperapi import Connection, Endpoint, CreateMode -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection: - connection.execute_command(open("create.sql").read()) - connection.execute_command("copy hits from 'hits.csv' with (format csv)") +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection: + connection.execute_command(open("create.sql").read()) + connection.execute_command("copy hits from 'hits.csv' with (format csv)") PY rm -f hits.csv diff --git a/hyper/query b/hyper/query index d0f59d1a0e..3465f9df13 100755 --- a/hyper/query +++ b/hyper/query @@ -1,8 +1,13 @@ #!/bin/bash -# Reads a SQL query from stdin, runs it via tableau hyperapi against -# hits.hyper. +# Reads a SQL query from stdin, runs it once against hits.hyper on the +# PERSISTENT Hyper server started by ./start (descriptor in server.endpoint). # Stdout: query result. # Stderr: query runtime in fractional seconds on the last line. +# +# The benchmark driver calls this once per try (BENCH_TRIES). Because every +# call connects to the SAME long-lived server, the buffer pool stays warm +# across tries: try 1 (right after the driver's stop/drop_caches/start cold +# cycle) is cold, tries 2..N are genuinely hot. See issue #936. 
set -e # shellcheck disable=SC1091 @@ -17,16 +22,19 @@ cat > "$query_file" python3 - "$query_file" <<'PY' import sys import timeit -from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode +from tableauhyperapi import Connection, Endpoint with open(sys.argv[1]) as f: query = f.read() -with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper: - with Connection(hyper.endpoint, 'hits.hyper', CreateMode.NONE) as connection: - start = timeit.default_timer() - rows = connection.execute_list_query(query) - end = timeit.default_timer() +with open("server.endpoint") as f: + descriptor = f.read().strip() + +endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench") +with Connection(endpoint, 'hits.hyper') as connection: + start = timeit.default_timer() + rows = connection.execute_list_query(query) + end = timeit.default_timer() for r in rows: print(r) diff --git a/hyper/results/20260630/c6a.2xlarge.json b/hyper/results/20260630/c6a.2xlarge.json new file mode 100644 index 0000000000..2f5f5198dc --- /dev/null +++ b/hyper/results/20260630/c6a.2xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c6a.2xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 657, + "data_size": 18959040512, + "result": [ + [0.075, 0.012, 0.012], + [0.186, 0.019, 0.02], + [0.425, 0.059, 0.059], + [1.077, 0.059, 0.059], + [1.307, 1.011, 1], + [2.044, 0.399, 0.382], + [0.163, 0.002, 0.002], + [0.193, 0.02, 0.02], + [2.361, 1.337, 1.351], + [3.716, 1.493, 1.576], + [1.201, 0.09, 0.09], + [1.211, 0.094, 0.094], + [2.168, 0.664, 0.676], + [5.091, 1.692, 1.657], + [2.221, 0.71, 0.722], + [1.785, 1.246, 1.254], + [4.782, 1.83, 1.837], + [4.553, 1.652, 1.675], + [9.241, 4.008, 3.919], + [0.28, 0.002, 0.002], + [14.696, 0.696, 0.691], + [16.049, 0.731, 0.695], + [16.911, 0.463, 0.455], + [8.148, 0.182, 0.163], 
+ [1.145, 0.032, 0.023], + [1.887, 0.114, 0.113], + [1.153, 0.032, 0.021], + [15.076, 0.985, 0.985], + [17.9, 15.133, 15.225], + [1.203, 0.987, 0.992], + [4.903, 0.577, 0.569], + [8.63, 0.952, 0.944], + [9.822, 6.735, 6.763], + [15.518, 2.641, 2.562], + [15.552, 2.62, 2.706], + [1.395, 1.033, 1.034], + [0.261, 0.02, 0.019], + [0.196, 0.008, 0.007], + [0.239, 0.006, 0.006], + [0.335, 0.034, 0.035], + [0.327, 0.003, 0.003], + [0.33, 0.004, 0.004], + [0.191, 0.006, 0.006] +] +} diff --git a/hyper/results/20260630/c6a.4xlarge.json b/hyper/results/20260630/c6a.4xlarge.json new file mode 100644 index 0000000000..d5503f73ce --- /dev/null +++ b/hyper/results/20260630/c6a.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 663, + "data_size": 18959040512, + "result": [ + [0.059, 0.007, 0.007], + [0.107, 0.011, 0.01], + [0.379, 0.03, 0.031], + [1.062, 0.03, 0.031], + [0.783, 0.581, 0.569], + [1.908, 0.148, 0.143], + [0.103, 0.002, 0.002], + [0.188, 0.012, 0.011], + [2.048, 0.751, 0.747], + [3.383, 0.848, 0.828], + [1.154, 0.047, 0.046], + [1.168, 0.049, 0.049], + [2.063, 0.303, 0.298], + [4.501, 0.772, 0.767], + [2.057, 0.302, 0.295], + [1.452, 0.681, 0.679], + [4.39, 1.021, 1.019], + [4.255, 0.903, 0.913], + [8.37, 2.097, 2.094], + [0.143, 0.002, 0.002], + [14.673, 0.384, 0.386], + [16.035, 0.386, 0.384], + [16.906, 0.257, 0.255], + [7.551, 0.069, 0.066], + [1.108, 0.014, 0.028], + [1.866, 0.058, 0.059], + [1.105, 0.018, 0.012], + [15.087, 0.516, 0.515], + [12.636, 7.563, 7.587], + [0.647, 0.496, 0.496], + [4.766, 0.26, 0.258], + [8.302, 0.47, 0.47], + [7.765, 3.676, 3.671], + [15.204, 1.515, 1.489], + [15.199, 1.426, 1.427], + [0.96, 0.607, 0.613], + [0.144, 0.013, 0.013], + [0.167, 0.006, 0.006], + [0.147, 0.004, 0.004], + [0.178, 0.022, 0.021], + [0.201, 0.002, 0.003], + 
[0.236, 0.004, 0.003], + [0.134, 0.005, 0.005] +] +} diff --git a/hyper/results/20260630/c6a.large.json b/hyper/results/20260630/c6a.large.json new file mode 100644 index 0000000000..8d7e6d0c03 --- /dev/null +++ b/hyper/results/20260630/c6a.large.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c6a.large", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 751, + "data_size": 18959040512, + "result": [ + [0.093, 0.042, 0.042], + [0.536, 0.07, 0.07], + [1.443, 0.226, 0.227], + [1.999, 0.217, 0.218], + [3.504, 2.497, 2.539], + [3.26, 1.209, 1.24], + [0.484, 0.002, 0.003], + [0.545, 0.072, 0.072], + [5.183, 3.098, 3.087], + [6.733, 3.711, 3.511], + [2.752, 0.359, 0.348], + [2.845, 0.365, 0.369], + [4.826, 1.857, 1.922], + [9.548, 4.75, 4.764], + [4.983, 1.774, 1.886], + [4.811, 2.97, 2.949], + [9.87, 5.241, 4.858], + [9.409, 5.022, 4.315], + [113.284, 116.505, 118.841], + [0.83, 0.004, 0.004], + [21.106, 22.444, 22.489], + [22.357, 23.677, 26.324], + [23.549, 21.127, 21.931], + [14.271, 3.48, 0.389], + [1.967, 0.113, 0.055], + [3.511, 0.407, 0.417], + [2.072, 0.06, 0.063], + [22.159, 23.71, 23.32], + [72.436, 70.068, 89.195], + [4.498, 3.932, 3.931], + [6.93, 1.657, 1.645], + [10.285, 3.722, 3.53], + [153.699, 150.025, 152.315], + [336.556, 410.162, 418.082], + [464.888, 415.227, 447.73], + [3.643, 2.298, 2.317], + [0.777, 0.051, 0.051], + [0.58, 0.019, 0.019], + [0.603, 0.008, 0.009], + [1.032, 0.108, 0.096], + [1.015, 0.004, 0.004], + [1.029, 0.007, 0.006], + [0.546, 0.013, 0.013] +] +} diff --git a/hyper/results/20260630/c6a.metal.json b/hyper/results/20260630/c6a.metal.json new file mode 100644 index 0000000000..003cce8dda --- /dev/null +++ b/hyper/results/20260630/c6a.metal.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c6a.metal", + "cluster_size": 1, + "proprietary": "yes", + 
"hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 423, + "data_size": 18959040512, + "result": [ + [0.043, 0.007, 0.007], + [0.153, 0.016, 0.015], + [0.575, 0.008, 0.008], + [1.055, 0.01, 0.009], + [0.534, 0.107, 0.107], + [2.113, 0.062, 0.061], + [0.082, 0.002, 0.002], + [0.166, 0.008, 0.007], + [1.744, 0.168, 0.162], + [3.127, 0.18, 0.177], + [1.152, 0.022, 0.02], + [1.156, 0.02, 0.02], + [1.926, 0.09, 0.08], + [4.044, 0.202, 0.205], + [1.935, 0.093, 0.095], + [1.207, 0.165, 0.173], + [4.039, 0.192, 0.203], + [4.04, 0.172, 0.161], + [7.906, 0.629, 0.648], + [0.137, 0.003, 0.002], + [14.65, 0.132, 0.119], + [16.007, 0.096, 0.087], + [16.869, 0.074, 0.063], + [12.042, 0.081, 0.06], + [1.078, 0.018, 0.031], + [1.862, 0.016, 0.016], + [1.091, 0.021, 0.009], + [15.061, 0.09, 0.081], + [12.65, 0.862, 0.797], + [0.226, 0.082, 0.074], + [4.645, 0.072, 0.072], + [8.084, 0.127, 0.12], + [6.352, 0.859, 0.881], + [14.888, 0.345, 0.32], + [14.863, 0.321, 0.3], + [0.792, 0.146, 0.128], + [0.183, 0.012, 0.013], + [0.155, 0.007, 0.007], + [0.131, 0.006, 0.008], + [0.158, 0.028, 0.024], + [0.218, 0.007, 0.009], + [0.168, 0.006, 0.006], + [0.102, 0.007, 0.007] +] +} diff --git a/hyper/results/20260630/c6a.xlarge.json b/hyper/results/20260630/c6a.xlarge.json new file mode 100644 index 0000000000..8522c22b34 --- /dev/null +++ b/hyper/results/20260630/c6a.xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c6a.xlarge", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 621, + "data_size": 18959040512, + "result": [ + [0.072, 0.022, 0.022], + [0.298, 0.036, 0.036], + [0.751, 0.115, 0.115], + [1.068, 0.109, 0.11], + [1.922, 1.414, 1.412], + [2.132, 0.659, 0.649], + [0.255, 0.002, 0.002], + [0.303, 0.037, 0.037], + [2.875, 1.89, 1.839], + [4.02, 2.133, 2.197], + [1.416, 0.162, 0.159], + [1.443, 0.167, 0.164], + 
[2.515, 0.97, 0.969], + [5.803, 2.622, 2.598], + [2.619, 1.042, 1.03], + [2.577, 1.718, 1.762], + [5.156, 2.749, 2.757], + [4.804, 2.43, 2.438], + [10.239, 6.051, 6.132], + [0.454, 0.003, 0.003], + [15.133, 1.131, 1.131], + [17.375, 1.141, 1.132], + [19.453, 0.741, 0.738], + [12.448, 0.206, 0.202], + [1.11, 0.038, 0.047], + [1.891, 0.207, 0.206], + [1.145, 0.044, 0.042], + [15.645, 1.761, 1.761], + [34.636, 29.731, 29.719], + [2.267, 1.966, 1.977], + [4.985, 0.906, 0.903], + [8.785, 1.412, 1.394], + [135.883, 136.003, 136.527], + [16.788, 4.316, 4.056], + [16.944, 4.076, 4.064], + [2.067, 1.406, 1.423], + [0.421, 0.029, 0.029], + [0.33, 0.011, 0.011], + [0.346, 0.006, 0.005], + [0.545, 0.056, 0.055], + [0.552, 0.003, 0.003], + [0.551, 0.005, 0.004], + [0.302, 0.008, 0.008] +] +} diff --git a/hyper/results/20260630/c7a.metal-48xl.json b/hyper/results/20260630/c7a.metal-48xl.json new file mode 100644 index 0000000000..2621ef1275 --- /dev/null +++ b/hyper/results/20260630/c7a.metal-48xl.json @@ -0,0 +1,57 @@ +{ + "system": "Salesforce Hyper", + "date": "2026-06-30", + "machine": "c7a.metal-48xl", + "cluster_size": 1, + "proprietary": "yes", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented"], + "load_time": 352, + "data_size": 18959040512, + "result": [ + [0.031, 0.007, 0.007], + [0.089, 0.009, 0.009], + [0.368, 0.007, 0.009], + [1.044, 0.009, 0.009], + [0.496, 0.082, 0.081], + [1.815, 0.034, 0.034], + [0.082, 0.003, 0.003], + [0.097, 0.008, 0.01], + [1.744, 0.112, 0.112], + [3.078, 0.151, 0.139], + [1.148, 0.014, 0.014], + [1.159, 0.019, 0.028], + [1.918, 0.052, 0.05], + [4.018, 0.137, 0.129], + [1.917, 0.062, 0.058], + [1.146, 0.108, 0.11], + [4.032, 0.168, 0.164], + [3.99, 0.124, 0.118], + [7.868, 0.518, 0.532], + [0.225, 0.002, 0.002], + [14.656, 0.073, 0.046], + [16.025, 0.065, 0.059], + [16.887, 0.044, 0.038], + [13.302, 0.1, 0.073], + [1.173, 0.009, 0.02], + [1.866, 0.013, 0.014], + [1.128, 0.017, 0.034], + [15.071, 0.079, 0.074], + 
[12.623, 0.592, 0.593], + [0.245, 0.047, 0.046], + [4.65, 0.054, 0.051], + [8.079, 0.084, 0.079], + [6.029, 0.623, 0.615], + [14.828, 0.208, 0.195], + [14.853, 0.224, 0.195], + [0.719, 0.089, 0.09], + [0.164, 0.012, 0.01], + [0.128, 0.006, 0.006], + [0.156, 0.006, 0.005], + [0.235, 0.019, 0.02], + [0.23, 0.008, 0.008], + [0.199, 0.005, 0.005], + [0.125, 0.007, 0.006] +] +}
diff --git a/hyper/start b/hyper/start
index 06bd986563..0dddc07e3f 100755
--- a/hyper/start
+++ b/hyper/start
@@ -1,2 +1,98 @@
 #!/bin/bash
-exit 0
+# Launch ONE long-lived Hyper server (hyperd) and publish its connection
+# descriptor to server.endpoint. Every ./query invocation then connects to
+# this single persistent process instead of spawning its own.
+#
+# To keep the buffer pool warm across the SEPARATE processes that each ./query
+# spawns, the supervisor also holds a keep-alive connection that keeps
+# hits.hyper attached for the server's whole lifetime. Without it, hits.hyper
+# would be detached the moment a per-try ./query process exits and its pages
+# evicted, so tries 2..N would re-read from a cold pool. (On the very first
+# ./start, before ./load has created hits.hyper, there is nothing to attach;
+# the keep-alive is established by the next cold-cycle ./start once the file
+# exists.)
+set -e
+
+# shellcheck disable=SC1091
+source myenv/bin/activate
+
+# Already running? The pidfile + a live process is authoritative.
+if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then
+    exit 0
+fi
+
+# Clean up stale artifacts from a previous (possibly crashed) server.
+rm -f server.pid server.endpoint server.endpoint.tmp
+
+# Background a supervisor that opens HyperProcess, writes the descriptor,
+# attaches hits.hyper (if present) to keep it warm, then blocks until ./stop
+# signals it. nohup so it survives this script exiting; $! is the supervisor
+# PID we kill in ./stop.
+nohup python3 - >server.log 2>&1 <<'PY' &
+import os
+import signal
+import sys
+from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode
+
+
+def _terminate(*_):
+    # Raise SystemExit so the `with HyperProcess` block exits cleanly and
+    # hyperd is shut down with us (it is terminated when its controlling
+    # process exits).
+    sys.exit(0)
+
+
+signal.signal(signal.SIGTERM, _terminate)
+signal.signal(signal.SIGINT, _terminate)
+
+with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
+    keepalive = None
+    # Everything from the attach onwards runs under try/finally so the
+    # keep-alive connection is closed on every exit path, including a signal
+    # or I/O error that lands while the endpoint is still being published.
+    try:
+        # Keep-alive: hold hits.hyper attached for the server's lifetime so
+        # its buffer pool isn't torn down between per-try ./query processes.
+        # Skipped on the pre-load ./start (file not yet created). Established
+        # BEFORE publishing the endpoint so ./check never sees a "ready"
+        # server that is actually still mid-attach (or about to die on a bad
+        # attach).
+        if os.path.exists("hits.hyper"):
+            keepalive = Connection(hyper.endpoint, "hits.hyper", CreateMode.NONE)
+
+        # Publish the descriptor atomically so ./check and ./query never
+        # read a half-written file.
+        with open("server.endpoint.tmp", "w") as f:
+            f.write(hyper.endpoint.connection_descriptor)
+        os.replace("server.endpoint.tmp", "server.endpoint")
+
+        # Block until a signal arrives; loop so a stray signal can't tear
+        # the server down (only the handler's sys.exit does).
+        while True:
+            signal.pause()
+    finally:
+        if keepalive is not None:
+            keepalive.close()
+PY
+supervisor_pid=$!
+printf '%s\n' "$supervisor_pid" > server.pid
+
+# Give the supervisor a moment to publish the endpoint. The benchmark driver
+# also runs ./check in a loop afterwards, so this is just a fast-path / clean
+# error rather than the authoritative readiness gate.
+for _ in $(seq 1 60); do
+    if [ -s server.endpoint ]; then
+        exit 0
+    fi
+    # Fail fast if the supervisor already died (e.g. a bad attach) instead of
+    # sleeping out the full minute; server.log holds the Python traceback.
+    if ! kill -0 "$supervisor_pid" 2>/dev/null; then
+        echo "hyper: supervisor exited before publishing server.endpoint (see server.log)" >&2
+        rm -f server.pid
+        exit 1
+    fi
+    sleep 1
+done
+
+echo "hyper: server did not publish server.endpoint within 60s" >&2
+exit 1
diff --git a/hyper/stop b/hyper/stop
index 06bd986563..cb2bbedc17 100755
--- a/hyper/stop
+++ b/hyper/stop
@@ -1,2 +1,27 @@
 #!/bin/bash
+# Stop the persistent Hyper server started by ./start. SIGTERM the supervisor
+# (see ./start); its handler exits the `with HyperProcess` block, which shuts
+# down hyperd. Idempotent: a missing/stale pidfile is not an error.
+set -e
+
+if [ -f server.pid ]; then
+    pid="$(cat server.pid 2>/dev/null || true)"
+    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
+        kill "$pid" 2>/dev/null || true
+        # Wait for the supervisor (and thus hyperd) to actually exit so the
+        # benchmark driver's drop_caches isn't defeated by pages still pinned
+        # by a live mmap.
+        for _ in $(seq 1 60); do
+            kill -0 "$pid" 2>/dev/null || break
+            sleep 1
+        done
+        # Still alive after 60s? Force it. Re-check liveness first so we never
+        # SIGKILL an unrelated process that reused the PID after a clean exit.
+        if kill -0 "$pid" 2>/dev/null; then
+            kill -9 "$pid" 2>/dev/null || true
+        fi
+    fi
+fi
+
+rm -f server.pid server.endpoint server.endpoint.tmp
 exit 0