diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ccfaa64..b1856df8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,10 +17,10 @@ defaults: jobs: # JDK 8 is the source of truth: the client ships as a Java 8 artifact # (io.questdb:questdb-client) and is released from JDK 8, so on JDK 8 it must - # compile, the full test suite must pass against the committed native - # libraries, and the javadoc jar must build (-P javadoc attaches it at the - # package phase). The committed native .so/.dylib/.dll are enough -- the only - # git submodule (zstd) is needed solely for C++ native rebuilds, not here. + # compile, the full test suite must pass, and the javadoc jar must build + # (-P javadoc attaches it at the package phase). The native libraries are no + # longer committed, so this job compiles libquestdb.so from source (hence the + # zstd submodule + cmake/nasm/build-essential toolchain) before the tests run. build-jdk8: name: Build, test & javadoc (JDK 8) runs-on: ubuntu-latest @@ -28,6 +28,9 @@ jobs: steps: - name: Check out uses: actions/checkout@v4 + with: + # zstd is required to compile the native library. + submodules: recursive - name: Set up JDK 8 uses: actions/setup-java@v4 @@ -36,6 +39,23 @@ jobs: java-version: "8" cache: maven + - name: Install native build toolchain + run: sudo apt-get update && sudo apt-get install -y cmake nasm build-essential + + - name: Build native libquestdb.so + # JAVA_HOME points at the JDK 8 above, so the lib is compiled against the + # Java 8 JNI headers -- the artifact's floor. Copy it into src resources + # (not target/) so it survives the `mvn clean` in the next step and gets + # packaged + loaded via the production bin/ path. + run: | + cd core + cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S. 
+ cmake --build cmake-build-release --config Release + test -f target/classes/io/questdb/client/bin-local/libquestdb.so + mkdir -p src/main/resources/io/questdb/client/bin/linux-x86-64 + cp target/classes/io/questdb/client/bin-local/libquestdb.so \ + src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so + - name: Compile, test, and build javadoc run: mvn -B -ntp -P javadoc clean install diff --git a/.github/workflows/rebuild_native_libs.yml b/.github/workflows/rebuild_native_libs.yml index 026d3c3e..26b25dbc 100644 --- a/.github/workflows/rebuild_native_libs.yml +++ b/.github/workflows/rebuild_native_libs.yml @@ -68,57 +68,38 @@ jobs: key: nativelibs-osx-${{ github.sha }} build-all-linux-x86-64: runs-on: ubuntu-latest - # manylinux2014 is a container with new-ish compilers and tools, but old glibc - 2.17 - # 2.17 is old enough to be compatible with most Linux distributions out there + # manylinux_2_28 (glibc 2.28) replaces the previous manylinux2014 (glibc + # 2.17) container: GitHub Actions now forces actions (checkout, cache) onto + # Node 24, whose binary requires glibc >= 2.27, so it can no longer run + # inside the glibc-2.17 image (the old Node-20-glibc-217 override hack only + # patched /__e/node20, not /__e/node24). 2.28 still runs stock Node 24 and + # matches the linux-aarch64 job, which already ships glibc-2.28 binaries. + # + # NOTE: the build container's glibc (2.28) does NOT dictate the artifact's + # runtime glibc floor. clock_gettime is pinned back to GLIBC_2.2.5 via + # src/main/c/share/glibc_compat.h so the linux-x86-64 .so keeps loading on + # glibc 2.14+ (its floor is memcpy@GLIBC_2.14), unchanged from before the + # container move. If you add a symbol with a higher version node here, the + # floor will rise -- check with: objdump -T libquestdb.so | grep GLIBC_. 
container: - image: quay.io/pypa/manylinux2014_x86_64 - volumes: - - /node20217:/node20217 - - /node20217:/__e/node20 + image: quay.io/pypa/manylinux_2_28_x86_64 steps: - - name: Install tools, most are needed to build nasm - run: | - ldd --version - yum update -y - yum install 'perl(Env)' perl-Font-TTF perl-Sort-Versions gcc wget perf asciidoc xmlto ghostscript adobe-source-sans-pro-fonts adobe-source-code-pro-fonts rpm-build zstd curl -y - - name: Build nasm - # we need nasm 2.14+ due to this bug https://bugzilla.nasm.us/show_bug.cgi?id=3392205 - # manylinux2014 distribution includes nasm 2.10 - # the nasm project itself provides RPMs, but they built against a newer glibc and other dependencies too - # thus we take src.rpm from nasm project and rebuild it in the manylinux2014 container - # this way we get a nasm binary that is compatible with the manylinux2014 environment - run: | - wget https://www.nasm.us/pub/nasm/releasebuilds/2.16.03/linux/nasm-2.16.03-0.fc39.src.rpm - rpmbuild --rebuild ./nasm-2.16.03-0.fc39.src.rpm - rpm -i ~/rpmbuild/RPMS/x86_64/nasm-2.16.03-0.el7.x86_64.rpm - - name: Install Node.js 20 glibc2.17 - # A hack to override default nodejs 20 to a build compatible with older glibc. - # Inspired by https://github.com/pytorch/test-infra/pull/5959 If it's good for pytorch, it's good for us too! :) - # Q: Why do we need this hack at all? A: Because many github actions, include action/checkout@v4, depend on nodejs 20. - # GitHub Actions runner provides a build of nodejs 20 that requires a newer glibc than manylinux2014 has. - # Thus we download a build of nodejs 20 that is compatible with manylinux2014 and override the default one. 
- run: | - curl -LO https://unofficial-builds.nodejs.org/download/release/v20.9.0/node-v20.9.0-linux-x64-glibc-217.tar.xz - tar -xf node-v20.9.0-linux-x64-glibc-217.tar.xz --strip-components 1 -C /node20217 - ldd /__e/node20/bin/node - uses: actions/checkout@v4 with: submodules: true - - name: Install up-to-date CMake + - name: Install tooling run: | - wget -nv https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2-linux-x86_64.tar.gz - tar -zxf cmake-3.29.2-linux-x86_64.tar.gz - echo "PATH=`pwd`/cmake-3.29.2-linux-x86_64/bin/:$PATH" >> "$GITHUB_ENV" + yum update -y + yum install wget nasm zstd -y - name: Install GraalVM JDK 25 (for jni.h) run: | - wget -nv -O graalvm.tar.gz https://download.oracle.com/graalvm/25/latest/graalvm-jdk-25_linux-x64_bin.tar.gz + wget -v --timeout=180 -O graalvm.tar.gz https://download.oracle.com/graalvm/25/latest/graalvm-jdk-25_linux-x64_bin.tar.gz mkdir graalvm tar xfz graalvm.tar.gz -C graalvm --strip-components=1 echo "JAVA_HOME=`pwd`/graalvm" >> "$GITHUB_ENV" - name: Generate Makefiles run: | cd ./core - # git submodule update --init cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S. - name: Build linux-x86-64 CXX Library run: | diff --git a/ci/build_native.yaml b/ci/build_native.yaml new file mode 100644 index 00000000..a831e58d --- /dev/null +++ b/ci/build_native.yaml @@ -0,0 +1,92 @@ +# Builds the native libquestdb shared library on the test runner itself. +# +# The Linux (.so) and Windows (.dll) binaries are no longer committed to the +# repository -- they are produced and committed only by the release +# "Build and Push Release CXX Libraries" GitHub Action. So the test CI has to +# compile them locally before running the tests. +# +# All three platforms are built on their own native runner: Linux (.so), +# Windows (.dll) and macOS (.dylib). None of these binaries are committed. +# +# CMake writes the artifact to: +# core/target/classes/io/questdb/client/bin-local/libquestdb. 
+# which io.questdb.client.std.Os loads first (the "dev CXX lib" path), so the +# client tests pick it up directly. We additionally copy it into +# core/src/main/resources/io/questdb/client/bin//libquestdb. +# so that `mvn install` packages it into the client jar exactly like the +# committed binary used to be -- this is what the downstream QuestDB OSS server +# tests load from the installed jar. +# +# JAVA_HOME (set to GraalVM JDK 25 by setup.yaml) provides jni.h / jni_md.h: +# - Linux: $JAVA_HOME/include + $JAVA_HOME/include/linux +# - macOS: $JAVA_HOME/include + $JAVA_HOME/include/darwin +# - Windows: %JAVA_HOME%\include + %JAVA_HOME%\include\win32 +steps: + - bash: | + set -eux + git submodule update --init --recursive core/src/main/c/share/zstd + displayName: "Init zstd submodule" + + - bash: | + set -eux + sudo apt-get update + sudo apt-get install -y cmake nasm build-essential + cd core + cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S. + cmake --build cmake-build-release --config Release + lib="target/classes/io/questdb/client/bin-local/libquestdb.so" + test -f "$lib" + # Fail fast if the linker left an unresolved dependency in the .so. + if ldd "$lib" | grep -i "not found"; then + echo "libquestdb.so has unresolved dependencies" + exit 1 + fi + mkdir -p src/main/resources/io/questdb/client/bin/linux-x86-64 + cp "$lib" src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so + displayName: "Build native libquestdb.so (Linux x86-64)" + condition: eq(variables['Agent.OS'], 'Linux') + + - bash: | + set -eux + command -v cmake >/dev/null 2>&1 || brew install cmake + command -v nasm >/dev/null 2>&1 || brew install nasm + # darwin-aarch64 on Apple silicon agents, darwin-x86-64 on Intel agents. 
+ case "$(uname -m)" in + arm64) platform="darwin-aarch64" ;; + x86_64) platform="darwin-x86-64" ;; + *) echo "unsupported macOS arch: $(uname -m)"; exit 1 ;; + esac + cd core + # Pin the dylib's minimum macOS version so the artifact stays loadable on + # older macOS, matching the release build. + export MACOSX_DEPLOYMENT_TARGET=13.0 + cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S. + cmake --build cmake-build-release --config Release + lib="target/classes/io/questdb/client/bin-local/libquestdb.dylib" + test -f "$lib" + mkdir -p "src/main/resources/io/questdb/client/bin/${platform}" + cp "$lib" "src/main/resources/io/questdb/client/bin/${platform}/libquestdb.dylib" + displayName: "Build native libquestdb.dylib (macOS)" + condition: eq(variables['Agent.OS'], 'Darwin') + + - powershell: | + $ErrorActionPreference = "Stop" + # The CMake build is GCC/MinGW based (gcc flags, -static-libgcc/-static-libstdc++), + # so build the Windows DLL with the MinGW-w64 toolchain + NASM, not MSVC. + choco install -y --no-progress nasm mingw + Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" + refreshenv + # choco's nasm package does not put nasm on PATH; add it explicitly. + $env:PATH = "C:\Program Files\NASM;" + $env:PATH + gcc --version + mingw32-make --version + nasm --version + cd core + cmake -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S . 
+ cmake --build cmake-build-release --config Release + $lib = "target/classes/io/questdb/client/bin-local/libquestdb.dll" + if (!(Test-Path $lib)) { throw "native build produced no $lib" } + New-Item -ItemType Directory -Force -Path "src/main/resources/io/questdb/client/bin/windows-x86-64" | Out-Null + Copy-Item $lib "src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll" -Force + displayName: "Build native libquestdb.dll (Windows x86-64)" + condition: eq(variables['Agent.OS'], 'Windows_NT') diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml index 3268313b..86d65410 100644 --- a/ci/run_tests_pipeline.yaml +++ b/ci/run_tests_pipeline.yaml @@ -54,10 +54,6 @@ stages: imageName: "macos-15-arm64" poolName: "Azure Pipelines" jdkArch: "arm64" - mac-x64: - imageName: "macos-15" - poolName: "Azure Pipelines" - jdkArch: "x64" windows-msvc-2022-x64: imageName: "windows-2022" poolName: "Azure Pipelines" @@ -82,6 +78,13 @@ stages: maven | "$(Agent.OS)" path: $(HOME)/.m2/repository displayName: "Cache Maven repository" + # Compile the native libquestdb shared library on the runner; no + # platform's binary is committed anymore. Must run before the client + # jar is installed so the freshly built lib is packaged into it. The + # template builds the right artifact for the current native agent -- + # Linux (.so), Windows (.dll), and macOS (.dylib) alike (see + # build_native.yaml). + - template: build_native.yaml - bash: | BRANCH="${SYSTEM_PULLREQUEST_SOURCEBRANCH:-$BUILD_SOURCEBRANCHNAME}" BRANCH="${BRANCH#refs/heads/}" @@ -149,6 +152,9 @@ stages: maven | "$(Agent.OS)" path: $(HOME)/.m2/repository displayName: "Cache Maven repository" + # Native binaries are no longer committed; compile libquestdb.so on the + # runner so the coverage test run can load it (same as BuildAndTest). 
+ - template: build_native.yaml - task: Maven@3 displayName: "Run tests with coverage" inputs: diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 3538aa7f..29611089 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -48,6 +48,7 @@ set( src/main/c/share/files.h src/main/c/share/net.h src/main/c/share/os.h + src/main/c/share/glibc_compat.h src/main/c/share/ooo.cpp src/main/c/share/cpprt_overrides.h src/main/c/share/cpprt_overrides.cpp diff --git a/core/src/main/c/share/glibc_compat.h b/core/src/main/c/share/glibc_compat.h new file mode 100644 index 00000000..24ea6211 --- /dev/null +++ b/core/src/main/c/share/glibc_compat.h @@ -0,0 +1,53 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +#ifndef QUESTDB_GLIBC_COMPAT_H +#define QUESTDB_GLIBC_COMPAT_H + +// Pin clock_gettime() to its original GLIBC_2.2.5 symbol version. +// +// glibc 2.17 moved clock_gettime() out of librt and into libc, exporting it +// under a NEW version node: clock_gettime@GLIBC_2.17. 
The release binaries are +// built in a modern toolchain container (CI uses manylinux_2_28 / glibc 2.28), +// so without this pin the linker binds our calls to clock_gettime@GLIBC_2.17. +// That single symbol raises the whole library's glibc floor to 2.17 and makes +// it fail to LOAD on hosts running glibc 2.14-2.16 with: +// +// version `GLIBC_2.17' not found (required by libquestdb.so) +// +// The original clock_gettime@GLIBC_2.2.5 symbol is still exported as a compat +// symbol by librt.so.1 on every glibc since (and by libc after the 2.34 librt +// merge), so forcing the reference back to it keeps the library loadable down +// to the previous floor (glibc 2.14, set by memcpy@GLIBC_2.14) with no change +// in runtime behaviour. librt is already a NEEDED dependency (CMake links rt). +// +// Scope: x86-64 glibc only. aarch64 glibc started at 2.17 and has only ever +// shipped clock_gettime in libc@GLIBC_2.17 -- there is no GLIBC_2.2.5 version +// there, so emitting the pin on aarch64 would fail the link with an undefined +// clock_gettime@GLIBC_2.2.5. The directive is a no-op on macOS/Windows. +#if defined(__linux__) && defined(__GLIBC__) && defined(__x86_64__) +__asm__(".symver clock_gettime,clock_gettime@GLIBC_2.2.5"); +#endif + +#endif // QUESTDB_GLIBC_COMPAT_H diff --git a/core/src/main/c/share/net.c b/core/src/main/c/share/net.c index 05660f2b..3b0162fc 100644 --- a/core/src/main/c/share/net.c +++ b/core/src/main/c/share/net.c @@ -33,6 +33,9 @@ #include #include #include +#include +#include +#include "glibc_compat.h" #include "net.h" #include #include "sysutil.h" @@ -298,6 +301,100 @@ JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfo return handleEintrInConnect(fd, result); } +// Waits up to timeout_millis for an in-progress non-blocking connect on fd to +// finish. Returns 0 on success, -1 on connection failure (errno set so the +// caller can read it via Os.errno()), or com_questdb_network_Net_ECONNTIMEOUT +// on timeout. 
+static jint awaitConnectComplete(int fd, jint timeout_millis) {
+    // Fix a single absolute deadline up front. Recomputing the remaining budget
+    // against a moving baseline on each EINTR (reset start = now, then subtract
+    // whole milliseconds) lets a high-frequency signal storm extend the timeout:
+    // under sub-millisecond interrupts every interval truncates to 0 ms, the
+    // budget never decrements, and poll is re-armed with the full budget each
+    // time. A fixed deadline is immune to interrupt frequency -- the remaining
+    // time can only ever decrease.
+    struct timespec deadline;
+    clock_gettime(CLOCK_MONOTONIC, &deadline);
+    long budget_millis = timeout_millis > 0 ? timeout_millis : 0;
+    deadline.tv_sec += budget_millis / 1000L;
+    deadline.tv_nsec += (budget_millis % 1000L) * 1000000L;
+    if (deadline.tv_nsec >= 1000000000L) {
+        deadline.tv_sec += 1;
+        deadline.tv_nsec -= 1000000000L;
+    }
+
+    for (;;) {
+        struct timespec now;
+        clock_gettime(CLOCK_MONOTONIC, &now);
+        // Remaining time in whole milliseconds for poll(). Divide the COMBINED
+        // nanosecond delta: dividing a negative tv_nsec delta on its own rounds
+        // toward zero and would over-shoot the deadline by up to 1 ms.
+        long remaining_millis = (long) (((long long) (deadline.tv_sec - now.tv_sec) * 1000000000LL
+                + (deadline.tv_nsec - now.tv_nsec)) / 1000000LL);
+        if (remaining_millis <= 0) {
+            errno = ETIMEDOUT;
+            return com_questdb_network_Net_ECONNTIMEOUT;
+        }
+
+        struct pollfd pfd;
+        pfd.fd = fd;
+        pfd.events = POLLOUT;
+        pfd.revents = 0;
+
+        int rc = poll(&pfd, 1, (int) remaining_millis);
+        if (rc > 0) {
+            // The connect attempt has finished one way or another; the only
+            // authoritative result is SO_ERROR (POLLOUT alone does not mean
+            // success -- a refused connection is also reported as writable).
+            int so_error = 0;
+            socklen_t len = sizeof(so_error);
+            if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len) < 0) {
+                return -1;
+            }
+            if (so_error != 0) {
+                errno = so_error;
+                return -1;
+            }
+            return 0;
+        }
+        if (rc == 0) {
+            errno = ETIMEDOUT;
+            return com_questdb_network_Net_ECONNTIMEOUT;
+        }
+        if (errno != EINTR) {
+            return -1;
+        }
+        // Interrupted by a signal: loop and recompute the remaining time against
+        // the fixed deadline. EINTR storms cannot extend the timeout.
+    }
+}
+
+JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfoTimeout
+        (JNIEnv *e, jclass cl, jint fd, jlong lpAddrInfo, jint timeoutMillis) {
+    struct addrinfo *addr = (struct addrinfo *) lpAddrInfo;
+
+    // Switch to non-blocking BEFORE connect so connect() returns immediately
+    // with EINPROGRESS instead of blocking on the OS connect timeout. The
+    // socket is left non-blocking on success, matching the post-connect
+    // configureNonBlocking() the callers already perform.
+    int flags = fcntl((int) fd, F_GETFL, 0);
+    if (flags < 0) {
+        return -1;
+    }
+    if (fcntl((int) fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        return -1;
+    }
+
+    int result = connect((int) fd, addr->ai_addr, (int) addr->ai_addrlen);
+    if (result == 0) {
+        return 0; // connected immediately (e.g.
loopback) + } + if (errno == EINPROGRESS || errno == EINTR || errno == EWOULDBLOCK) { + return awaitConnectComplete((int) fd, timeoutMillis); + } + return -1; // immediate failure, errno set +} + JNIEXPORT void JNICALL Java_io_questdb_client_network_Net_freeAddrInfo0 (JNIEnv *e, jclass cl, jlong address) { if (address != 0) { diff --git a/core/src/main/c/share/net.h b/core/src/main/c/share/net.h index 13adafcb..27143639 100644 --- a/core/src/main/c/share/net.h +++ b/core/src/main/c/share/net.h @@ -13,6 +13,8 @@ extern "C" { #define com_questdb_network_Net_EPEERDISCONNECT -1L #undef com_questdb_network_Net_EOTHERDISCONNECT #define com_questdb_network_Net_EOTHERDISCONNECT -2L +#undef com_questdb_network_Net_ECONNTIMEOUT +#define com_questdb_network_Net_ECONNTIMEOUT -3L /* * Class: io_questdb_client_network_Net diff --git a/core/src/main/c/share/os.c b/core/src/main/c/share/os.c index 7262e3f4..ee0b1f69 100644 --- a/core/src/main/c/share/os.c +++ b/core/src/main/c/share/os.c @@ -30,6 +30,7 @@ #include #include #include +#include "glibc_compat.h" #include "../share/os.h" #ifdef __APPLE__ diff --git a/core/src/main/c/windows/net.c b/core/src/main/c/windows/net.c index c32957d4..fd290629 100644 --- a/core/src/main/c/windows/net.c +++ b/core/src/main/c/windows/net.c @@ -160,6 +160,66 @@ JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfo return res; } +JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfoTimeout + (JNIEnv *e, jclass cl, jint fd, jlong lpAddrInfo, jint timeoutMillis) { + struct addrinfo *addr = (struct addrinfo *) lpAddrInfo; + SOCKET s = (SOCKET) fd; + + // Switch to non-blocking BEFORE connect so it returns immediately with + // WSAEWOULDBLOCK instead of blocking on the OS connect timeout. + u_long mode = 1; + if (ioctlsocket(s, FIONBIO, &mode) != 0) { + SaveLastError(); + return -1; + } + + int res = connect(s, addr->ai_addr, (int) addr->ai_addrlen); + if (res == 0) { + return 0; // connected immediately (e.g. 
loopback) + } + if (WSAGetLastError() != WSAEWOULDBLOCK) { + SaveLastError(); + return -1; + } + + fd_set writefds, exceptfds; + FD_ZERO(&writefds); + FD_ZERO(&exceptfds); + FD_SET(s, &writefds); + FD_SET(s, &exceptfds); + + struct timeval tv; + tv.tv_sec = timeoutMillis / 1000; + tv.tv_usec = (timeoutMillis % 1000) * 1000; + + // Winsock signals a failed non-blocking connect via the exception set. + int sel = select(0, NULL, &writefds, &exceptfds, &tv); + if (sel == 0) { + WSASetLastError(WSAETIMEDOUT); + SaveLastError(); + return com_questdb_network_Net_ECONNTIMEOUT; + } + if (sel == SOCKET_ERROR) { + SaveLastError(); + return -1; + } + + int so_error = 0; + int len = sizeof(so_error); + if (FD_ISSET(s, &exceptfds) || !FD_ISSET(s, &writefds)) { + getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &so_error, &len); + WSASetLastError(so_error != 0 ? so_error : WSAECONNREFUSED); + SaveLastError(); + return -1; + } + if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &so_error, &len) == 0 && so_error != 0) { + WSASetLastError(so_error); + SaveLastError(); + return -1; + } + return 0; +} + JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_configureNonBlocking (JNIEnv *e, jclass cl, jint fd) { u_long mode = 1; diff --git a/core/src/main/java/io/questdb/client/Completion.java b/core/src/main/java/io/questdb/client/Completion.java index 0888370d..615799e0 100644 --- a/core/src/main/java/io/questdb/client/Completion.java +++ b/core/src/main/java/io/questdb/client/Completion.java @@ -36,15 +36,22 @@ * {@link #await(long, TimeUnit)} returning {@code true}, or an explicit * {@link #cancel()} that races to terminal). *
<p>
- * Signaling: the Completion is signaled from the I/O thread of the pooled - * query client when the handler's terminal callback ({@code onEnd}, - * {@code onError}, or {@code onExecDone}) returns. + * Signaling: the Completion is signaled on the worker (dispatch) thread of the + * pooled query client when the handler's terminal callback ({@code onEnd}, + * {@code onError}, or {@code onExecDone}) returns -- that callback runs inline + * on the worker thread, not on the I/O thread. Because of this, {@code await()} + * must never be called from inside a handler (it would self-deadlock on the + * worker thread); use {@link #cancel()} to stop a query from inside a handler. */ public interface Completion { /** * Blocks until the query completes. Rethrows any server-reported failure * as a {@link QueryException}. Returns normally on success. + *
<p>
+ * Must NOT be called from a result handler (it runs on the worker thread + * and would self-deadlock); calling it there throws + * {@link IllegalStateException}. Use {@link #cancel()} instead. * * @throws QueryException if the server reported an error or * {@link #cancel()} won the race diff --git a/core/src/main/java/io/questdb/client/HttpClientConfiguration.java b/core/src/main/java/io/questdb/client/HttpClientConfiguration.java index c644f698..587b8111 100644 --- a/core/src/main/java/io/questdb/client/HttpClientConfiguration.java +++ b/core/src/main/java/io/questdb/client/HttpClientConfiguration.java @@ -38,6 +38,15 @@ default boolean fixBrokenConnection() { return true; } + /** + * Upper bound, in milliseconds, on establishing the TCP connection. When + * {@code <= 0} (the default) no application-level connect timeout is applied + * and the connect falls back to the OS-level TCP connect timeout. + */ + default int getConnectTimeout() { + return 0; + } + default EpollFacade getEpollFacade() { return EpollFacadeImpl.INSTANCE; } diff --git a/core/src/main/java/io/questdb/client/Query.java b/core/src/main/java/io/questdb/client/Query.java index f6832e84..c2a752f7 100644 --- a/core/src/main/java/io/questdb/client/Query.java +++ b/core/src/main/java/io/questdb/client/Query.java @@ -27,19 +27,29 @@ import io.questdb.client.cutlass.qwp.client.QwpBindSetter; import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import java.io.Closeable; + /** - * Per-thread, reusable builder for one query. Obtained from - * {@link QuestDB#query()}: every call on the same thread returns the same - * instance, reset to empty. + * A query handle leased from the {@link QuestDB} pool via + * {@link QuestDB#borrowQuery()}. The handle holds one pooled query client (one + * WebSocket + I/O thread) for the lifetime of the borrow; the caller MUST + * {@link #close()} it to release the client back to the pool (typically via + * try-with-resources). + *
<p>
+ * Allocation: the per-submit path is allocation-free -- the heavy query state + * is pre-allocated on the leased pool slot and reused, and {@link #submit()} + * returns this same handle as its {@link Completion}. {@code borrowQuery()} + * creates one small lease handle per borrow (often scalar-replaced by the JIT + * when used with try-with-resources). *
<p>
* Lifecycle: configure with {@link #sql}, optional {@link #binds}, and - * {@link #handler}, then call {@link #submit()} to obtain a {@link Completion}. - * After the Completion terminates, the next {@code QuestDB.query()} call on - * the same thread returns this same instance with its state reset. + * {@link #handler}, then call {@link #submit()} to obtain a {@link Completion} + * and {@code await()} it before the next {@link #submit()}. *
<p>
- * Thread safety: not thread-safe. One in-flight query per thread. + * Thread safety: not thread-safe and single-flight -- one in-flight query per + * handle. To run queries concurrently, borrow one handle per concurrent query. */ -public interface Query { +public interface Query extends Closeable { /** Discards the current configuration without submitting. */ void abandon(); @@ -53,9 +63,39 @@ public interface Query { Query binds(QwpBindSetter binds); /** - * Sets the result-batch handler. The handler is invoked on the pooled - * query client's I/O thread; if it touches caller state, it is - * responsible for its own synchronization. + * Releases the leased pooled query client back to the pool. The caller + * MUST call this (typically via try-with-resources). A real disconnect only + * happens at {@link QuestDB#close()}. Idempotent. + *
<p>
+ * If a submit is still in flight (the caller never awaited, or its + * {@code await(timeout)} expired), {@code close()} cancels it and waits for + * the terminal event so the client is idle before it returns to the pool. + * That wait is bounded by {@code query_close_timeout_ms} (default 5000ms, + * see {@link QuestDBBuilder#queryCloseTimeoutMillis(long)}) and is + * interruptible -- interrupting the calling thread aborts it. If the query + * does not drain within the budget, the client is discarded rather than + * returned (its connection may carry late frames for the abandoned query), + * and the pool grows a fresh one on the next borrow. {@code close()} + * therefore never blocks the caller unbounded, even when the server is slow + * to honor the cancel. + *
<p>
+ * Must NOT be called from a result handler: handlers run on the worker + * thread, so {@code close()} would block waiting for a terminal event that + * only that thread can deliver. Calling it there throws + * {@link IllegalStateException}. Use {@link #cancel()} (non-blocking) to + * stop a query from inside a handler. + */ + @Override + void close(); + + /** + * Sets the result-batch handler. The handler is invoked on the worker + * (dispatch) thread that drives {@code execute()} -- it consumes the pooled + * query client's I/O-thread event queue inline, it does NOT run on the I/O + * thread. If it touches caller state, it is responsible for its own + * synchronization. A handler must not call the blocking {@link #close()} or + * {@link Completion#await()} (they would self-deadlock on the worker + * thread); use {@link #cancel()} to stop from inside a handler. */ Query handler(QwpColumnBatchHandler handler); @@ -65,11 +105,12 @@ public interface Query { Query sql(CharSequence sql); /** - * Submits the query for execution. Returns the {@link Completion} field - * cached on this instance; never allocates. Blocks up to the builder's - * configured acquire timeout if the query pool is exhausted. + * Submits the query for execution on the leased client. Returns this handle + * as its own {@link Completion}; never allocates. The handle is + * single-flight: {@code await()} the returned Completion before the next + * {@code submit()}. 
* - * @return the single-flight Completion bound to this Query instance + * @return the single-flight Completion bound to this Query handle */ Completion submit(); } diff --git a/core/src/main/java/io/questdb/client/QuestDB.java b/core/src/main/java/io/questdb/client/QuestDB.java index a608e12f..ee93afcf 100644 --- a/core/src/main/java/io/questdb/client/QuestDB.java +++ b/core/src/main/java/io/questdb/client/QuestDB.java @@ -24,8 +24,6 @@ package io.questdb.client; -import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; - import java.io.Closeable; /** @@ -34,37 +32,42 @@ * share across threads. *
<p>
* Steady-state allocation is zero: pooled instances are pre-allocated and - * reused, the per-thread {@link Query} handle is cached in a {@code ThreadLocal}, - * and the {@link Completion} associated with each query is a field on that - * cached handle. + * reused, each borrowed {@link Query} handle is a pre-allocated front bound to + * its pool slot, and the {@link Completion} associated with each query is a + * field on that handle. *
<p>
- * Configuration: use {@link #connect(CharSequence)} when the same address list - * and credentials serve both ingest and egress -- the most common case. - * Use {@link #connect(CharSequence, CharSequence)} or {@link #builder()} when - * ingest and egress endpoints differ. + * Configuration: one {@code ws}/{@code wss} string describes the whole cluster + * (a single {@code addr} server list) and both the ingest and query pools + * connect across it. Use {@link #connect(CharSequence)} for the common case, or + * {@link #builder()} for pool sizing and the ingest callbacks. To tolerate the + * server being down at startup, set {@code lazy_connect=true} in the config + * (async ingest + lazy reads; reads stay enabled and connect once the server + * is up). *
<p>
* Thread safety: instances are safe to share. {@link #borrowSender()} and - * {@link #query()} may be called concurrently from any thread; the pool + * {@link #borrowQuery()} may be called concurrently from any thread; the pool * guarantees mutual exclusion of pooled resources. */ public interface QuestDB extends Closeable { /** * Builder for advanced configuration (pool sizes, acquisition timeouts, - * differing ingest/egress configs). + * ingest callbacks). */ static QuestDBBuilder builder() { return new QuestDBBuilder(); } /** - * Connects with a single configuration string used for both ingest and - * egress. The schema must be {@code ws} or {@code wss}: QuestDB ingests and - * queries over QWP (the QuestDB WebSocket protocol), so one string - * configures both clients. + * Connects with a single configuration string for the whole QuestDB cluster, + * used for both ingest and egress. The schema must be {@code ws} or + * {@code wss}: QuestDB ingests and queries over QWP (the QuestDB WebSocket + * protocol), so one string configures both clients. List every cluster node + * in a single {@code addr} server list and both pools connect across it. *

- * Use {@link #connect(CharSequence, CharSequence)} or {@link #builder()} - * when ingest and egress use different addresses or credentials. + * Use {@link #builder()} for pool sizing and the ingest callbacks. To + * tolerate the server being down at startup, set {@code lazy_connect=true} + * in the config (async ingest + lazy reads, reads still enabled). * * @param configurationString a {@code ws}/{@code wss} config string (see * {@link Sender#fromConfig} or @@ -76,20 +79,29 @@ static QuestDB connect(CharSequence configurationString) { } /** - * Connects with explicit ingest and egress configuration strings. + * Borrows a {@link Query} handle from the pool. The caller MUST call + * {@link Query#close()} on the returned instance to release it back to the + * pool (typically via try-with-resources). The handle leases one pooled + * query client (one WebSocket + I/O thread) for the borrow's lifetime; + * submit one or more queries on it, then close it. + *

+ * Allocation: zero at steady state -- the returned instance is a + * pre-allocated handle bound to the leased pool slot. + *

+ * Blocking: blocks up to the builder's + * {@link QuestDBBuilder#acquireTimeoutMillis(long) acquire timeout} when + * the pool is exhausted; throws on timeout. + *

+ * Concurrency: a single handle is single-flight. To run queries + * concurrently, borrow one handle per concurrent query (up to + * {@code query_pool_max}). * - * @param ingestConfigurationString config for the {@link Sender} pool - * ({@link Sender#fromConfig} format) - * @param queryConfigurationString config for the query pool - * ({@link io.questdb.client.cutlass.qwp.client.QwpQueryClient#fromConfig} format) - * @return a connected QuestDB handle + * @return a Query handle leased from the pool; release with + * {@link Query#close()} + * @throws QueryException if the pool is exhausted beyond the acquire + * timeout, or if this {@link QuestDB} instance is closed */ - static QuestDB connect(CharSequence ingestConfigurationString, CharSequence queryConfigurationString) { - return builder() - .ingestConfig(ingestConfigurationString) - .queryConfig(queryConfigurationString) - .build(); - } + Query borrowQuery(); /** * Borrows a {@link Sender} from the pool. The caller MUST call @@ -125,61 +137,4 @@ static QuestDB connect(CharSequence ingestConfigurationString, CharSequence quer */ @Override void close(); - - /** - * One-shot convenience for queries with no bind parameters. Equivalent to - * {@code query().sql(sql).handler(handler).submit()}. Returns the same - * thread-local {@link Completion} instance that {@link #query()} would, - * so this method is also zero-allocation at steady state. - * - * @param sql the SQL text; the buffer is not retained after submit - * @param handler the result-batch handler; invoked on the pooled query - * client's I/O thread - * @return a single-flight handle for the in-flight query - */ - Completion executeSql(CharSequence sql, QwpColumnBatchHandler handler); - - /** - * Allocates a fresh {@link Query} handle. Unlike {@link #query()}, this - * does NOT return the per-thread cached instance; every call allocates. - *

- * Use this when one thread needs to hold multiple in-flight queries - * concurrently (each {@code submit()} acquires its own worker from the - * query pool, so up to {@code queryPoolSize} concurrent queries on a - * single thread is fine). For the common case of one query at a time, - * prefer {@link #query()} -- it is allocation-free. - */ - Query newQuery(); - - /** - * Opens a query builder for the calling thread. Returns the same - * thread-local instance on every call: callers do not need to cache it - * themselves. The returned {@code Query} is in a reset state and is not - * thread-safe -- one in-flight query per thread. - *

- * For multiple concurrent in-flight queries from a single thread, use - * {@link #newQuery()} instead. - */ - Query query(); - - /** - * Releases the thread-affine {@link Sender} (if any) currently attached - * to the calling thread back to the pool. Call this on threads borrowed - * from pools you do not own (for example, Netty event loops) before they - * are recycled, to avoid pinning a {@link Sender} for the lifetime of - * a thread that no longer needs it. - */ - void releaseSender(); - - /** - * Returns a {@link Sender} pinned to the calling thread. First call on - * a thread takes one from the pool and pins it; subsequent calls on the - * same thread return the same instance. The pin is released by - * {@link #releaseSender()} or by {@link #close()} on this handle. - *

- * Use this for long-lived, dedicated producer threads where borrow/return - * overhead would dominate. For short-lived or event-loop callers, prefer - * {@link #borrowSender()}. - */ - Sender sender(); } diff --git a/core/src/main/java/io/questdb/client/QuestDBBuilder.java b/core/src/main/java/io/questdb/client/QuestDBBuilder.java index cae00942..71f78c0a 100644 --- a/core/src/main/java/io/questdb/client/QuestDBBuilder.java +++ b/core/src/main/java/io/questdb/client/QuestDBBuilder.java @@ -35,14 +35,20 @@ /** * Builder for {@link QuestDB}. Most callers use {@link QuestDB#connect(CharSequence)}; - * this builder is for pool sizing, idle/lifetime knobs, acquire timeout, - * and the case where ingest and egress configs differ. + * this builder adds pool sizing, idle/lifetime knobs, the acquire timeout, and + * the ingest callbacks. *

- * Both configs must use the {@code ws} or {@code wss} schema (QWP over - * WebSocket). A pool key (e.g. {@code sender_pool_min}) may be carried in the - * connect string or set with an explicit builder call; an explicit call always - * wins. When both connect strings carry the same pool key with different values, - * {@link #build()} fails. + * To tolerate the server being down at startup, set {@code lazy_connect=true} + * in the config: the ingest side connects asynchronously (writes buffer until + * the wire is up) and the read pool connects lazily on first use. Reads stay + * fully enabled -- they just connect once the server is available. + *

+ * One configuration string describes the whole QuestDB cluster (see + * {@link #fromConfig}): list every node in a single {@code addr} server list and + * both the ingest and query pools connect across it. The schema must be + * {@code ws} or {@code wss} (QWP over WebSocket). A pool key (e.g. + * {@code sender_pool_min}) may be carried in the connect string or set with an + * explicit builder call; an explicit call always wins. */ public final class QuestDBBuilder { @@ -52,6 +58,7 @@ public final class QuestDBBuilder { static final long DEFAULT_MAX_LIFETIME_MILLIS = 30 * 60_000L; static final int DEFAULT_POOL_MAX = 4; static final int DEFAULT_POOL_MIN = 1; + static final long DEFAULT_QUERY_CLOSE_TIMEOUT_MILLIS = 5_000; // Every valid pool value is >= 0, so -1 unambiguously marks "not set // explicitly". The public pool setters are the only writers of these @@ -59,11 +66,15 @@ public final class QuestDBBuilder { private static final int UNSET = -1; private long acquireTimeoutMillis = UNSET; + // Optional ingest-side async callbacks. Null -> each pooled Sender uses its + // loud-not-silent default. Applied to every Sender the pool builds. + private SenderConnectionListener connectionListener; + private SenderErrorHandler errorHandler; private long housekeeperIntervalMillis = UNSET; + private String config; private long idleTimeoutMillis = UNSET; - private String ingestConfig; private long maxLifetimeMillis = UNSET; - private String queryConfig; + private long queryCloseTimeoutMillis = UNSET; private int queryPoolMax = UNSET; private int queryPoolMin = UNSET; private int senderPoolMax = UNSET; @@ -85,6 +96,54 @@ public QuestDBBuilder acquireTimeoutMillis(long millis) { return this; } + /** + * Maximum time {@link Query#close()} waits for an in-flight query to drain + * (after issuing a cancel) before discarding the leased query client and + * letting the pool grow a fresh one. Bounds the close of a handle whose + * {@code submit()} is still running -- e.g. 
when the caller's own + * {@code await(timeout)} expired and they gave up. Defaults to 5000ms. + */ + public QuestDBBuilder queryCloseTimeoutMillis(long millis) { + if (millis < 0) { + throw new IllegalArgumentException("queryCloseTimeoutMillis must be >= 0"); + } + this.queryCloseTimeoutMillis = millis; + return this; + } + + /** + * Sets the async connection-event listener applied to every pooled ingest + * {@link Sender}. The listener observes connect / disconnect / failover + * transitions across the whole sender pool; events are delivered on the + * senders' I/O threads, so the listener must be thread-safe and must not + * block. Pass {@code null} (the default) to keep each sender's + * loud-not-silent default listener. + * + * @param listener the shared connection listener, or {@code null} for the default + * @return this instance for method chaining + */ + public QuestDBBuilder connectionListener(SenderConnectionListener listener) { + this.connectionListener = listener; + return this; + } + + /** + * Sets the async error handler applied to every pooled ingest + * {@link Sender}. The handler receives terminal/async ingest errors + * (connect-budget exhaustion, terminal upgrade failures, write errors) + * from across the whole sender pool; notifications are delivered on the + * senders' I/O threads, so the handler must be thread-safe and must not + * block. Pass {@code null} (the default) to keep each sender's + * loud-not-silent default handler. + * + * @param handler the shared error handler, or {@code null} for the default + * @return this instance for method chaining + */ + public QuestDBBuilder errorHandler(SenderErrorHandler handler) { + this.errorHandler = handler; + return this; + } + /** * Builds the {@link QuestDB} handle. 
Validates both connect strings up * front -- so a malformed config fails here even when both pools have @@ -101,39 +160,45 @@ public QuestDBBuilder acquireTimeoutMillis(long millis) { * and is delivered once the server acks; until then it stays preserved. */ public QuestDB build() { - if (ingestConfig == null) { - throw new IllegalStateException("ingest configuration is required; call fromConfig() or ingestConfig()"); + if (config == null) { + throw new IllegalStateException("configuration is required; call fromConfig()"); } - if (queryConfig == null) { - throw new IllegalStateException("query configuration is required; call fromConfig() or queryConfig()"); + ConfigString cs = ConfigString.parse(config); + ConfigView view = new ConfigView(cs); + // Validate the single cluster config exactly as both pools will, but + // without connecting: the full Sender parse plus validateParameters + // (ingress value keys are registry-STRING, so only the real parse + // validates their values), then the typed egress validateConfig. Each + // side applies the keys it owns and silently ignores the rest, so one + // string drives both. A malformed config therefore fails here even when + // a pool min is 0 and nothing connects. + Sender.LineSenderBuilder.validateWsConfigString(config); + QwpQueryClient.validateConfig(view, "wss".equals(cs.schema())); + + // lazy_connect: tolerate a down server at startup without disabling + // reads. The ingest side connects asynchronously (writes buffer until the + // wire is up) and the read pool defaults to min=0 -- it connects lazily + // on the first query once the server is up. Reads stay enabled. 
+ boolean lazyConnect = view.getBool("lazy_connect", false); + String ingestConfig = config; + if (lazyConnect) { + ingestConfig = resolveLazyConnect(view); } - ConfigString ingestCs = ConfigString.parse(ingestConfig); - ConfigString queryCs = ConfigString.parse(queryConfig); - ConfigView ingestView = new ConfigView(ingestCs); - ConfigView queryView = new ConfigView(queryCs); - // Validate both connect strings exactly as the pools will, but without - // connecting. The ingest string runs the full Sender parse plus - // validateParameters -- ingress value keys are registry-STRING, so only - // the real parse validates their values. The egress string runs the - // typed validateConfig. A malformed config therefore fails here even - // when a pool min is 0 and nothing connects. - Sender.LineSenderBuilder.validateWsConfigString(ingestConfig); - QwpQueryClient.validateConfig(queryView, "wss".equals(queryCs.schema())); - - // A view carries no side; getInt/getLong read any key, so the ingest - // and query views also serve the POOL reads. 
- resolvePoolInt(senderPoolMin, "sender_pool_min", ingestView, queryView, DEFAULT_POOL_MIN, this::senderPoolMin); - resolvePoolInt(senderPoolMax, "sender_pool_max", ingestView, queryView, DEFAULT_POOL_MAX, this::senderPoolMax); - resolvePoolInt(queryPoolMin, "query_pool_min", ingestView, queryView, DEFAULT_POOL_MIN, this::queryPoolMin); - resolvePoolInt(queryPoolMax, "query_pool_max", ingestView, queryView, DEFAULT_POOL_MAX, this::queryPoolMax); - resolvePoolLong(acquireTimeoutMillis, "acquire_timeout_ms", ingestView, queryView, DEFAULT_ACQUIRE_TIMEOUT_MILLIS, this::acquireTimeoutMillis); - resolvePoolLong(idleTimeoutMillis, "idle_timeout_ms", ingestView, queryView, DEFAULT_IDLE_TIMEOUT_MILLIS, this::idleTimeoutMillis); - resolvePoolLong(maxLifetimeMillis, "max_lifetime_ms", ingestView, queryView, DEFAULT_MAX_LIFETIME_MILLIS, this::maxLifetimeMillis); - resolvePoolLong(housekeeperIntervalMillis, "housekeeper_interval_ms", ingestView, queryView, DEFAULT_HOUSEKEEPER_INTERVAL_MILLIS, this::housekeeperIntervalMillis); + + resolvePoolInt(senderPoolMin, "sender_pool_min", view, DEFAULT_POOL_MIN, this::senderPoolMin); + resolvePoolInt(senderPoolMax, "sender_pool_max", view, DEFAULT_POOL_MAX, this::senderPoolMax); + // lazy_connect makes the read pool lazy (min=0); without it the default min is 1. + resolvePoolInt(queryPoolMin, "query_pool_min", view, lazyConnect ? 
0 : DEFAULT_POOL_MIN, this::queryPoolMin); + resolvePoolInt(queryPoolMax, "query_pool_max", view, DEFAULT_POOL_MAX, this::queryPoolMax); + resolvePoolLong(acquireTimeoutMillis, "acquire_timeout_ms", view, DEFAULT_ACQUIRE_TIMEOUT_MILLIS, this::acquireTimeoutMillis); + resolvePoolLong(queryCloseTimeoutMillis, "query_close_timeout_ms", view, DEFAULT_QUERY_CLOSE_TIMEOUT_MILLIS, this::queryCloseTimeoutMillis); + resolvePoolLong(idleTimeoutMillis, "idle_timeout_ms", view, DEFAULT_IDLE_TIMEOUT_MILLIS, this::idleTimeoutMillis); + resolvePoolLong(maxLifetimeMillis, "max_lifetime_ms", view, DEFAULT_MAX_LIFETIME_MILLIS, this::maxLifetimeMillis); + resolvePoolLong(housekeeperIntervalMillis, "housekeeper_interval_ms", view, DEFAULT_HOUSEKEEPER_INTERVAL_MILLIS, this::housekeeperIntervalMillis); return new QuestDBImpl( ingestConfig, - queryConfig, + config, senderPoolMin, senderPoolMax, queryPoolMin, @@ -141,19 +206,62 @@ public QuestDB build() { acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis, - housekeeperIntervalMillis + housekeeperIntervalMillis, + queryCloseTimeoutMillis, + errorHandler, + connectionListener ); } + // Validates the lazy_connect contract and returns the ingest config to use: + // the original string with a non-blocking async initial connect injected + // when the user did not set one. lazy_connect requires BOTH sides to start + // non-blocking, so an explicit knob that forces a blocking / fail-fast + // startup is a configuration conflict and is rejected with a clear remedy. + private String resolveLazyConnect(ConfigView view) { + // (1) ingest side: only initial_connect_retry=async is non-blocking; + // off/false/on/true/sync all block or fail-fast at startup. 
+ String mode = view.getStr("initial_connect_retry"); + if (mode != null && !"async".equalsIgnoreCase(mode)) { + throw new IllegalArgumentException( + "conflicting configuration: lazy_connect=true needs a non-blocking startup, but " + + "initial_connect_retry=" + mode + " makes the initial connect block / fail-fast. " + + "Resolve by removing initial_connect_retry (lazy_connect implies " + + "initial_connect_retry=async) or setting initial_connect_retry=async."); + } + // (2) read side: lazy_connect requires query_pool_min=0 so the read pool + // does not eagerly fail-fast at startup. An explicit query_pool_min > 0 + // (builder call or connect string) contradicts that. + int explicitQueryMin; + if (queryPoolMin != UNSET) { + explicitQueryMin = queryPoolMin; // explicit builder call + } else if (view.has("query_pool_min")) { + explicitQueryMin = view.getInt("query_pool_min", UNSET); // connect string + } else { + explicitQueryMin = 0; // unset -> lazy default of 0 + } + if (explicitQueryMin > 0) { + throw new IllegalArgumentException( + "conflicting configuration: lazy_connect=true needs query_pool_min=0 (the read pool " + + "connects lazily on first use and must not fail-fast at startup), but query_pool_min=" + + explicitQueryMin + " was set. Resolve by removing query_pool_min (lazy_connect " + + "defaults it to 0) or setting query_pool_min=0."); + } + // No explicit initial_connect_retry -> inject async so the ingest build + // is non-blocking. An explicit async needs no injection. + return mode == null ? withDefaultAsyncConnect(config) : config; + } + /** - * Sets a single configuration string used for both ingest and egress. The - * schema must be {@code ws} or {@code wss}. + * Sets the single configuration string for the whole QuestDB cluster -- + * used for both ingest and egress. List every cluster node in one + * {@code addr} (comma-separated, or by repeating the key); the ingest and + * query pools each connect across that one server list. 
The schema must be + * {@code ws} or {@code wss}. */ public QuestDBBuilder fromConfig(CharSequence configurationString) { - requireWebSocketSchema(configurationString, "connection"); - String s = configurationString.toString(); - this.ingestConfig = s; - this.queryConfig = s; + requireWebSocketSchema(configurationString, "cluster"); + this.config = configurationString.toString(); return this; } @@ -183,16 +291,6 @@ public QuestDBBuilder idleTimeoutMillis(long millis) { return this; } - /** - * Sets the ingest-side configuration. The schema must be {@code ws} or - * {@code wss}. - */ - public QuestDBBuilder ingestConfig(CharSequence configurationString) { - requireWebSocketSchema(configurationString, "ingest"); - this.ingestConfig = configurationString.toString(); - return this; - } - /** * Maximum age of a pooled connection before the housekeeper recycles it * (next time it is idle). Useful for picking up DNS / load-balancer @@ -206,16 +304,6 @@ public QuestDBBuilder maxLifetimeMillis(long millis) { return this; } - /** - * Sets the query-side configuration. The schema must be {@code ws} or - * {@code wss}. - */ - public QuestDBBuilder queryConfig(CharSequence configurationString) { - requireWebSocketSchema(configurationString, "query"); - this.queryConfig = configurationString.toString(); - return this; - } - /** * Maximum query-pool size. Defaults to 4. */ @@ -303,12 +391,24 @@ public java.util.Map poolConfigSnapshotForTest() { m.put("query_pool_min", queryPoolMin); m.put("query_pool_max", queryPoolMax); m.put("acquire_timeout_ms", acquireTimeoutMillis); + m.put("query_close_timeout_ms", queryCloseTimeoutMillis); m.put("idle_timeout_ms", idleTimeoutMillis); m.put("max_lifetime_ms", maxLifetimeMillis); m.put("housekeeper_interval_ms", housekeeperIntervalMillis); return m; } + // Inject a non-blocking async initial connect right after the schema + // separator so lazy_connect's build never blocks or fails fast on a down + // server.
Only used when the user set no initial_connect_retry of their own + // (resolveLazyConnect rejects an explicit blocking mode rather than silently + // overriding it), so placement is immaterial -- there is no competing value. + private static String withDefaultAsyncConnect(String config) { + int sep = config.indexOf("::"); + // sep >= 0: fromConfig() validated a ws/wss schema, so "::" is present. + return config.substring(0, sep + 2) + "initial_connect_retry=async;" + config.substring(sep + 2); + } + private static void requireWebSocketSchema(CharSequence config, String role) { String schema = ConfigString.parse(config).schema(); if (!"ws".equals(schema) && !"wss".equals(schema)) { @@ -317,53 +417,17 @@ private static void requireWebSocketSchema(CharSequence config, String role) { } } - private void resolvePoolInt(int current, String key, ConfigView ingest, ConfigView query, int dflt, IntConsumer setter) { + private void resolvePoolInt(int current, String key, ConfigView view, int dflt, IntConsumer setter) { if (current != UNSET) { - return; // explicit builder call wins; skip the conflict check - } - boolean inIngest = ingest.has(key); - boolean inQuery = query.has(key); - int value; - if (inIngest && inQuery) { - int vi = ingest.getInt(key, UNSET); - int vq = query.getInt(key, UNSET); - if (vi != vq) { - throw new IllegalArgumentException( - "conflicting pool config: " + key + " (ingest=" + vi + ", query=" + vq + ")"); - } - value = vi; - } else if (inIngest) { - value = ingest.getInt(key, UNSET); - } else if (inQuery) { - value = query.getInt(key, UNSET); - } else { - value = dflt; + return; // explicit builder call wins } - setter.accept(value); + setter.accept(view.has(key) ? 
view.getInt(key, UNSET) : dflt); } - private void resolvePoolLong(long current, String key, ConfigView ingest, ConfigView query, long dflt, LongConsumer setter) { + private void resolvePoolLong(long current, String key, ConfigView view, long dflt, LongConsumer setter) { if (current != UNSET) { - return; // explicit builder call wins; skip the conflict check - } - boolean inIngest = ingest.has(key); - boolean inQuery = query.has(key); - long value; - if (inIngest && inQuery) { - long vi = ingest.getLong(key, UNSET); - long vq = query.getLong(key, UNSET); - if (vi != vq) { - throw new IllegalArgumentException( - "conflicting pool config: " + key + " (ingest=" + vi + ", query=" + vq + ")"); - } - value = vi; - } else if (inIngest) { - value = ingest.getLong(key, UNSET); - } else if (inQuery) { - value = query.getLong(key, UNSET); - } else { - value = dflt; + return; // explicit builder call wins } - setter.accept(value); + setter.accept(view.has(key) ? view.getLong(key, UNSET) : dflt); } } diff --git a/core/src/main/java/io/questdb/client/Sender.java b/core/src/main/java/io/questdb/client/Sender.java index 604f45d5..dc94f42b 100644 --- a/core/src/main/java/io/questdb/client/Sender.java +++ b/core/src/main/java/io/questdb/client/Sender.java @@ -1011,6 +1011,9 @@ final class LineSenderBuilder { private int autoFlushRows = PARAMETER_NOT_SET_EXPLICITLY; private int bufferCapacity = PARAMETER_NOT_SET_EXPLICITLY; private long closeFlushTimeoutMillis = CLOSE_FLUSH_TIMEOUT_NOT_SET; + // Upper bound (ms) on the TCP connect. PARAMETER_NOT_SET_EXPLICITLY -> + // 0 (no application-level connect timeout; OS connect timeout applies). + private int connectTimeoutMillis = PARAMETER_NOT_SET_EXPLICITLY; // Optional user-supplied async connection-event listener. When null, // the sender uses DefaultSenderConnectionListener.INSTANCE // (loud-not-silent log of every transition). 
@@ -1078,6 +1081,11 @@ public String getSettingsPath() { public int getTimeout() { return httpTimeout == PARAMETER_NOT_SET_EXPLICITLY ? DEFAULT_HTTP_TIMEOUT : httpTimeout; } + + @Override + public int getConnectTimeout() { + return connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 0 : connectTimeoutMillis; + } }; private long minRequestThroughput = PARAMETER_NOT_SET_EXPLICITLY; private int multicastTtl = PARAMETER_NOT_SET_EXPLICITLY; @@ -1199,6 +1207,28 @@ public AdvancedTlsSettings advancedTls() { return new AdvancedTlsSettings(); } + /** + * Upper bound, in milliseconds, on establishing the TCP connection to a + * QuestDB endpoint. When set, a connect that does not complete within + * this budget is aborted (instead of riding the much longer OS-level + * connect timeout). Applies to both HTTP/WebSocket transports. Default + * is unset (0), which falls back to the OS connect timeout. + * + * @param millis connect timeout in milliseconds; must be > 0 + * @return this instance for method chaining + */ + public LineSenderBuilder connectTimeoutMillis(int millis) { + if (this.connectTimeoutMillis != PARAMETER_NOT_SET_EXPLICITLY) { + throw new LineSenderException("connect timeout was already configured ") + .put("[connect_timeout=").put(this.connectTimeoutMillis).put("]"); + } + if (millis <= 0) { + throw new LineSenderException("connect_timeout must be > 0: ").put(millis); + } + this.connectTimeoutMillis = millis; + return this; + } + /** * Per-endpoint timeout on the WebSocket upgrade response read. Default * {@value QwpWebSocketSender#DEFAULT_AUTH_TIMEOUT_MS} ms. @@ -1531,6 +1561,7 @@ public Sender build() { actualErrorInboxCapacity, actualDurableAckKeepaliveIntervalMillis, authTimeoutMillis, + connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 
0 : connectTimeoutMillis, connectionListener, actualConnectionListenerInboxCapacity ); @@ -3166,6 +3197,9 @@ private LineSenderBuilder fromConfig(CharSequence configurationString) { pos = getValue(configurationString, pos, sink, "request_timeout"); int requestTimeout = parseIntValue(sink, "request_timeout"); httpTimeoutMillis(requestTimeout); + } else if (Chars.equals("connect_timeout", sink)) { + pos = getValue(configurationString, pos, sink, "connect_timeout"); + connectTimeoutMillis(parseIntValue(sink, "connect_timeout")); } else if (Chars.equals("request_min_throughput", sink)) { pos = getValue(configurationString, pos, sink, "request_min_throughput"); int requestMinThroughput = parseIntValue(sink, "request_min_throughput"); @@ -3446,6 +3480,9 @@ private LineSenderBuilder fromConfigWebSocket(CharSequence configurationString) if (view.has("auth_timeout_ms")) { authTimeoutMillis(view.getLong("auth_timeout_ms", 0)); } + if (view.has("connect_timeout")) { + connectTimeoutMillis((int) view.getLong("connect_timeout", 0)); + } s = view.getStr("auto_flush_rows"); if (s != null) { @@ -3701,6 +3738,7 @@ public java.util.Map wsConfigSnapshotForTest() { m.put("connection_listener_inbox_capacity", connectionListenerInboxCapacity); m.put("token", httpToken); m.put("auth_timeout_ms", authTimeoutMillis); + m.put("connect_timeout", connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 0 : connectTimeoutMillis); m.put("username", username); m.put("password", password); m.put("tls_verify", tlsValidationMode == null ? 
null : tlsValidationMode.name()); diff --git a/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java b/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java index 94562663..0175ad6c 100644 --- a/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java +++ b/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java @@ -66,6 +66,7 @@ public abstract class HttpClient implements QuietCloseable { protected final NetworkFacade nf; protected final Socket socket; private final ObjectPool csPool = new ObjectPool<>(DirectUtf8String.FACTORY, 64); + private final int connectTimeout; private final int defaultTimeout; private final boolean fixBrokenConnection; private final int maxBufferSize; @@ -84,6 +85,7 @@ public HttpClient(HttpClientConfiguration configuration, SocketFactory socketFac this.nf = configuration.getNetworkFacade(); this.socket = socketFactory.newInstance(nf, LOG); this.defaultTimeout = configuration.getTimeout(); + this.connectTimeout = configuration.getConnectTimeout(); this.bufferSize = configuration.getInitialRequestBufferSize(); this.maxBufferSize = configuration.getMaximumRequestBufferSize(); this.responseParserBufSize = configuration.getResponseBufferSize(); @@ -617,10 +619,16 @@ private void connect(CharSequence host, int port) { throw new HttpClientException("could not resolve host ").put("[host=").put(host).put("]"); } - if (nf.connectAddrInfo(fd, addrInfo) != 0) { + final int connectResult = connectTimeout > 0 + ? 
nf.connectAddrInfoTimeout(fd, addrInfo, connectTimeout) + : nf.connectAddrInfo(fd, addrInfo); + if (connectResult != 0) { int errno = nf.errno(); nf.freeAddrInfo(addrInfo); disconnect(); + if (connectResult == NetworkFacade.CONNECT_TIMEOUT) { + throw new HttpClientException("connect timed out ").put("[host=").put(host).put(", port=").put(port).put(", timeout=").put(connectTimeout).put(']').flagAsTimeout(); + } throw new HttpClientException("could not connect to host ").put("[host=").put(host).put(", port=").put(port).put(", errno=").put(errno).put(']'); } nf.freeAddrInfo(addrInfo); @@ -631,9 +639,20 @@ private void connect(CharSequence host, int port) { throw new HttpClientException("could not configure socket to be non-blocking [fd=").put(fd).put(", errno=").put(errno).put(']'); } + // Register the fd with the event loop before the TLS handshake so the + // handshake can park on socket readiness via ioWait() instead of + // busy-spinning on the non-blocking socket. + setupIoWait(); + if (socket.supportsTls()) { + // Bound the TLS handshake by the connect budget (falling back to + // the request timeout when connect_timeout is unset), so a peer + // that completes TCP but stalls mid-handshake cannot hang or pin a + // CPU. + final long tlsHandshakeStartNanos = System.nanoTime(); + final int tlsHandshakeBudgetMillis = connectTimeout > 0 ? connectTimeout : defaultTimeout; try { - socket.startTlsSession(host); + socket.startTlsSession(host, op -> ioWait(remainingTime(tlsHandshakeBudgetMillis, tlsHandshakeStartNanos), op)); } catch (TlsSessionInitFailedException e) { int errno = nf.errno(); disconnect(); @@ -641,9 +660,15 @@ private void connect(CharSequence host, int port) { .put(", error=").put(e.getFlyweightMessage()) .put(", errno=").put(errno) .put(']'); + } catch (Throwable t) { + // ioWait() throws a timeout-flagged HttpClientException when the + // handshake budget is exhausted; any other error can also surface + // mid-handshake. 
Disconnect so the fd and native buffers do not + // leak, then propagate. + disconnect(); + throw t; } } - setupIoWait(); } private void doSend(long lo, long hi, int timeoutMillis) { diff --git a/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java b/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java index 81ad7c86..5bdb6fe1 100644 --- a/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java +++ b/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java @@ -101,6 +101,10 @@ public abstract class WebSocketClient implements QuietCloseable { private final WebSocketSendBuffer sendBuffer; // volatile: written by user thread in close(), read by I/O thread in checkConnected()/sendFrame()/receiveFrame() private volatile boolean closed; + // Upper bound (ms) on the TCP connect. <= 0 disables the application-level + // timeout and falls back to the OS connect timeout. Seeded from the + // configuration; the QWP sender may override it via setConnectTimeout(). + private int connectTimeoutMillis; private int fragmentBufPos; private long fragmentBufPtr; // native buffer for accumulating fragment payloads private int fragmentBufSize; @@ -168,6 +172,7 @@ public WebSocketClient(HttpClientConfiguration configuration, SocketFactory sock this.nf = configuration.getNetworkFacade(); this.socket = socketFactory.newInstance(nf, LOG); this.defaultTimeout = configuration.getTimeout(); + this.connectTimeoutMillis = configuration.getConnectTimeout(); int sendBufSize = Math.max(configuration.getInitialRequestBufferSize(), DEFAULT_SEND_BUFFER_SIZE); int maxSendBufSize = Math.max(configuration.getMaximumRequestBufferSize(), sendBufSize); @@ -481,6 +486,16 @@ public void sendPing(int timeout) { } } + /** + * Overrides the TCP connect timeout (milliseconds) for subsequent + * {@link #connect} calls. {@code <= 0} disables the application-level + * timeout and falls back to the OS connect timeout. 
Must be called before + * {@link #connect}. + */ + public void setConnectTimeout(int connectTimeoutMillis) { + this.connectTimeoutMillis = connectTimeoutMillis; + } + /** * Sets the value sent as the {@code X-QWP-Accept-Encoding} upgrade header, * e.g. {@code "zstd;level=1,raw"}. Pass {@code null} to omit the header @@ -922,10 +937,18 @@ private void doConnect(CharSequence host, int port) { throw new HttpClientException("could not resolve host [host=").put(host).put(']'); } - if (nf.connectAddrInfo(fd, addrInfo) != 0) { + final int connectResult = connectTimeoutMillis > 0 + ? nf.connectAddrInfoTimeout(fd, addrInfo, connectTimeoutMillis) + : nf.connectAddrInfo(fd, addrInfo); + if (connectResult != 0) { int errno = nf.errno(); nf.freeAddrInfo(addrInfo); disconnect(); + if (connectResult == NetworkFacade.CONNECT_TIMEOUT) { + throw new HttpClientException("connect timed out [host=").put(host) + .put(", port=").put(port) + .put(", timeout=").put(connectTimeoutMillis).put(']').flagAsTimeout(); + } throw new HttpClientException("could not connect [host=").put(host) .put(", port=").put(port) .put(", errno=").put(errno).put(']'); @@ -939,19 +962,35 @@ private void doConnect(CharSequence host, int port) { .put(", errno=").put(errno).put(']'); } + // Register the fd with the event loop before the TLS handshake so the + // handshake can park on socket readiness via ioWait() instead of + // busy-spinning on the non-blocking socket. + setupIoWait(); + if (socket.supportsTls()) { + // Bound the TLS handshake by the connect budget (falling back to the + // request timeout when connect_timeout is unset), so a peer that + // completes TCP but stalls mid-handshake cannot hang or pin a CPU. + final long tlsHandshakeStartNanos = System.nanoTime(); + final int tlsHandshakeBudgetMillis = connectTimeoutMillis > 0 ? 
connectTimeoutMillis : defaultTimeout; try { - socket.startTlsSession(host); + socket.startTlsSession(host, op -> ioWait(getRemainingTimeOrThrow(tlsHandshakeBudgetMillis, tlsHandshakeStartNanos), op)); } catch (TlsSessionInitFailedException e) { int errno = nf.errno(); disconnect(); throw new HttpClientException("could not start TLS session [fd=").put(fd) .put(", error=").put(e.getFlyweightMessage()) .put(", errno=").put(errno).put(']'); + } catch (Throwable t) { + // ioWait() throws a timeout-flagged HttpClientException when the + // handshake budget is exhausted; any other error can also surface + // mid-handshake. Disconnect so the fd and native buffers do not + // leak, then propagate. + disconnect(); + throw t; } } - setupIoWait(); if (LOG.isDebugEnabled()) { LOG.debug("Connected to [host={}, port={}]", host, port); } diff --git a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java index 1706401e..92b4f6a7 100644 --- a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java +++ b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java @@ -165,6 +165,9 @@ public class QwpQueryClient implements QuietCloseable { private final Random failoverRandom = new Random(); private long authTimeoutMs = DEFAULT_AUTH_TIMEOUT_MS; private String authorizationHeader; + // Upper bound (ms) on each TCP connect attempt. 0 (default) falls back to + // the OS connect timeout. + private int connectTimeoutMs = 0; private int bufferPoolSize = DEFAULT_IO_BUFFER_POOL_SIZE; private String clientId; // Client-configured zone (failover.md §1.1), opaque case-insensitive @@ -387,6 +390,7 @@ public static QwpQueryClient fromConfig(CharSequence configurationString) { Long failoverMaxDurationMs = view.has("failover_max_duration_ms") ? view.getLong("failover_max_duration_ms", 0) : null; Long authTimeoutMs = view.has("auth_timeout_ms") ? 
view.getLong("auth_timeout_ms", 0) : null; + Integer connectTimeout = view.has("connect_timeout") ? (int) view.getLong("connect_timeout", 0) : null; Long initialCredit = view.has("initial_credit") ? view.getLong("initial_credit", 0) : null; int poolSize = view.getInt("buffer_pool_size", DEFAULT_IO_BUFFER_POOL_SIZE); String compression = view.getEnum("compression"); @@ -442,6 +446,9 @@ public static QwpQueryClient fromConfig(CharSequence configurationString) { if (authTimeoutMs != null) { client.withAuthTimeout(authTimeoutMs); } + if (connectTimeout != null) { + client.withConnectTimeout(connectTimeout); + } if (initialCredit != null) { client.withInitialCredit(initialCredit); } @@ -497,6 +504,7 @@ public static void validateConfig(ConfigView view, boolean tls) { view.getLong("failover_max_duration_ms", -1); view.getLong("initial_credit", -1); view.getLong("auth_timeout_ms", -1); + view.getLong("connect_timeout", -1); String username = view.getStr("username"); String password = view.getStr("password"); String token = view.getStr("token"); @@ -867,6 +875,7 @@ public java.util.Map configSnapshotForTest() { m.put("client_id", clientId); m.put("zone", clientZone); m.put("auth_timeout_ms", authTimeoutMs); + m.put("connect_timeout", connectTimeoutMs); m.put("authorization_header", authorizationHeader); m.put("tls_verify", tlsValidationMode); m.put("tls_roots", trustStorePath); @@ -994,6 +1003,22 @@ public QwpQueryClient withAuthTimeout(long authTimeoutMs) { return this; } + /** + * Upper bound, in milliseconds, on establishing the TCP connection to an + * endpoint. Unlike {@link #withAuthTimeout(long)} this DOES bound the TCP + * connect itself (via a non-blocking connect), so a routing blackhole that + * never returns SYN-ACK is aborted within this budget instead of riding the + * OS connect timeout. {@code 0} (default) keeps the OS connect timeout. 
+ */ + public QwpQueryClient withConnectTimeout(int connectTimeoutMs) { + checkPreConnect("withConnectTimeout"); + if (connectTimeoutMs <= 0) { + throw new IllegalArgumentException("connectTimeoutMs must be > 0"); + } + this.connectTimeoutMs = connectTimeoutMs; + return this; + } + /** * Configures HTTP Basic authentication for the WebSocket upgrade request. * The server verifies the credentials against the same user store the @@ -1369,6 +1394,7 @@ private void connectToEndpoint(Endpoint ep) { webSocketClient.setQwpClientId(clientId != null ? clientId : defaultClientId()); webSocketClient.setQwpAcceptEncoding(buildAcceptEncodingHeader()); webSocketClient.setQwpMaxBatchRows(maxBatchRows); + webSocketClient.setConnectTimeout(connectTimeoutMs); runUpgradeWithTimeout(ep); negotiatedQwpVersion = webSocketClient.getServerQwpVersion(); negotiatedZstdLevel = webSocketClient.getServerNegotiatedZstdLevel(); @@ -1745,12 +1771,21 @@ private void reconnectViaTracker() { } private void runUpgradeWithTimeout(Endpoint ep) { + // Connect first, OUTSIDE the upgrade try. A connect-phase failure -- + // including a connect_timeout overage flagged via flagAsTimeout() -- must + // keep its own message ("connect timed out ...") and must NOT be relabeled + // as an auth_timeout overage below. doConnect() tears down its own socket + // on failure; the failover walker treats the propagated HttpClientException + // as a transport error and moves on to the next endpoint. + webSocketClient.connect(ep.host, ep.port); + int timeoutMs = (int) Math.min(authTimeoutMs, Integer.MAX_VALUE); try { - webSocketClient.connect(ep.host, ep.port); webSocketClient.upgrade(DEFAULT_ENDPOINT_PATH, timeoutMs, authorizationHeader); } catch (HttpClientException ex) { if (ex.isTimeout()) { + // Reachable only for an upgrade/auth-phase timeout now, so the + // auth_timeout attribution is accurate. 
HttpClientException timeout = new HttpClientException("WebSocket upgrade to ") .put(ep.host).put(':').put(ep.port) .put(" exceeded auth_timeout=").put(authTimeoutMs).put("ms"); diff --git a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java index 9b9cc45d..aa1c7188 100644 --- a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java +++ b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java @@ -154,6 +154,9 @@ public class QwpWebSocketSender implements Sender { private final ClientTlsConfiguration tlsConfig; private MicrobatchBuffer activeBuffer; private long authTimeoutMs = DEFAULT_AUTH_TIMEOUT_MS; + // Upper bound (ms) on each TCP connect attempt. 0 (default) falls back to + // the OS connect timeout. Applied to every WebSocketClient before connect. + private int connectTimeoutMs = 0; // Double-buffering for async I/O private MicrobatchBuffer buffer0; // Cached column references to avoid repeated hashmap lookups @@ -577,7 +580,7 @@ public static QwpWebSocketSender connect( reconnectInitialBackoffMillis, reconnectMaxBackoffMillis, initialConnectMode, errorHandler, errorInboxCapacity, durableAckKeepaliveIntervalMillis, authTimeoutMs, - null, SenderConnectionDispatcher.DEFAULT_CAPACITY); + 0, null, SenderConnectionDispatcher.DEFAULT_CAPACITY); } /** @@ -602,6 +605,7 @@ public static QwpWebSocketSender connect( int errorInboxCapacity, long durableAckKeepaliveIntervalMillis, long authTimeoutMs, + int connectTimeoutMs, SenderConnectionListener connectionListener, int connectionListenerInboxCapacity ) { @@ -613,6 +617,7 @@ public static QwpWebSocketSender connect( try { sender.requestDurableAck = requestDurableAck; sender.authTimeoutMs = authTimeoutMs; + sender.connectTimeoutMs = connectTimeoutMs; sender.closeFlushTimeoutMillis = closeFlushTimeoutMillis; sender.reconnectMaxDurationMillis = 
reconnectMaxDurationMillis; sender.reconnectInitialBackoffMillis = reconnectInitialBackoffMillis; @@ -2439,6 +2444,7 @@ private synchronized WebSocketClient buildAndConnect(ReconnectSupplier ctx) { newClient.setQwpMaxVersion(QwpConstants.VERSION); newClient.setQwpClientId(QwpConstants.CLIENT_ID); newClient.setQwpRequestDurableAck(requestDurableAck); + newClient.setConnectTimeout(connectTimeoutMs); newClient.connect(ep.host, ep.port); int upgradeTimeoutMs = (int) Math.min(authTimeoutMs, Integer.MAX_VALUE); newClient.upgrade(WRITE_PATH, upgradeTimeoutMs, authorizationHeader); diff --git a/core/src/main/java/io/questdb/client/impl/ConfigSchema.java b/core/src/main/java/io/questdb/client/impl/ConfigSchema.java index b36f3207..0508428e 100644 --- a/core/src/main/java/io/questdb/client/impl/ConfigSchema.java +++ b/core/src/main/java/io/questdb/client/impl/ConfigSchema.java @@ -56,6 +56,7 @@ public final class ConfigSchema { str("tls_roots", Side.COMMON); str("tls_roots_password", Side.COMMON); longRange("auth_timeout_ms", Side.COMMON, 0, OPEN_MAX, true, false); // > 0 + longRange("connect_timeout", Side.COMMON, 0, OPEN_MAX, true, false); // > 0 // INGRESS -- the WebSocket Sender applies. 
STRING in the registry; the // Sender parses suffix/mode values (off/on, 64k, durability) with its @@ -108,9 +109,11 @@ public final class ConfigSchema { intRange("query_pool_min", Side.POOL, OPEN, OPEN_MAX, false, false); intRange("query_pool_max", Side.POOL, OPEN, OPEN_MAX, false, false); longRange("acquire_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false); + longRange("query_close_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false); longRange("idle_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false); longRange("max_lifetime_ms", Side.POOL, OPEN, OPEN_MAX, false, false); longRange("housekeeper_interval_ms", Side.POOL, OPEN, OPEN_MAX, false, false); + boolOnOff("lazy_connect", Side.POOL); // facade flag: tolerant non-blocking startup (async ingest + lazy reads) // RESERVED -- accepted no-op (error-policy keys reserved by the spec). str("on_internal_error", Side.RESERVED); diff --git a/core/src/main/java/io/questdb/client/impl/ConfigView.java b/core/src/main/java/io/questdb/client/impl/ConfigView.java index 1160c2d6..74621eef 100644 --- a/core/src/main/java/io/questdb/client/impl/ConfigView.java +++ b/core/src/main/java/io/questdb/client/impl/ConfigView.java @@ -95,6 +95,25 @@ public static String relocatedHint(String key) { return RELOCATED_HINTS.get(key); } + /** + * A boolean flag accepting {@code true}/{@code false} (and {@code on}/{@code off} + * for consistency with the rest of the connect-string surface). Returns + * {@code dflt} when the key is absent; throws on any other value. 
+ */ + public boolean getBool(String key, boolean dflt) { + String v = getStr(key); + if (v == null) { + return dflt; + } + if ("true".equals(v) || "on".equals(v)) { + return true; + } + if ("false".equals(v) || "off".equals(v)) { + return false; + } + throw new IllegalArgumentException("invalid " + key + ": " + v + " (expected true, false, on, off)"); + } + public boolean getBoolOnOff(String key, boolean dflt) { String v = getStr(key); if (v == null) { diff --git a/core/src/main/java/io/questdb/client/impl/PooledSender.java b/core/src/main/java/io/questdb/client/impl/PooledSender.java index 61d89296..e36a8384 100644 --- a/core/src/main/java/io/questdb/client/impl/PooledSender.java +++ b/core/src/main/java/io/questdb/client/impl/PooledSender.java @@ -37,123 +37,112 @@ import java.time.temporal.ChronoUnit; /** - * Decorator that lends a real {@link Sender} from {@link SenderPool}. The - * decorator is pre-allocated once per pool slot and reused for every borrow. + * Thin per-borrow handle returned by {@link SenderPool#borrow()}. A fresh + * instance is created on every borrow, capturing the immutable lease + * {@code generation} stamped by {@code borrow()}; it forwards every + * {@link Sender} call to the reused {@link SenderSlot}'s delegate, validating + * that generation first via {@link SenderSlot#live(long)}. *
<p>
- * Behavior difference from a raw Sender: {@link #close()} on a pooled Sender - * flushes the buffer and returns the decorator to the pool. The underlying - * Sender is only truly closed when {@link io.questdb.client.QuestDB#close()} - * shuts down the pool. + * Behaviour difference from a raw Sender: {@link #close()} flushes the buffer + * and returns the slot to the pool. The underlying Sender is only truly closed + * when {@link io.questdb.client.QuestDB#close()} shuts the pool down. + *
<p>
+ * Because the slot is reused across borrows, this wrapper -- not the slot -- + * carries the lease identity. A stale handle (held after {@link #close()}, with + * the slot since re-borrowed) fails its generation check: data calls throw and + * {@link #close()} is a no-op, so it can never flush into, release, or be + * enqueued twice for a slot a different borrower now owns. This mirrors the + * egress {@code QueryLease} guard. */ public final class PooledSender implements Sender { - private final long createdAtMillis; - private final Sender delegate; - private final SenderPool pool; - // Index of the store-and-forward slot this wrapper owns within the pool, - // or -1 when SF is disabled. Stable for the wrapper's whole life; the - // pool returns it to the free set only when the wrapper is evicted from - // {@code all} (discardBroken / reapIdle). Used to derive a distinct - // {@code sender_id} per pooled sender so concurrent SF senders sharing - // one {@code sf_dir} never collide on the slot {@code flock}. 
- private final int slotIndex; - private volatile long idleSinceMillis; - private volatile boolean inUse; - private volatile boolean invalidated; - - PooledSender(Sender delegate, SenderPool pool, int slotIndex) { - this.delegate = delegate; - this.pool = pool; - this.slotIndex = slotIndex; - this.createdAtMillis = System.currentTimeMillis(); - this.idleSinceMillis = this.createdAtMillis; + private final long generation; + private final SenderSlot slot; + + PooledSender(SenderSlot slot, long generation) { + this.slot = slot; + this.generation = generation; } @Override public void at(long timestamp, ChronoUnit unit) { - delegate.at(timestamp, unit); + slot.live(generation).at(timestamp, unit); } @Override public void at(Instant timestamp) { - delegate.at(timestamp); + slot.live(generation).at(timestamp); } @Override public void atNow() { - delegate.atNow(); + slot.live(generation).atNow(); } @Override public boolean awaitAckedFsn(long targetFsn, long timeoutMillis) { - return delegate.awaitAckedFsn(targetFsn, timeoutMillis); + return slot.live(generation).awaitAckedFsn(targetFsn, timeoutMillis); } @Override public Sender binaryColumn(CharSequence name, byte[] value) { - delegate.binaryColumn(name, value); + slot.live(generation).binaryColumn(name, value); return this; } @Override public Sender binaryColumn(CharSequence name, long ptr, long len) { - delegate.binaryColumn(name, ptr, len); + slot.live(generation).binaryColumn(name, ptr, len); return this; } @Override public Sender binaryColumn(CharSequence name, DirectByteSlice slice) { - delegate.binaryColumn(name, slice); + slot.live(generation).binaryColumn(name, slice); return this; } @Override public Sender boolColumn(CharSequence name, boolean value) { - delegate.boolColumn(name, value); + slot.live(generation).boolColumn(name, value); return this; } @Override public DirectByteSlice bufferView() { - return delegate.bufferView(); + return slot.live(generation).bufferView(); } @Override public Sender 
byteColumn(CharSequence name, byte value) { - delegate.byteColumn(name, value); + slot.live(generation).byteColumn(name, value); return this; } @Override public void cancelRow() { - delegate.cancelRow(); + slot.live(generation).cancelRow(); } @Override public Sender charColumn(CharSequence name, char value) { - delegate.charColumn(name, value); + slot.live(generation).charColumn(name, value); return this; } /** - * Flushes pending rows and returns this decorator to the pool. Does not - * actually close the underlying {@link Sender}; that only happens when - * the owning {@code QuestDB} is closed. - *
<p>
- * Idempotent: a second call after a return is a no-op. + * Flushes pending rows and returns this lease's slot to the pool. Does not + * actually close the underlying {@link Sender}; that only happens when the + * owning {@code QuestDB} is closed. *
<p>
- * Clears the current thread's pin (if any) before the slot becomes - * borrowable again. Without this step a thread that pinned this - * wrapper and then closed it via the public {@link Sender#close()} - * (the natural try-with-resources idiom) would still hold the pin - * in its {@link ThreadLocal}; a subsequent {@code QuestDB.sender()} - * call on that thread would return the cached wrapper even though - * another consumer has since borrowed the slot, and the two - * consumers would write to the same underlying delegate. + * Idempotent: a stale generation (the lease was already returned and the + * slot possibly re-borrowed) is a no-op, so a double close cannot flush + * into, or re-enqueue, a slot a different borrower now owns. The pool + * re-checks the generation under its lock. */ @Override public void close() { - if (!inUse) { + if (generation != slot.generation()) { return; } // Track normal completion rather than catching a specific throwable @@ -163,257 +152,222 @@ public void close() { // abnormal exit as unrecyclable, which is the fail-safe default. boolean flushed = false; try { - delegate.flush(); + slot.delegate().flush(); flushed = true; } finally { - inUse = false; - // Clear the pin BEFORE returning the slot. If we cleared - // after giveBack(), a concurrent borrower could grab the - // slot while this thread's pin still references it, and a - // re-pin on this thread would return the (now in-use) - // wrapper -- the same race this clear is meant to close. - pool.clearPinIfCurrent(this); if (flushed) { - pool.giveBack(this); + slot.pool().giveBack(this); } else { - // flush() did not complete normally. Sender does not clear - // its buffer on flush failure (see Sender Javadoc), and - // WebSocket transport latches the failure for good. Either - // way the wrapper is unsafe to recycle: the next borrower - // would inherit the failed rows or a dead connection. 
The - // original throwable propagates naturally once this finally - // returns -- no explicit rethrow needed. - pool.discardBroken(this); + // flush() did not complete normally. Sender does not clear its + // buffer on flush failure (see Sender Javadoc), and WebSocket + // transport latches the failure for good. Either way the slot + // is unsafe to recycle: the next borrower would inherit the + // failed rows or a dead connection. The original throwable + // propagates naturally once this finally returns -- no explicit + // rethrow needed. + slot.pool().discardBroken(this); } } } @Override public Sender decimalColumn(CharSequence name, Decimal256 value) { - delegate.decimalColumn(name, value); + slot.live(generation).decimalColumn(name, value); return this; } @Override public Sender decimalColumn(CharSequence name, Decimal128 value) { - delegate.decimalColumn(name, value); + slot.live(generation).decimalColumn(name, value); return this; } @Override public Sender decimalColumn(CharSequence name, Decimal64 value) { - delegate.decimalColumn(name, value); + slot.live(generation).decimalColumn(name, value); return this; } @Override public Sender decimalColumn(CharSequence name, CharSequence value) { - delegate.decimalColumn(name, value); + slot.live(generation).decimalColumn(name, value); return this; } @Override public Sender doubleArray(@NotNull CharSequence name, double[] values) { - delegate.doubleArray(name, values); + slot.live(generation).doubleArray(name, values); return this; } @Override public Sender doubleArray(@NotNull CharSequence name, double[][] values) { - delegate.doubleArray(name, values); + slot.live(generation).doubleArray(name, values); return this; } @Override public Sender doubleArray(@NotNull CharSequence name, double[][][] values) { - delegate.doubleArray(name, values); + slot.live(generation).doubleArray(name, values); return this; } @Override public Sender doubleArray(CharSequence name, DoubleArray array) { - delegate.doubleArray(name, 
array); + slot.live(generation).doubleArray(name, array); return this; } @Override public Sender doubleColumn(CharSequence name, double value) { - delegate.doubleColumn(name, value); + slot.live(generation).doubleColumn(name, value); return this; } @Override public boolean drain(long timeoutMillis) { - return delegate.drain(timeoutMillis); + return slot.live(generation).drain(timeoutMillis); } @Override public Sender floatColumn(CharSequence name, float value) { - delegate.floatColumn(name, value); + slot.live(generation).floatColumn(name, value); return this; } @Override public void flush() { - delegate.flush(); + slot.live(generation).flush(); } @Override public long flushAndGetSequence() { - return delegate.flushAndGetSequence(); + return slot.live(generation).flushAndGetSequence(); } @Override public Sender geoHashColumn(CharSequence name, long bits, int precisionBits) { - delegate.geoHashColumn(name, bits, precisionBits); + slot.live(generation).geoHashColumn(name, bits, precisionBits); return this; } @Override public Sender geoHashColumn(CharSequence name, CharSequence value) { - delegate.geoHashColumn(name, value); + slot.live(generation).geoHashColumn(name, value); return this; } @Override public long getAckedFsn() { - return delegate.getAckedFsn(); + return slot.live(generation).getAckedFsn(); } @Override public Sender intColumn(CharSequence name, int value) { - delegate.intColumn(name, value); + slot.live(generation).intColumn(name, value); return this; } @Override public Sender ipv4Column(CharSequence name, int address) { - delegate.ipv4Column(name, address); + slot.live(generation).ipv4Column(name, address); return this; } @Override public Sender ipv4Column(CharSequence name, CharSequence address) { - delegate.ipv4Column(name, address); + slot.live(generation).ipv4Column(name, address); return this; } @Override public Sender long256Column(CharSequence name, long l0, long l1, long l2, long l3) { - delegate.long256Column(name, l0, l1, l2, l3); + 
slot.live(generation).long256Column(name, l0, l1, l2, l3); return this; } @Override public Sender longArray(@NotNull CharSequence name, long[] values) { - delegate.longArray(name, values); + slot.live(generation).longArray(name, values); return this; } @Override public Sender longArray(@NotNull CharSequence name, long[][] values) { - delegate.longArray(name, values); + slot.live(generation).longArray(name, values); return this; } @Override public Sender longArray(@NotNull CharSequence name, long[][][] values) { - delegate.longArray(name, values); + slot.live(generation).longArray(name, values); return this; } @Override public Sender longArray(@NotNull CharSequence name, LongArray values) { - delegate.longArray(name, values); + slot.live(generation).longArray(name, values); return this; } @Override public Sender longColumn(CharSequence name, long value) { - delegate.longColumn(name, value); + slot.live(generation).longColumn(name, value); return this; } @Override public void reset() { - delegate.reset(); + slot.live(generation).reset(); } @Override public Sender shortColumn(CharSequence name, short value) { - delegate.shortColumn(name, value); + slot.live(generation).shortColumn(name, value); return this; } @Override public Sender stringColumn(CharSequence name, CharSequence value) { - delegate.stringColumn(name, value); + slot.live(generation).stringColumn(name, value); return this; } @Override public Sender symbol(CharSequence name, CharSequence value) { - delegate.symbol(name, value); + slot.live(generation).symbol(name, value); return this; } @Override public Sender table(CharSequence table) { - delegate.table(table); + slot.live(generation).table(table); return this; } @Override public Sender timestampColumn(CharSequence name, long value, ChronoUnit unit) { - delegate.timestampColumn(name, value, unit); + slot.live(generation).timestampColumn(name, value, unit); return this; } @Override public Sender timestampColumn(CharSequence name, Instant value) { - 
delegate.timestampColumn(name, value); + slot.live(generation).timestampColumn(name, value); return this; } @Override public Sender uuidColumn(CharSequence name, long lo, long hi) { - delegate.uuidColumn(name, lo, hi); + slot.live(generation).uuidColumn(name, lo, hi); return this; } - long createdAtMillis() { - return createdAtMillis; - } - - int slotIndex() { - return slotIndex; - } - - Sender delegate() { - return delegate; - } - - long idleSinceMillis() { - return idleSinceMillis; - } - - boolean isInUse() { - return inUse; - } - - boolean isInvalidated() { - return invalidated; - } - - void markIdleAt(long nowMillis) { - idleSinceMillis = nowMillis; - } - - void markInUse() { - inUse = true; + long generation() { + return generation; } - void markInvalidated() { - invalidated = true; + SenderSlot slot() { + return slot; } } diff --git a/core/src/main/java/io/questdb/client/impl/QueryClientPool.java b/core/src/main/java/io/questdb/client/impl/QueryClientPool.java index a6365dfa..cbbc150a 100644 --- a/core/src/main/java/io/questdb/client/impl/QueryClientPool.java +++ b/core/src/main/java/io/questdb/client/impl/QueryClientPool.java @@ -26,6 +26,7 @@ import io.questdb.client.QueryException; import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import org.jetbrains.annotations.TestOnly; import java.util.ArrayDeque; import java.util.ArrayList; @@ -49,6 +50,12 @@ */ public final class QueryClientPool implements AutoCloseable { + // Default upper bound, in milliseconds, on how long Query.close() waits for + // an in-flight query to drain (after issuing a cancel) before discarding the + // worker. Mirrors the ingest side's close_flush_timeout_millis default so a + // close() can never block the caller unbounded. Tunable per pool via + // closeQueryTimeoutMillis(long). 
+ static final long DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS = 5_000; private final long acquireTimeoutMillis; private final ArrayList all; private final ArrayDeque available; @@ -75,6 +82,10 @@ public final class QueryClientPool implements AutoCloseable { private final AtomicInteger nextSlotIndex = new AtomicInteger(); private final Condition workerReleased; private volatile boolean closed; + // Upper bound on the Query.close() drain wait; see + // DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS. Volatile because QuestDBImpl sets it + // once at build time on a different thread than the borrowers that read it. + private volatile long closeQueryTimeoutMillis = DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS; private int inFlightCreations; public QueryClientPool( @@ -89,11 +100,12 @@ public QueryClientPool( idleTimeoutMillis, maxLifetimeMillis, null); } - // Package-private constructor exposing the connectHook test seam: production - // passes null (-> the real QwpQueryClient.connect()). White-box tests in - // io.questdb.client.test.impl reach this by reflection to inject a hook that - // throws a non-RuntimeException Throwable from the native connect path. - QueryClientPool( + // Constructor exposing the connectHook seam. Production (QuestDBImpl) passes + // null -> the real QwpQueryClient.connect(); white-box tests pass a hook that + // throws a non-RuntimeException Throwable from the native connect path. This + // is the construction path QuestDBImpl uses, so it is a real (public) ctor, + // not test-only. + public QueryClientPool( String configurationString, int minSize, int maxSize, @@ -106,13 +118,12 @@ public QueryClientPool( idleTimeoutMillis, maxLifetimeMillis, connectHook, null); } - // Package-private constructor exposing both the connectHook and startHook - // test seams: production passes null for each (-> the real - // QwpQueryClient.connect() and QueryWorker.start()). 
White-box tests in - // io.questdb.client.test.impl reach this by reflection to inject a hook that - // throws a Throwable from either the native connect path (connectHook) or - // the worker thread-start path (startHook). - QueryClientPool( + // Constructor exposing both the connectHook and startHook seams. Production + // reaches it via the overload above (both null -> the real + // QwpQueryClient.connect() and QueryWorker.start()); white-box tests pass a + // hook that throws a Throwable from either the native connect path + // (connectHook) or the worker thread-start path (startHook). + public QueryClientPool( String configurationString, int minSize, int maxSize, @@ -197,7 +208,12 @@ public QueryWorker acquire() { throw new QueryException((byte) 0, "QuestDB handle is closed"); } if (!available.isEmpty()) { - return available.pollFirst(); + QueryWorker w = available.pollFirst(); + // Stamp a fresh lease id under the lock so the QueryLease + // about to be handed out can be distinguished from any + // prior, now-stale borrow of the same worker. + w.bumpGeneration(); + return w; } if (all.size() + inFlightCreations < maxSize) { inFlightCreations++; @@ -248,6 +264,8 @@ public QueryWorker acquire() { throw new QueryException((byte) 0, "QuestDB handle is closed"); } all.add(created); + // Stamp the first lease id for this freshly built worker. + created.bumpGeneration(); return created; } if (remainingNanos <= 0) { @@ -297,6 +315,87 @@ public void close() { } } + /** + * Cancels the in-flight query on {@code w} only while its lease generation + * still equals {@code gen}, holding the pool lock across both the check and + * the wire cancel. acquire() and release() bump the generation under this + * same lock, so once this method holds it the generation cannot change: a + * cancel whose lease has already gone stale (the worker was released and + * re-borrowed) is dropped instead of aborting the new borrower's query. 
The + * cancel itself is non-blocking -- a volatile flag plus an AtomicLong set -- + * so the lock is held only briefly. + */ + void cancelIfCurrent(QueryWorker w, long gen) { + lock.lock(); + try { + if (closed) { + return; + } + if (w.generation() != gen) { + return; + } + w.cancelInFlight(); + } finally { + lock.unlock(); + } + } + + long closeQueryTimeoutMillis() { + return closeQueryTimeoutMillis; + } + + void closeQueryTimeoutMillis(long millis) { + this.closeQueryTimeoutMillis = millis; + } + + /** + * Evicts a worker whose lease {@link QueryImpl#close(long)} could not drain + * the in-flight query within {@link #closeQueryTimeoutMillis} (the cancel + * was not honored in time, or the caller was interrupted). The worker's + * connection is left in an unknown protocol state -- a late {@code RESULT_*} + * frame for the abandoned query could corrupt the next borrower's stream -- + * so it must NOT return to the pool. Removes it from {@code all} (freeing + * capacity for a fresh worker) and tears it down outside the lock via + * {@link QueryWorker#shutdown()}, which interrupts the dispatch thread so a + * stuck {@code execute()} returns promptly. + *
<p>
+ * Bails when the pool is already closed: {@link #close()} owns the teardown + * of every worker via its snapshot loop, so mutating {@code all} here would + * race that iteration on a non-thread-safe {@code ArrayList}. Also bails on a + * stale generation -- the worker was already released/discarded and possibly + * re-borrowed, so discarding it would evict a worker a different borrower now + * owns. Mirrors {@link SenderPool#discardBroken} on the ingest side. + */ + void discard(QueryWorker w, long gen) { + lock.lock(); + try { + if (closed) { + return; + } + if (w.generation() != gen) { + return; + } + // Invalidate the lease so a duplicate close()/release with the same + // generation is dropped and the in-flight handle can no longer drive + // this worker. + w.bumpGeneration(); + all.remove(w); + // Capacity freed -- a waiter in acquire() may now create a fresh + // worker in this slot's place. + workerReleased.signal(); + } finally { + lock.unlock(); + } + // Tear down outside the lock so a slow join doesn't keep the pool + // latched. shutdown() is best-effort and idempotent. + try { + w.shutdown(); + } catch (Throwable ignored) { + // Best-effort: a teardown Error (e.g. an -ea AssertionError) must + // not propagate out of Query.close(). + } + } + void reapIdle() { if (closed) { return; @@ -340,14 +439,30 @@ void reapIdle() { } } - void release(QueryWorker w) { - long now = System.currentTimeMillis(); - w.markIdleAt(now); + void release(QueryWorker w, long gen) { lock.lock(); try { if (closed) { return; } + if (w.generation() != gen) { + // Stale release: this lease was already returned and the worker + // has since been re-borrowed (or this is a duplicate close of an + // already-released lease). Dropping it is what makes + // Query.close() idempotent even under a concurrent re-borrow -- + // without this guard a double close would enqueue the worker + // twice and hand it to two borrowers at once, corrupting the + // whole pool. 
The flag a stale close() reads is no longer its + // own lease's, so a non-validated release path could not catch + // this; the generation captured at borrow time can. + return; + } + // Invalidate the just-returned lease so a duplicate release with the + // same generation is also dropped and the in-flight handle can no + // longer drive this worker. + w.bumpGeneration(); + w.markIdleAt(System.currentTimeMillis()); + assert !available.contains(w) : "worker already present in available deque on release"; available.addLast(w); workerReleased.signal(); } finally { @@ -355,11 +470,12 @@ void release(QueryWorker w) { } } - // Package-private white-box accessor for tests: reports the current - // in-flight creation count under the pool lock. A non-zero value after a - // failed acquire() means the slot reservation was never released -- the - // capacity-shrink bug this guards against. - int inFlightCreations() { + // White-box accessor for tests: reports the current in-flight creation count + // under the pool lock. A non-zero value after a failed acquire() means the + // slot reservation was never released -- the capacity-shrink bug this guards + // against. + @TestOnly + public int inFlightCreations() { lock.lock(); try { return inFlightCreations; diff --git a/core/src/main/java/io/questdb/client/impl/QueryImpl.java b/core/src/main/java/io/questdb/client/impl/QueryImpl.java index fc80d263..baf483ea 100644 --- a/core/src/main/java/io/questdb/client/impl/QueryImpl.java +++ b/core/src/main/java/io/questdb/client/impl/QueryImpl.java @@ -24,8 +24,6 @@ package io.questdb.client.impl; -import io.questdb.client.Completion; -import io.questdb.client.Query; import io.questdb.client.QueryException; import io.questdb.client.cutlass.qwp.client.QwpBindSetter; import io.questdb.client.cutlass.qwp.client.QwpBindValues; @@ -40,39 +38,54 @@ import java.util.concurrent.locks.ReentrantLock; /** - * Per-thread implementation of {@link Query}. 
Holds the configured query - * state (SQL, optional binds, handler), an inner {@link Completion}, and a - * wrapping {@link QwpColumnBatchHandler} that forwards callbacks to the user - * handler and signals the Completion on terminal events. + * Reusable per-{@link QueryWorker} query state: the configured SQL, optional + * binds, handler, terminal-event signalling, and a wrapping + * {@link QwpColumnBatchHandler} that forwards callbacks to the user handler and + * signals completion on terminal events. One instance is pre-allocated per + * worker in the constructor and reused across every borrow. *
<p>
- * Lifecycle: {@link QuestDBImpl#query()} returns a per-thread instance, reset - * to empty if it was in a terminal state. {@link #submit()} acquires a - * worker, dispatches, and returns the cached {@link Completion}. + * Because the instance is shared across borrows, it must never be handed to a + * caller directly -- a stale reference would leak into a later borrow's + * lifecycle. Callers instead receive a thin, per-borrow {@link QueryLease} that + * carries the lease {@code generation} stamped at borrow time and passes it + * into every operation here. Each operation validates that generation against + * {@link QueryWorker#generation()}: + *

<ul> + *
<li>builder/await operations on a stale generation throw + * {@code IllegalStateException} ("query handle is not borrowed (closed or never leased)"),</li> + *
<li>{@link #close(long)} and {@link #cancel(long)} on a stale generation are + * no-ops -- this is what makes {@code Query.close()} idempotent and + * prevents a stale handle from releasing, or cancelling the in-flight + * query of, a worker a different borrower now owns.</li> + *
</ul> + *
<p>
+ * Lifecycle: {@link QueryWorker#lease()} resets this state and wraps it in a + * fresh {@link QueryLease} when {@link QuestDBImpl#borrowQuery()} acquires the + * worker. {@link #submit(long)} dispatches on the held worker (single-flight); + * {@link #close(long)} returns the worker to the pool. */ -final class QueryImpl implements Query { +final class QueryImpl { - private final InnerCompletion completion = new InnerCompletion(); private final Condition doneCondition; private final ReentrantLock doneLock = new ReentrantLock(); - private final QueryClientPool pool; private final StringSink sqlBuffer = new StringSink(); + private final QueryWorker worker; + private final QwpBindSetter wireBinds = this::applyBinds; private final WrappingHandler wrappingHandler = new WrappingHandler(); - private volatile QueryWorker currentWorker; private volatile boolean done = true; private volatile String resultMessage; private volatile byte resultStatus; private volatile Throwable unexpectedError; private QwpBindSetter userBinds; - private final QwpBindSetter wireBinds = this::applyBinds; private QwpColumnBatchHandler userHandler; - QueryImpl(QueryClientPool pool) { - this.pool = pool; + QueryImpl(QueryWorker worker) { + this.worker = worker; this.doneCondition = doneLock.newCondition(); } - @Override - public void abandon() { + void abandon(long gen) { + checkLive(gen); if (!done) { throw new IllegalStateException("a previous submit() is still in flight; await the Completion first"); } @@ -81,27 +94,113 @@ public void abandon() { sqlBuffer.clear(); } - @Override - public Query binds(QwpBindSetter binds) { + void await(long gen) throws InterruptedException { + rejectHandlerReentry("await"); + checkLive(gen); + doneLock.lock(); + try { + while (!done) { + doneCondition.await(); + } + } finally { + doneLock.unlock(); + } + throwIfFailed(); + } + + boolean await(long gen, long timeout, TimeUnit unit) throws InterruptedException { + rejectHandlerReentry("await"); + checkLive(gen); + 
long remaining = unit.toNanos(timeout); + doneLock.lock(); + try { + while (!done) { + if (remaining <= 0) { + return false; + } + remaining = doneCondition.awaitNanos(remaining); + } + } finally { + doneLock.unlock(); + } + throwIfFailed(); + return true; + } + + void cancel(long gen) { + // Fast-path drop of an obviously-stale or already-finished cancel, + // without taking the pool lock. This is only a hint -- the + // authoritative re-check runs under the pool lock inside + // worker.cancelInFlight(gen). + if (gen != worker.generation() || done) { + return; + } + // Re-check the lease generation and issue the wire cancel atomically + // under the pool lock (the same lock acquire()/release() bump the + // generation under). An unlocked check followed by an unlocked cancel + // is a TOCTOU: a cross-thread watchdog can pass the check, get + // preempted while this lease is released and the worker re-borrowed by + // another caller, then resume and abort that caller's in-flight query. + worker.cancelInFlight(gen); + } + + void close(long gen) { + rejectHandlerReentry("close"); + // A stale generation means this lease was already released and the + // worker may now be owned by another borrower. Dropping the call is + // what keeps close() idempotent without releasing someone else's + // worker or cancelling their in-flight query. release() re-checks the + // generation under the pool lock, so the worker can never be enqueued + // twice even if two threads race a close on the same live lease. + if (gen != worker.generation()) { + return; + } + // If a submit is still in flight (the caller did not await, or its + // await timed out), cancel it and wait for the terminal event so the + // leased worker is idle before it returns to the pool -- otherwise the + // next borrower would inherit a running execute(). 
+ // + // The wait is bounded (closeQueryTimeoutMillis) and interruptible, so a + // caller that bounded its own await() is never pinned to the full + // remaining query duration here. If the query does NOT drain in time (a + // server slow to honor the cancel, or the caller interrupting), the + // worker is still running execute() on a connection whose protocol state + // is now uncertain -- a late RESULT_* for the abandoned query could + // corrupt the next borrower's stream -- so it is discarded rather than + // returned. The pool grows a fresh worker on the next borrow. + if (!done) { + worker.cancelInFlight(gen); + if (!awaitDone(worker.closeQueryTimeoutMillis())) { + worker.discardFromPool(gen); + return; + } + } + worker.releaseToPool(gen); + } + + boolean isDone(long gen) { + checkLive(gen); + return done; + } + + void setBinds(long gen, QwpBindSetter binds) { + checkLive(gen); this.userBinds = binds; - return this; } - @Override - public Query handler(QwpColumnBatchHandler handler) { + void setHandler(long gen, QwpColumnBatchHandler handler) { + checkLive(gen); this.userHandler = handler; - return this; } - @Override - public Query sql(CharSequence sql) { + void setSql(long gen, CharSequence sql) { + checkLive(gen); sqlBuffer.clear(); sqlBuffer.put(sql); - return this; } - @Override - public Completion submit() { + void submit(long gen) { + checkLive(gen); if (sqlBuffer.length() == 0) { throw new IllegalStateException("sql is required"); } @@ -111,7 +210,6 @@ public Completion submit() { if (!done) { throw new IllegalStateException("a previous submit() is still in flight; await the Completion first"); } - QueryWorker w = pool.acquire(); // Reset terminal state under the lock so a stale signal from a prior // run can't be observed by the upcoming await(). 
doneLock.lock(); @@ -120,12 +218,10 @@ public Completion submit() { resultStatus = 0; resultMessage = null; unexpectedError = null; - currentWorker = w; } finally { doneLock.unlock(); } - w.dispatch(this); - return completion; + worker.dispatch(this); } private void applyBinds(QwpBindValues binds) { @@ -135,6 +231,56 @@ private void applyBinds(QwpBindValues binds) { } } + /** + * Waits up to {@code timeoutMillis} for the in-flight query's terminal + * event. Returns {@code true} once {@code done} is set, {@code false} on + * timeout or interrupt. Unlike an uninterruptible drain, an interrupt aborts + * the wait and re-raises the thread's interrupt flag, so {@code close()} + * stays responsive to a caller that wants to give up. + */ + private boolean awaitDone(long timeoutMillis) { + long remaining = TimeUnit.MILLISECONDS.toNanos(timeoutMillis); + doneLock.lock(); + try { + while (!done) { + if (remaining <= 0) { + return false; + } + try { + remaining = doneCondition.awaitNanos(remaining); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return false; + } + } + return true; + } finally { + doneLock.unlock(); + } + } + + private void checkLive(long gen) { + if (gen != worker.generation()) { + throw new IllegalStateException("query handle is not borrowed (closed or never leased)"); + } + } + + private void rejectHandlerReentry(String op) { + // Result handlers (onBatch/onEnd/onError) run inline on the worker's + // dispatch thread. A blocking lease op called from there would wait for + // a terminal event that only this same thread can deliver -- a + // permanent, uninterruptible self-deadlock plus a leaked worker. Fail + // loudly at the call site instead. cancel() is the non-blocking stop. + if (worker.isCurrentThreadWorker()) { + throw new IllegalStateException( + op + "() must not be called from a result handler. 
Handlers " + + "(onBatch/onEnd/onError) run on the worker thread, so " + op + + "() would block forever waiting for a terminal event that only " + + "this same thread can deliver. To stop a query from inside a " + + "handler, call cancel() (non-blocking)."); + } + } + private void signalDone(byte status, String message, Throwable unexpected) { doneLock.lock(); try { @@ -145,27 +291,38 @@ private void signalDone(byte status, String message, Throwable unexpected) { this.resultMessage = message; this.unexpectedError = unexpected; this.done = true; - this.currentWorker = null; doneCondition.signalAll(); } finally { doneLock.unlock(); } } + private void throwIfFailed() { + Throwable unexpected = unexpectedError; + if (unexpected != null) { + throw new QueryException(resultStatus, resultMessage, unexpected); + } + if (resultStatus != 0) { + throw new QueryException(resultStatus, resultMessage); + } + } + /** - * Drops any prior builder state (SQL, binds, handler) if no submit is - * currently in flight. {@link QuestDBImpl#query()} invokes this before - * returning the per-thread instance so callers see the "reset to empty" - * contract documented on {@link io.questdb.client.Query} regardless of - * whether the previous use ended at a terminal handler callback or at - * {@link #abandon()}. + * Resets builder and terminal state to empty. Called by + * {@link QueryWorker#lease()} when {@link QuestDBImpl#borrowQuery()} hands a + * freshly stamped {@link QueryLease} out, so each borrow starts from the + * documented "reset to empty" contract on {@link io.questdb.client.Query}. + * The leased worker is idle at this point (just acquired from the pool), so + * the reset is unconditional. 
*/ - void resetIfDone() { - if (done) { - userBinds = null; - userHandler = null; - sqlBuffer.clear(); - } + void resetForBorrow() { + userBinds = null; + userHandler = null; + sqlBuffer.clear(); + resultStatus = 0; + resultMessage = null; + unexpectedError = null; + done = true; } void runOn(QwpQueryClient client) { @@ -185,63 +342,6 @@ void signalUnexpected(Throwable t) { signalDone((byte) 0, t.getMessage() != null ? t.getMessage() : t.getClass().getSimpleName(), t); } - private final class InnerCompletion implements Completion { - - @Override - public void await() throws InterruptedException { - doneLock.lock(); - try { - while (!done) { - doneCondition.await(); - } - } finally { - doneLock.unlock(); - } - throwIfFailed(); - } - - @Override - public boolean await(long timeout, TimeUnit unit) throws InterruptedException { - long remaining = unit.toNanos(timeout); - doneLock.lock(); - try { - while (!done) { - if (remaining <= 0) { - return false; - } - remaining = doneCondition.awaitNanos(remaining); - } - } finally { - doneLock.unlock(); - } - throwIfFailed(); - return true; - } - - @Override - public void cancel() { - QueryWorker w = currentWorker; - if (w != null && !done) { - w.cancelInFlight(); - } - } - - @Override - public boolean isDone() { - return done; - } - - private void throwIfFailed() { - Throwable unexpected = unexpectedError; - if (unexpected != null) { - throw new QueryException(resultStatus, resultMessage, unexpected); - } - if (resultStatus != 0) { - throw new QueryException(resultStatus, resultMessage); - } - } - } - private final class WrappingHandler implements QwpColumnBatchHandler { @Override diff --git a/core/src/main/java/io/questdb/client/impl/QueryLease.java b/core/src/main/java/io/questdb/client/impl/QueryLease.java new file mode 100644 index 00000000..6083b802 --- /dev/null +++ b/core/src/main/java/io/questdb/client/impl/QueryLease.java @@ -0,0 +1,110 @@ 
+/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.impl; + +import io.questdb.client.Completion; +import io.questdb.client.Query; +import io.questdb.client.cutlass.qwp.client.QwpBindSetter; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; + +import java.util.concurrent.TimeUnit; + +/** + * Thin per-borrow handle returned by {@link QuestDBImpl#borrowQuery()}. A fresh + * instance is created on every borrow, capturing the immutable lease + * {@code generation} stamped by {@link QueryClientPool#acquire()}; it delegates + * every {@link Query} and {@link Completion} operation to the worker's reused + * {@link QueryImpl}, threading that generation through so a stale handle cannot + * disturb a later borrow on the same worker (see {@link QueryImpl}). + *
<p>
+ * It implements {@link Completion} as well as {@link Query} so {@link #submit()} + * can return {@code this} -- the per-submit path stays allocation-free, and the + * single small allocation happens once per borrow (and is routinely + * scalar-replaced by the JIT in the common try-with-resources case). + */ +final class QueryLease implements Query, Completion { + + private final long generation; + private final QueryImpl impl; + + QueryLease(QueryImpl impl, long generation) { + this.impl = impl; + this.generation = generation; + } + + @Override + public void abandon() { + impl.abandon(generation); + } + + @Override + public void await() throws InterruptedException { + impl.await(generation); + } + + @Override + public boolean await(long timeout, TimeUnit unit) throws InterruptedException { + return impl.await(generation, timeout, unit); + } + + @Override + public Query binds(QwpBindSetter binds) { + impl.setBinds(generation, binds); + return this; + } + + @Override + public void cancel() { + impl.cancel(generation); + } + + @Override + public void close() { + impl.close(generation); + } + + @Override + public Query handler(QwpColumnBatchHandler handler) { + impl.setHandler(generation, handler); + return this; + } + + @Override + public boolean isDone() { + return impl.isDone(generation); + } + + @Override + public Query sql(CharSequence sql) { + impl.setSql(generation, sql); + return this; + } + + @Override + public Completion submit() { + impl.submit(generation); + return this; + } +} diff --git a/core/src/main/java/io/questdb/client/impl/QueryWorker.java b/core/src/main/java/io/questdb/client/impl/QueryWorker.java index f4f641c8..b3c6a32c 100644 --- a/core/src/main/java/io/questdb/client/impl/QueryWorker.java +++ b/core/src/main/java/io/questdb/client/impl/QueryWorker.java @@ -24,6 +24,7 @@ package io.questdb.client.impl; +import io.questdb.client.Query; import io.questdb.client.QueryException; import io.questdb.client.cutlass.qwp.client.QwpQueryClient; @@ 
-39,7 +40,11 @@ * The pooled query client's own I/O thread continues to drive the wire; the * worker thread exists only to keep {@code execute()} off the application's * submitting thread. Handler callbacks ({@code onBatch}, {@code onEnd}, - * {@code onError}) still run on the client's I/O thread. + * {@code onError}) run on this worker's own dispatch thread, which consumes the + * I/O thread's event queue inline -- not on the I/O thread itself. A handler + * must therefore never call the lease's blocking {@code close()}/{@code await()} + * (it would self-deadlock waiting for a terminal event only this thread can + * deliver); use the non-blocking {@code cancel()} to stop from inside a handler. */ public final class QueryWorker { @@ -47,16 +52,27 @@ public final class QueryWorker { private final QwpQueryClient client; private final long createdAtMillis; private final QueryClientPool pool; + private final QueryImpl query; private final Condition signalCondition; private final ReentrantLock signalLock = new ReentrantLock(); private final Thread thread; private volatile QueryImpl current; + // Monotonic lease id. Mutated only under the QueryClientPool lock + // (bumped once in acquire() when the worker is handed out and once in + // release() when it is returned), so successive borrows of the same + // worker get distinct ids. A QueryLease captures the value live during + // its borrow; once the worker is released or re-borrowed the captured id + // no longer matches, which is how a stale handle's close()/cancel()/ + // submit() are detected and dropped. Volatile so a stale handle on another + // thread observes the latest value without taking the pool lock. 
+ private volatile long generation; private volatile long idleSinceMillis; private volatile boolean shuttingDown; public QueryWorker(QwpQueryClient client, QueryClientPool pool, int slotIndex) { this.client = client; this.pool = pool; + this.query = new QueryImpl(this); this.signalCondition = signalLock.newCondition(); this.thread = new Thread(this::runLoop, "questdb-query-worker-" + slotIndex); this.thread.setDaemon(true); @@ -68,17 +84,48 @@ long createdAtMillis() { return createdAtMillis; } + /** + * Advances the lease generation. Called by {@link QueryClientPool} under + * the pool lock when this worker is handed out (acquire) and when it is + * returned (release). + */ + void bumpGeneration() { + generation++; + } + + /** + * Current lease generation. See {@link #generation} for the visibility and + * mutation contract. + */ + long generation() { + return generation; + } + long idleSinceMillis() { return idleSinceMillis; } + /** + * True when the calling thread is this worker's own dispatch thread -- i.e. + * a reentrant call from inside a result handler, which runs inline on this + * thread. Blocking lease operations ({@link QueryImpl#close}/ + * {@link QueryImpl#await}) use this to fail loudly instead of + * self-deadlocking. + */ + boolean isCurrentThreadWorker() { + return Thread.currentThread() == thread; + } + void markIdleAt(long nowMillis) { idleSinceMillis = nowMillis; } /** - * Cancels the in-flight query on this worker's client. Safe to call from - * any thread; harmless if the worker is idle. + * Issues an unconditional wire cancel against whatever query this worker's + * client is currently running. Callers must already own the worker for the + * current lease -- in practice this runs under the pool lock via + * {@link QueryClientPool#cancelIfCurrent}, which validates the lease + * generation first. Lease code must use {@link #cancelInFlight(long)}. 
*/ void cancelInFlight() { try { @@ -88,6 +135,18 @@ void cancelInFlight() { } } + /** + * Cancels the in-flight query only if this worker's lease generation still + * equals {@code gen}. Delegates to the pool so the generation re-check and + * the wire cancel happen together under the pool lock that + * {@link QueryClientPool#acquire} and {@link QueryClientPool#release} bump + * the generation under. That atomicity stops a stale cross-thread cancel + * from aborting a later borrower's query on the same worker. + */ + void cancelInFlight(long gen) { + pool.cancelIfCurrent(this, gen); + } + /** * Returns the {@link QwpQueryClient} this worker drives. Exposed for * introspection and tests; callers must not invoke {@code execute()} on @@ -97,6 +156,44 @@ public QwpQueryClient client() { return client; } + /** + * Resets the worker's reused {@link QueryImpl} and returns a fresh + * {@link QueryLease} stamped with the current lease {@link #generation}. + * Called by {@link QuestDBImpl#borrowQuery()} right after + * {@link QueryClientPool#acquire()} hands this worker out (which bumped the + * generation under the pool lock). The lease is a small per-borrow handle; + * the heavy state stays on the reused {@link QueryImpl}, and the per-submit + * path remains allocation-free. + */ + Query lease() { + query.resetForBorrow(); + return new QueryLease(query, generation); + } + + long closeQueryTimeoutMillis() { + return pool.closeQueryTimeoutMillis(); + } + + /** + * Discards this worker from the pool instead of returning it. Called by + * {@link QueryImpl#close(long)} when the in-flight query could not be + * drained within the close budget, leaving the connection in an unknown + * protocol state. The captured lease {@code gen} lets the pool reject a + * stale discard whose worker has already been re-borrowed. + */ + void discardFromPool(long gen) { + pool.discard(this, gen); + } + + /** + * Returns this worker to the pool. 
Called by {@link QueryImpl#close(long)} + * when the borrowed lease is released; the captured lease {@code gen} lets + * the pool reject a stale release whose worker has already been re-borrowed. + */ + void releaseToPool(long gen) { + pool.release(this, gen); + } + void shutdown() { shuttingDown = true; signalLock.lock(); @@ -106,10 +203,19 @@ void shutdown() { signalLock.unlock(); } try { - // If a query is in flight on this worker, ask the client to abort so - // execute() returns promptly and the thread can exit before join - // times out. cancel() is documented as thread-safe and is a no-op - // when idle. + // If a query is in flight on this worker, force execute() to return + // promptly so the dispatch thread exits before the join below times + // out. Two nudges, strongest first: + // 1. Interrupt the dispatch thread. takeEvent() (QwpSpscQueue.take) + // is interrupt-aware, and executeOnce() turns the resulting + // InterruptedException into a terminal event -> signalDone. This + // releases a caller parked in Query.close() even when the I/O + // thread is wedged and client.close()'s synthetic terminal + // (closePool()) never runs -- the race that would otherwise + // strand the caller forever. + // 2. Ask the client to cancel on the wire so the server stops work. + // Best-effort and a no-op when idle. + thread.interrupt(); try { client.cancel(); } catch (Throwable ignored) { @@ -140,8 +246,10 @@ void start() { } /** - * Hands a configured {@link QueryImpl} to this worker. The caller must - * have just acquired this worker via QueryClientPool#acquire(long). + * Hands a configured {@link QueryImpl} to this worker for execution. The + * worker is held by an open {@link io.questdb.client.Query} lease (see + * {@link #lease()}), so a lease may dispatch repeatedly (single-flight) + * until it is closed. 
*/ void dispatch(QueryImpl q) { signalLock.lock(); @@ -181,6 +289,17 @@ private void runLoop() { return; } q = current; + // Clear the hand-off slot under signalLock, at the moment of + // consumption -- NOT after runOn() returns. A lease is + // single-flight but reused: the user thread loops submit() -> + // await() on the same handle. The terminal callback inside + // runOn() wakes the user thread, which can call submit() -> + // dispatch() (current = q; signal) before this worker thread + // returns from runOn(). Clearing current after runOn() would + // race that dispatch, clobber the freshly-set job, drop its + // already-consumed signal, and park the worker forever while + // the user thread waits on a Completion that never fires. + current = null; } finally { signalLock.unlock(); } @@ -188,9 +307,6 @@ private void runLoop() { q.runOn(client); } catch (Throwable t) { q.signalUnexpected(t); - } finally { - current = null; - pool.release(this); } } } diff --git a/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java b/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java index 5bba8d46..4e72237d 100644 --- a/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java +++ b/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java @@ -24,27 +24,30 @@ package io.questdb.client.impl; -import io.questdb.client.Completion; import io.questdb.client.QuestDB; import io.questdb.client.Query; import io.questdb.client.Sender; -import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import io.questdb.client.SenderConnectionListener; +import io.questdb.client.SenderErrorHandler; import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import org.jetbrains.annotations.TestOnly; import java.util.function.Consumer; import java.util.function.IntFunction; /** - * Implementation of {@link QuestDB}. 
Owns the elastic {@link SenderPool} - * and {@link QueryClientPool}, a {@link PoolHousekeeper} that reaps idle - * slots, and a {@link ThreadLocal} of {@link QueryImpl} instances so that - * {@link #query()} is allocation-free after the first call on each thread. + * Implementation of {@link QuestDB}. Owns the elastic {@link SenderPool} and + * {@link QueryClientPool} and a {@link PoolHousekeeper} that reaps idle slots. + * {@link #borrowQuery()} leases a pooled {@link QueryWorker} and hands back a + * thin {@link QueryLease} over its reused {@link QueryImpl}; the heavy per-query + * state is pre-allocated on the worker and the per-submit path is + * allocation-free, so only the small lease handle is created per borrow (and is + * routinely scalar-replaced by the JIT in the try-with-resources case). */ public final class QuestDBImpl implements QuestDB { private final PoolHousekeeper housekeeper; private final QueryClientPool queryPool; - private final ThreadLocal queryThreadLocal; private final SenderPool senderPool; private volatile boolean closed; @@ -58,20 +61,24 @@ public QuestDBImpl( long acquireTimeoutMillis, long idleTimeoutMillis, long maxLifetimeMillis, - long housekeeperIntervalMillis + long housekeeperIntervalMillis, + long queryCloseTimeoutMillis, + SenderErrorHandler errorHandler, + SenderConnectionListener connectionListener ) { this(ingestConfig, queryConfig, senderMin, senderMax, queryMin, queryMax, acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis, - housekeeperIntervalMillis, null, null); + housekeeperIntervalMillis, queryCloseTimeoutMillis, null, null, errorHandler, connectionListener); } - // Package-private constructor exposing the senderFactory and connectHook test - // seams: production passes null for both (-> the real native build/connect - // paths). 
White-box tests in io.questdb.client.test.impl reach this by - // reflection (the main module is declared `open`) to make SenderPool prewarm - // an observable delegate while QueryClientPool construction throws an Error, + // Test-only constructor exposing the senderFactory and connectHook seams: + // production uses the public overload above, which passes null for both -> + // the real native build/connect paths. White-box error-safety tests in + // io.questdb.client.test.impl call this to make SenderPool prewarm an + // observable delegate while QueryClientPool construction throws an Error, // exercising the cleanup catch below. - QuestDBImpl( + @TestOnly + public QuestDBImpl( String ingestConfig, String queryConfig, int senderMin, @@ -84,6 +91,33 @@ public QuestDBImpl( long housekeeperIntervalMillis, IntFunction senderFactory, Consumer connectHook + ) { + this(ingestConfig, queryConfig, senderMin, senderMax, queryMin, queryMax, + acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis, + housekeeperIntervalMillis, QueryClientPool.DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS, + senderFactory, connectHook, null, null); + } + + // Full constructor adding the ingest-side errorHandler/connectionListener, + // applied by SenderPool to every Sender it builds. The 12-arg overload above + // is the unchanged white-box test seam and delegates here with null + // callbacks; the public overload delegates here with null test seams. 
+ QuestDBImpl( + String ingestConfig, + String queryConfig, + int senderMin, + int senderMax, + int queryMin, + int queryMax, + long acquireTimeoutMillis, + long idleTimeoutMillis, + long maxLifetimeMillis, + long housekeeperIntervalMillis, + long queryCloseTimeoutMillis, + IntFunction senderFactory, + Consumer connectHook, + SenderErrorHandler errorHandler, + SenderConnectionListener connectionListener ) { SenderPool builtSenderPool = null; QueryClientPool builtQueryPool = null; @@ -95,10 +129,12 @@ public QuestDBImpl( // Defer SF startup recovery to the PoolHousekeeper thread so // build() never blocks on a slow / reachable-but-not-acking // server; the housekeeper drives it via runStartupRecoveryStep(). - true); + true, + errorHandler, connectionListener); builtQueryPool = new QueryClientPool( queryConfig, queryMin, queryMax, acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis, connectHook); + builtQueryPool.closeQueryTimeoutMillis(queryCloseTimeoutMillis); builtHousekeeper = new PoolHousekeeper(builtSenderPool, builtQueryPool, housekeeperIntervalMillis); builtHousekeeper.start(); } catch (Throwable e) { @@ -128,7 +164,11 @@ public QuestDBImpl( this.senderPool = builtSenderPool; this.queryPool = builtQueryPool; this.housekeeper = builtHousekeeper; - this.queryThreadLocal = ThreadLocal.withInitial(() -> new QueryImpl(queryPool)); + } + + @Override + public Query borrowQuery() { + return queryPool.acquire().lease(); } @Override @@ -182,30 +222,4 @@ private static void closeQuietly(AutoCloseable closeable) { } } - @Override - public Completion executeSql(CharSequence sql, QwpColumnBatchHandler handler) { - return query().sql(sql).handler(handler).submit(); - } - - @Override - public Query newQuery() { - return new QueryImpl(queryPool); - } - - @Override - public Query query() { - QueryImpl q = queryThreadLocal.get(); - q.resetIfDone(); - return q; - } - - @Override - public void releaseSender() { - senderPool.releaseCurrentThread(); - } - - @Override - 
public Sender sender() { - return senderPool.pinToCurrentThread(); - } } diff --git a/core/src/main/java/io/questdb/client/impl/SenderPool.java b/core/src/main/java/io/questdb/client/impl/SenderPool.java index 8c9fda7a..b971d1e2 100644 --- a/core/src/main/java/io/questdb/client/impl/SenderPool.java +++ b/core/src/main/java/io/questdb/client/impl/SenderPool.java @@ -25,11 +25,14 @@ package io.questdb.client.impl; import io.questdb.client.Sender; +import io.questdb.client.SenderConnectionListener; +import io.questdb.client.SenderErrorHandler; import io.questdb.client.cutlass.line.LineSenderException; import io.questdb.client.cutlass.qwp.client.QwpWebSocketSender; import io.questdb.client.cutlass.qwp.client.sf.cursor.OrphanScanner; import io.questdb.client.std.Files; import io.questdb.client.std.IntList; +import org.jetbrains.annotations.TestOnly; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,9 +96,13 @@ public final class SenderPool implements AutoCloseable { // transport has no application-level connect timeout to clamp it. private static final long RECOVERY_DRAIN_BUDGET_MILLIS = 1_000; private final long acquireTimeoutMillis; - private final ArrayList all; - private final ArrayDeque available; + private final ArrayList all; + private final ArrayDeque available; private final String configurationString; + // User-supplied ingest callbacks, shared across every pooled Sender this + // pool builds. Null -> each sender keeps its loud-not-silent default. + private final SenderConnectionListener connectionListener; + private final SenderErrorHandler errorHandler; private final long idleTimeoutMillis; // Test seam. Production builds delegates via defaultSender(); white-box // tests in io.questdb.client.test.impl reach the package-private @@ -132,7 +139,6 @@ public final class SenderPool implements AutoCloseable { private final Condition slotReleased; // True iff the configuration enables store-and-forward (sf_dir set). 
private final boolean storeAndForward; - private final ThreadLocal threadAffine = new ThreadLocal<>(); // Slots removed from `all` whose delegate is still releasing its flock. // They keep reserving capacity (and their slotInUse mark) until the // flock drops, so the cap check and the slot allocator stay consistent @@ -189,16 +195,17 @@ public SenderPool( long maxLifetimeMillis ) { this(configurationString, minSize, maxSize, acquireTimeoutMillis, - idleTimeoutMillis, maxLifetimeMillis, null); + idleTimeoutMillis, maxLifetimeMillis, null, false, null, null); } - // Package-private constructor exposing the senderFactory test seam: - // production passes null (-> the real defaultSender()). White-box tests in - // io.questdb.client.test.impl reach this by reflection to inject a factory - // that throws a non-RuntimeException Throwable mid-prewarm. Recovery runs - // inline here (deferStartupRecovery=false); the pooled QuestDB handle uses - // the 8-arg overload to defer it to the housekeeper thread. - SenderPool( + // Test-only constructor exposing the senderFactory seam: production builds + // via the full constructor below (senderFactory null -> the real + // defaultSender()). White-box tests inject a factory that throws a + // non-RuntimeException Throwable mid-prewarm. Recovery runs inline here + // (deferStartupRecovery=false); the pooled QuestDB handle uses the 8-arg + // overload to defer it to the housekeeper thread. + @TestOnly + public SenderPool( String configurationString, int minSize, int maxSize, @@ -211,14 +218,16 @@ public SenderPool( idleTimeoutMillis, maxLifetimeMillis, senderFactory, false); } - // Full constructor. deferStartupRecovery=true skips the inline, - // construction-time SF recovery (recoverOneSlotStep) so - // QuestDB.build() never blocks on a slow or reachable-but-not-acking - // server; the owner (QuestDBImpl) then drives recovery one slot per tick on - // the PoolHousekeeper thread via runStartupRecoveryStep(). 
The in-range - // recovery pass is concurrency-safe against borrow()/return on that + // Test-only constructor adding the deferStartupRecovery toggle. + // deferStartupRecovery=true skips the inline, construction-time SF recovery + // (recoverOneSlotStep) so QuestDB.build() never blocks on a slow or + // reachable-but-not-acking server; the owner (QuestDBImpl) then drives + // recovery one slot per tick on the PoolHousekeeper thread via + // runStartupRecoveryStep(). White-box SF tests call this directly; the + // in-range recovery pass is concurrency-safe against borrow()/return on the // deferred path -- see recoverOneSlotStep(). - SenderPool( + @TestOnly + public SenderPool( String configurationString, int minSize, int maxSize, @@ -227,10 +236,34 @@ public SenderPool( long maxLifetimeMillis, IntFunction senderFactory, boolean deferStartupRecovery + ) { + this(configurationString, minSize, maxSize, acquireTimeoutMillis, + idleTimeoutMillis, maxLifetimeMillis, senderFactory, + deferStartupRecovery, null, null); + } + + // Full constructor adding the user-supplied ingest callbacks (error handler + // and connection listener), applied to every Sender the pool builds (see + // buildManagedSlotSender). The public 6-arg ctor and the test-only + // senderFactory overloads above both delegate here with null callbacks; the + // pooled QuestDB handle calls this directly. + SenderPool( + String configurationString, + int minSize, + int maxSize, + long acquireTimeoutMillis, + long idleTimeoutMillis, + long maxLifetimeMillis, + IntFunction senderFactory, + boolean deferStartupRecovery, + SenderErrorHandler errorHandler, + SenderConnectionListener connectionListener ) { if (minSize < 0 || maxSize < 1 || minSize > maxSize) { throw new IllegalArgumentException("invalid pool sizing: min=" + minSize + ", max=" + maxSize); } + this.errorHandler = errorHandler; + this.connectionListener = connectionListener; this.senderFactory = senderFactory != null ? 
senderFactory : this::defaultSender; // An injected factory (tests) drives recovery too, preserving the // white-box recovery seam; production recovery forces OFF-mode connects @@ -262,7 +295,7 @@ public SenderPool( if (storeAndForward) { slotInUse[i] = true; } - PooledSender ps = createUnlocked(storeAndForward ? i : -1); + SenderSlot ps = createUnlocked(storeAndForward ? i : -1); all.add(ps); available.add(ps); built++; @@ -571,7 +604,7 @@ private boolean drainCandidateSlotForRecovery(int slotIndex, String slotPath, // createRecoverer() takes the slot flock on -slotIndex, and // delegate().close() can early-return with the I/O thread still running // (flock still held). - PooledSender recoverer = null; + SenderSlot recoverer = null; boolean stopScan = false; try { if (!OrphanScanner.isCandidateOrphan(slotPath)) { @@ -597,7 +630,7 @@ private boolean drainCandidateSlotForRecovery(int slotIndex, String slotPath, // on a timeout: a server that fails to ack within the budget // will very likely do the same for every remaining slot -- the // same reasoning as the build-failure case above. - if (!recoverer.drain(remainingMillis)) { + if (!recoverer.delegate().drain(remainingMillis)) { LOG.warn("startup SF recovery: drain did not ack slot {} " + "within {}ms; skipping remaining slots", slotPath, remainingMillis); @@ -636,9 +669,12 @@ public PooledSender borrow() { throw new LineSenderException("QuestDB handle is closed"); } if (!available.isEmpty()) { - PooledSender s = available.pollFirst(); - s.markInUse(); - return s; + SenderSlot s = available.pollFirst(); + // Stamp a fresh lease id under the lock so the PooledSender + // wrapper handed out can be told apart from any prior, + // now-stale borrow of the same slot. 
+ s.bumpGeneration(); + return new PooledSender(s, s.generation()); } if (all.size() + inFlightCreations + closingSlots + leakedSlots + recoveringSlots < maxSize) { inFlightCreations++; @@ -647,7 +683,7 @@ public PooledSender borrow() { // SF is off (no per-slot identity needed). int slotIndex = storeAndForward ? allocateSlotIndex() : -1; lock.unlock(); - PooledSender created; + SenderSlot created; try { created = createUnlocked(slotIndex); } catch (Throwable e) { @@ -685,8 +721,8 @@ public PooledSender borrow() { throw new LineSenderException("QuestDB handle is closed"); } all.add(created); - created.markInUse(); - return created; + created.bumpGeneration(); + return new PooledSender(created, created.generation()); } if (remainingNanos <= 0) { throw new LineSenderException( @@ -721,7 +757,7 @@ void markClosing() { @Override public void close() { - PooledSender[] snapshot; + SenderSlot[] snapshot; lock.lock(); try { if (closeStarted) { @@ -731,22 +767,13 @@ public void close() { // Raise the shutdown signal too (a direct, non-pooled caller may // close() without a prior markClosing()); harmless if already set. closed = true; - // Mark every pooled wrapper invalidated so pinToCurrentThread() - // on other threads -- which never takes this lock -- can detect - // that its cached entry no longer wraps a live delegate. Removing - // the calling thread's ThreadLocal only clears one slot; other - // threads' slots survive until they read the flag. - for (int i = 0; i < all.size(); i++) { - all.get(i).markInvalidated(); - } // Snapshot under the lock so the delegate-close loop below is // immune to concurrent mutation of `all`. discardBroken running // on another thread can still bail thanks to the `closed` check // it now performs; the snapshot is belt-and-braces for any // future code path that mutates `all` outside this lock's // happens-before chain. 
- snapshot = all.toArray(new PooledSender[0]); - threadAffine.remove(); + snapshot = all.toArray(new SenderSlot[0]); slotReleased.signalAll(); } finally { lock.unlock(); @@ -763,27 +790,11 @@ public void close() { } } - /** - * Clears the current thread's pin if it currently references {@code s}. - * Invoked from {@link PooledSender#close()} before the wrapper is - * returned to the pool, so a subsequent {@link #pinToCurrentThread()} - * on this thread cannot hand the wrapper back after another consumer - * has borrowed the slot. No-op when the caller never pinned, or pinned - * a different wrapper. - */ - void clearPinIfCurrent(PooledSender s) { - if (threadAffine.get() == s) { - threadAffine.remove(); - } - } - /** * Evicts a slot whose delegate has failed (typically a {@code flush()} - * failure observed in {@link PooledSender#close()}). The wrapper is - * marked invalidated so any thread-pinned reference gets rejected on the - * next {@link #pinToCurrentThread()} call; the slot is removed from - * {@code all} so the pool can grow back into a fresh slot on demand. The - * underlying delegate is closed outside the lock so a slow real-close + * failure observed in {@link PooledSender#close()}). The slot is removed + * from {@code all} so the pool can grow back into a fresh slot on demand. + * The underlying delegate is closed outside the lock so a slow real-close * does not stall other borrowers. *

* Bails when the pool is already closed: {@link #close()} owns the @@ -792,14 +803,22 @@ void clearPinIfCurrent(PooledSender s) { * {@code ArrayList} and the {@code delegate.close()} below would be a * double-close on a delegate {@code close()} has already shut down. */ - void discardBroken(PooledSender s) { - s.markInvalidated(); + void discardBroken(PooledSender ps) { + SenderSlot s = ps.slot(); + long gen = ps.generation(); boolean reserved = false; lock.lock(); try { if (closed) { return; } + if (s.generation() != gen) { + // Stale discard: the slot was already returned/discarded and + // possibly re-borrowed. Dropping it avoids evicting a slot a + // different borrower now owns and double-closing its delegate. + return; + } + s.bumpGeneration(); boolean removed = all.remove(s); // For an SF slot, keep its index reserved (move the reservation // from `all` to `closingSlots`) until the delegate below releases @@ -844,15 +863,26 @@ void discardBroken(PooledSender s) { } } - public void giveBack(PooledSender s) { - long now = System.currentTimeMillis(); - s.markIdleAt(now); + public void giveBack(PooledSender ps) { + SenderSlot s = ps.slot(); + long gen = ps.generation(); lock.lock(); try { if (closed) { // Pool already shut down: don't requeue; let close() finish destroying. return; } + if (s.generation() != gen) { + // Stale return: this lease was already given back and the slot + // possibly re-borrowed (or this is a duplicate close). Dropping + // it keeps Sender.close() idempotent under a concurrent + // re-borrow -- without it a double close would enqueue the slot + // twice and hand it to two borrowers writing into one delegate. 
+ return; + } + s.bumpGeneration(); + s.markIdleAt(System.currentTimeMillis()); + assert !available.contains(s) : "slot already present in available deque on giveBack"; available.addLast(s); slotReleased.signal(); } finally { @@ -860,19 +890,6 @@ public void giveBack(PooledSender s) { } } - public PooledSender pinToCurrentThread() { - PooledSender pinned = threadAffine.get(); - if (pinned != null && !pinned.isInvalidated()) { - return pinned; - } - if (pinned != null) { - threadAffine.remove(); - } - PooledSender s = borrow(); - threadAffine.set(s); - return s; - } - /** * Closes idle slots that have exceeded {@code idleTimeoutMillis} or that * have aged past {@code maxLifetimeMillis}. Never shrinks below @@ -883,15 +900,15 @@ public void reapIdle() { return; } long now = System.currentTimeMillis(); - ArrayList toClose = null; + ArrayList toClose = null; lock.lock(); try { if (closed) { return; } - Iterator it = available.iterator(); + Iterator it = available.iterator(); while (it.hasNext() && all.size() > minSize) { - PooledSender s = it.next(); + SenderSlot s = it.next(); boolean idleExpired = idleTimeoutMillis < Long.MAX_VALUE && (now - s.idleSinceMillis()) >= idleTimeoutMillis; boolean overAge = maxLifetimeMillis < Long.MAX_VALUE @@ -933,7 +950,7 @@ public void reapIdle() { lock.lock(); try { for (int i = 0, n = toClose.size(); i < n; i++) { - PooledSender s = toClose.get(i); + SenderSlot s = toClose.get(i); if (s.slotIndex() >= 0) { reclaimSlot(s, " during idle reaping"); } @@ -983,32 +1000,19 @@ public int leakedSlotCount() { } } - public void releaseCurrentThread() { - PooledSender pinned = threadAffine.get(); - if (pinned == null) { - return; - } - threadAffine.remove(); - if (pinned.isInvalidated()) { - // Pool was closed: delegate is already closed, skip flush/giveBack. 
- return; - } - pinned.close(); - } - - private PooledSender createUnlocked(int slotIndex) { - return new PooledSender(senderFactory.apply(slotIndex), this, slotIndex); + private SenderSlot createUnlocked(int slotIndex) { + return new SenderSlot(senderFactory.apply(slotIndex), this, slotIndex); } /** - * Builds a {@link PooledSender} for startup recovery of one stranded slot. + * Builds a {@link SenderSlot} for startup recovery of one stranded slot. * Routes through {@link #recoverySenderFactory}, which in production forces * a non-blocking initial connect ({@link #defaultRecoverySender}) so a * single recovery step stays bounded -- see that method and * {@link #drainCandidateSlotForRecovery}. */ - private PooledSender createRecoverer(int slotIndex) { - return new PooledSender(recoverySenderFactory.apply(slotIndex), this, slotIndex); + private SenderSlot createRecoverer(int slotIndex) { + return new SenderSlot(recoverySenderFactory.apply(slotIndex), this, slotIndex); } private Sender defaultSender(int slotIndex) { @@ -1035,9 +1039,21 @@ private Sender defaultRecoverySender(int slotIndex) { return buildManagedSlotSender(slotIndex, true); } + // Applies the user-supplied ingest callbacks to a sender builder. Null + // callbacks are skipped so the sender keeps its loud-not-silent default. 
+ private Sender.LineSenderBuilder applyUserCallbacks(Sender.LineSenderBuilder builder) { + if (errorHandler != null) { + builder.errorHandler(errorHandler); + } + if (connectionListener != null) { + builder.connectionListener(connectionListener); + } + return builder; + } + private Sender buildManagedSlotSender(int slotIndex, boolean forRecovery) { if (!storeAndForward) { - return Sender.fromConfig(configurationString); + return applyUserCallbacks(Sender.builder(configurationString)).build(); } // Give this pooled sender its own slot dir /- // so concurrent SF senders sharing one sf_dir never collide on @@ -1091,7 +1107,9 @@ private Sender buildManagedSlotSender(int slotIndex, boolean forRecovery) { // returns). builder.drainOrphans(false); } - return builder.build(); + // Recovery delegates are internal, short-lived, OFF-mode drain senders; + // don't surface their connect/error events to the user's callbacks. + return (forRecovery ? builder : applyUserCallbacks(builder)).build(); } /** @@ -1130,7 +1148,7 @@ private void freeSlotIndex(int idx) { * {@link QwpWebSocketSender#isSlotLockReleased()} -- false means close() * bailed early with the I/O thread still running and the flock still held. */ - private static boolean flockReleased(PooledSender s) { + private static boolean flockReleased(SenderSlot s) { Sender d = s.delegate(); return !(d instanceof QwpWebSocketSender) || ((QwpWebSocketSender) d).isSlotLockReleased(); } @@ -1153,7 +1171,7 @@ private static boolean flockReleased(PooledSender s) { * path (e.g. 
{@code ""} or {@code " during idle reaping"}) * @return {@code true} if the index was freed, {@code false} if retired */ - private boolean reclaimSlot(PooledSender s, String context) { + private boolean reclaimSlot(SenderSlot s, String context) { closingSlots--; if (flockReleased(s)) { freeSlotIndex(s.slotIndex()); diff --git a/core/src/main/java/io/questdb/client/impl/SenderSlot.java b/core/src/main/java/io/questdb/client/impl/SenderSlot.java new file mode 100644 index 00000000..19c93671 --- /dev/null +++ b/core/src/main/java/io/questdb/client/impl/SenderSlot.java @@ -0,0 +1,118 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.impl; + +import io.questdb.client.Sender; + +/** + * One reusable {@link SenderPool} slot: owns a real {@link Sender} delegate, its + * store-and-forward slot index, and the idle/age bookkeeping the pool needs. + * Pre-allocated once per slot and held in the pool's {@code all}/{@code + * available} collections across borrows; it is never handed to callers + * directly. + *

+ * Each borrow wraps the slot in a fresh {@link PooledSender} stamped with the + * slot's current lease {@link #generation}. Because the slot is shared across + * borrows, a stale handle's {@code close()} or data write must not release, or + * write through, a slot a later borrower now owns. The generation -- mutated + * only under the pool lock when the slot is handed out and returned -- is what + * lets {@link #live(long)} and {@link SenderPool#giveBack}/{@link + * SenderPool#discardBroken} detect and drop such stale calls. This is the + * ingest-side mirror of the egress {@code QueryWorker} generation guard. + */ +final class SenderSlot { + + private final long createdAtMillis; + private final Sender delegate; + private final SenderPool pool; + private final int slotIndex; + // Monotonic lease id. Mutated only under the SenderPool lock (bumped in + // borrow() when the slot is handed out and in giveBack()/discardBroken() + // when it is returned). A PooledSender wrapper captures it live for its + // borrow; once the slot is released or re-borrowed the captured id no + // longer matches. Volatile so a stale handle on another thread observes + // the latest value without taking the pool lock. + private volatile long generation; + private volatile long idleSinceMillis; + + SenderSlot(Sender delegate, SenderPool pool, int slotIndex) { + this.delegate = delegate; + this.pool = pool; + this.slotIndex = slotIndex; + this.createdAtMillis = System.currentTimeMillis(); + this.idleSinceMillis = this.createdAtMillis; + } + + /** + * Advances the lease generation. Called by {@link SenderPool} under the + * pool lock when the slot is handed out (borrow) and when it is returned + * (giveBack/discardBroken). 
+ */ + void bumpGeneration() { + generation++; + } + + long createdAtMillis() { + return createdAtMillis; + } + + Sender delegate() { + return delegate; + } + + long generation() { + return generation; + } + + long idleSinceMillis() { + return idleSinceMillis; + } + + /** + * Validates the borrowing lease's {@code gen} and returns the underlying + * delegate for a data-plane call. Throws if the lease is stale (the slot + * was returned to the pool or re-borrowed), so a stale handle cannot write + * into a slot a later borrower owns. Called by {@link PooledSender} on + * every operation. + */ + Sender live(long gen) { + if (gen != generation) { + throw new IllegalStateException("sender handle is closed (returned to the pool)"); + } + return delegate; + } + + void markIdleAt(long nowMillis) { + idleSinceMillis = nowMillis; + } + + SenderPool pool() { + return pool; + } + + int slotIndex() { + return slotIndex; + } +} diff --git a/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java b/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java index 4d363fbb..3a4d1503 100644 --- a/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java +++ b/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java @@ -307,91 +307,13 @@ public int send(long bufferPtr, int bufferLen) { } @Override - public void startTlsSession(CharSequence peerName) throws TlsSessionInitFailedException { + public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) throws TlsSessionInitFailedException { assert state == STATE_PLAINTEXT; prepareInternalBuffers(); try { this.sslEngine = createSslEngine(peerName); this.sslEngine.beginHandshake(); - SSLEngineResult.HandshakeStatus handshakeStatus = sslEngine.getHandshakeStatus(); - while (handshakeStatus != SSLEngineResult.HandshakeStatus.FINISHED) { - switch (handshakeStatus) { - case NEED_TASK: - Runnable task; - while ((task = sslEngine.getDelegatedTask()) != null) { - task.run(); 
- } - handshakeStatus = sslEngine.getHandshakeStatus(); - break; - case NEED_WRAP: { - SSLEngineResult result = sslEngine.wrap(wrapInputBuffer, wrapOutputBuffer); - handshakeStatus = result.getHandshakeStatus(); - switch (result.getStatus()) { - case BUFFER_UNDERFLOW: - // there cannot be underflow since wrap() during handshake does not read from the input buffer at all - throw new AssertionError("Buffer underflow during TLS handshake. This should not happen. please report as a bug"); - case BUFFER_OVERFLOW: - if (wrapOutputBuffer.position() != 0) { - // wrap() left bytes behind without producing a complete record. The OK - // branch is the only place that drains and clears, so a non-empty - // buffer here means we would re-enter NEED_WRAP with identical state - // and spin forever. Fail loudly instead. - throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug"); - } - // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record, - // but that would indicate our starting buffer is too small. 
- growWrapOutputBuffer(); - break; - case OK: - // wrapOutputBuffer: write mode - int written = 0; - int bufferLimit = wrapOutputBuffer.position(); - while (written < bufferLimit) { - int n = delegate.send(wrapOutputBufferPtr + written, bufferLimit - written); - if (n < 0) { - throw TlsSessionInitFailedException.instance("socket write error"); - } - written += n; - } - wrapOutputBuffer.clear(); - break; - case CLOSED: - throw TlsSessionInitFailedException.instance("server closed connection unexpectedly"); - } - break; - } - case NEED_UNWRAP: { - int n = readFromSocket(); - if (n < 0) { - throw TlsSessionInitFailedException.instance("socket read error"); - } - SSLEngineResult result = sslEngine.unwrap(unwrapInputBuffer, unwrapOutputBuffer); - handshakeStatus = result.getHandshakeStatus(); - switch (result.getStatus()) { - case BUFFER_UNDERFLOW: - // we need to receive more data from a socket, let's try again - break; - case BUFFER_OVERFLOW: - if (unwrapOutputBuffer.position() != 0) { - // unwrap() produced plaintext but signalled overflow without consuming - // the next record. Nothing in the handshake loop drains this buffer, - // so re-entering NEED_UNWRAP would spin forever. Fail loudly. - throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug"); - } - // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record, - // but that would indicate our starting buffer is too small. 
- growUnwrapOutputBuffer(); - break; - case OK: - // good, let's see what we need to do next - break; - case CLOSED: - throw TlsSessionInitFailedException.instance("server closed connection unexpectedly"); - } - } - break; - } - } + runHandshake(waiter); // unwrap input buffer: read mode and empty unwrapInputBuffer.position(0); unwrapInputBuffer.limit(0); @@ -583,6 +505,108 @@ private int readFromSocket() { return n; } + /** + * Drives the TLS handshake state machine to completion. When the + * non-blocking socket would block, hands control to {@code waiter} (which + * parks on the event loop bounded by the connect deadline) instead of + * busy-spinning on read/write. Extracted from {@link #startTlsSession} so a + * stub {@code sslEngine} can exercise the wait paths in isolation. + */ + private void runHandshake(SocketReadinessWaiter waiter) throws SSLException, TlsSessionInitFailedException { + SSLEngineResult.HandshakeStatus handshakeStatus = sslEngine.getHandshakeStatus(); + while (handshakeStatus != SSLEngineResult.HandshakeStatus.FINISHED) { + switch (handshakeStatus) { + case NEED_TASK: + Runnable task; + while ((task = sslEngine.getDelegatedTask()) != null) { + task.run(); + } + handshakeStatus = sslEngine.getHandshakeStatus(); + break; + case NEED_WRAP: { + SSLEngineResult result = sslEngine.wrap(wrapInputBuffer, wrapOutputBuffer); + handshakeStatus = result.getHandshakeStatus(); + switch (result.getStatus()) { + case BUFFER_UNDERFLOW: + // there cannot be underflow since wrap() during handshake does not read from the input buffer at all + throw new AssertionError("Buffer underflow during TLS handshake. This should not happen. please report as a bug"); + case BUFFER_OVERFLOW: + if (wrapOutputBuffer.position() != 0) { + // wrap() left bytes behind without producing a complete record. The OK + // branch is the only place that drains and clears, so a non-empty + // buffer here means we would re-enter NEED_WRAP with identical state + // and spin forever. 
Fail loudly instead. + throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug"); + } + // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record, + // but that would indicate our starting buffer is too small. + growWrapOutputBuffer(); + break; + case OK: + // wrapOutputBuffer: write mode + int written = 0; + int bufferLimit = wrapOutputBuffer.position(); + while (written < bufferLimit) { + int n = delegate.send(wrapOutputBufferPtr + written, bufferLimit - written); + if (n < 0) { + throw TlsSessionInitFailedException.instance("socket write error"); + } + if (n == 0) { + // The non-blocking socket's send buffer is full. Wait for it to + // become writable -- bounded by the connect deadline -- instead of + // busy-spinning on send(). + waiter.awaitReady(IOOperation.WRITE); + } + written += n; + } + wrapOutputBuffer.clear(); + break; + case CLOSED: + throw TlsSessionInitFailedException.instance("server closed connection unexpectedly"); + } + break; + } + case NEED_UNWRAP: { + int n = readFromSocket(); + if (n < 0) { + throw TlsSessionInitFailedException.instance("socket read error"); + } + SSLEngineResult result = sslEngine.unwrap(unwrapInputBuffer, unwrapOutputBuffer); + handshakeStatus = result.getHandshakeStatus(); + switch (result.getStatus()) { + case BUFFER_UNDERFLOW: + // Not enough bytes for a complete TLS record yet. If the last read + // drained the socket (n == 0, would-block on the non-blocking fd), wait + // for it to become readable -- bounded by the connect deadline -- instead + // of busy-spinning. A positive n means we read a partial record, so loop + // immediately and read the rest. + if (n == 0) { + waiter.awaitReady(IOOperation.READ); + } + break; + case BUFFER_OVERFLOW: + if (unwrapOutputBuffer.position() != 0) { + // unwrap() produced plaintext but signalled overflow without consuming + // the next record. 
Nothing in the handshake loop drains this buffer, + // so re-entering NEED_UNWRAP would spin forever. Fail loudly. + throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug"); + } + // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record, + // but that would indicate our starting buffer is too small. + growUnwrapOutputBuffer(); + break; + case OK: + // good, let's see what we need to do next + break; + case CLOSED: + throw TlsSessionInitFailedException.instance("server closed connection unexpectedly"); + } + } + break; + } + } + } + private int writeToSocket(int bytesToSend) { // wrapOutputBuffer is in the write mode int n = delegate.send(wrapOutputBufferPtr, bytesToSend); diff --git a/core/src/main/java/io/questdb/client/network/Net.java b/core/src/main/java/io/questdb/client/network/Net.java index 040a2cb7..f649d330 100644 --- a/core/src/main/java/io/questdb/client/network/Net.java +++ b/core/src/main/java/io/questdb/client/network/Net.java @@ -36,6 +36,11 @@ public final class Net { + // Sentinel returned by connectAddrInfoTimeout when the connect did not + // complete within the supplied budget. Distinct from -1 (generic error) and + // the disconnect codes so callers can flag a timeout without decoding errno. + @SuppressWarnings("unused") + public static final int CONNECT_TIMEOUT = -3; @SuppressWarnings("unused") public static final int EOTHERDISCONNECT = -2; @SuppressWarnings("unused") @@ -88,6 +93,14 @@ public static void configureKeepAlive(int fd) { public static native int connectAddrInfo(int fd, long lpAddrInfo); + /** + * Non-blocking connect bounded by {@code timeoutMillis}. Returns 0 on + * success, {@link #CONNECT_TIMEOUT} on timeout, or -1 on failure (errno set, + * readable via {@link io.questdb.client.std.Os#errno()}). The socket is left + * non-blocking on success. 
+ */ + public static native int connectAddrInfoTimeout(int fd, long lpAddrInfo, int timeoutMillis); + public static void freeAddrInfo(long pAddrInfo) { if (pAddrInfo != 0) { ADDR_INFO_COUNTER.decrementAndGet(); diff --git a/core/src/main/java/io/questdb/client/network/NetworkFacade.java b/core/src/main/java/io/questdb/client/network/NetworkFacade.java index b2e97dad..d23824a5 100644 --- a/core/src/main/java/io/questdb/client/network/NetworkFacade.java +++ b/core/src/main/java/io/questdb/client/network/NetworkFacade.java @@ -27,6 +27,12 @@ import org.slf4j.Logger; public interface NetworkFacade { + /** + * Return value of {@link #connectAddrInfoTimeout(int, long, int)} when the + * connect did not complete within the supplied budget. + */ + int CONNECT_TIMEOUT = Net.CONNECT_TIMEOUT; + int close(int fd); void close(int fd, Logger logger); @@ -39,6 +45,13 @@ public interface NetworkFacade { int connectAddrInfo(int fd, long pAddrInfo); + /** + * Non-blocking connect bounded by {@code timeoutMillis}. Returns 0 on + * success, {@link #CONNECT_TIMEOUT} on timeout, or -1 on failure (with + * {@link #errno()} set). The socket is left non-blocking on success. 
+ */ + int connectAddrInfoTimeout(int fd, long pAddrInfo, int timeoutMillis); + int errno(); void freeAddrInfo(long pAddrInfo); diff --git a/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java b/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java index 11195fc2..64ea0dc7 100644 --- a/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java +++ b/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java @@ -62,6 +62,11 @@ public int connectAddrInfo(int fd, long pAddrInfo) { return Net.connectAddrInfo(fd, pAddrInfo); } + @Override + public int connectAddrInfoTimeout(int fd, long pAddrInfo, int timeoutMillis) { + return Net.connectAddrInfoTimeout(fd, pAddrInfo, timeoutMillis); + } + @Override public int errno() { return Os.errno(); diff --git a/core/src/main/java/io/questdb/client/network/PlainSocket.java b/core/src/main/java/io/questdb/client/network/PlainSocket.java index 06e8c23e..555affd2 100644 --- a/core/src/main/java/io/questdb/client/network/PlainSocket.java +++ b/core/src/main/java/io/questdb/client/network/PlainSocket.java @@ -71,7 +71,7 @@ public int send(long bufferPtr, int bufferLen) { } @Override - public void startTlsSession(CharSequence peerName) { + public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) { throw new UnsupportedOperationException(); } diff --git a/core/src/main/java/io/questdb/client/network/Socket.java b/core/src/main/java/io/questdb/client/network/Socket.java index dec4db4e..0cdce517 100644 --- a/core/src/main/java/io/questdb/client/network/Socket.java +++ b/core/src/main/java/io/questdb/client/network/Socket.java @@ -84,9 +84,12 @@ public interface Socket extends QuietCloseable { * on server connections. * * @param peerName server name to use for SNI and certificate validation. 
+ * @param waiter blocks until the socket is ready for the next handshake + * read/write (bounded by the connect deadline), so the + * handshake does not busy-spin on the non-blocking socket. * @throws TlsSessionInitFailedException if the call fails. */ - void startTlsSession(@Nullable CharSequence peerName) throws TlsSessionInitFailedException; + void startTlsSession(@Nullable CharSequence peerName, SocketReadinessWaiter waiter) throws TlsSessionInitFailedException; /** * @return true if the socket support TLS encryption; false otherwise. diff --git a/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java b/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java new file mode 100644 index 00000000..8543d3e6 --- /dev/null +++ b/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java @@ -0,0 +1,46 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +package io.questdb.client.network; + +/** + * Blocks until a non-blocking socket is ready for a given I/O operation, or + * throws a timeout-flagged exception once the caller's deadline passes. + * <p>
+ * Used to drive the TLS handshake off the client's event loop: instead of + * busy-spinning on a non-blocking socket that returns "would block", the + * handshake hands control to this waiter, which parks on epoll/kqueue/select + * with the remaining connect budget. This bounds the handshake by the same + * deadline as the TCP connect and keeps a stalled peer from pinning a CPU. + */ +@FunctionalInterface +public interface SocketReadinessWaiter { + /** + * Blocks until the socket is ready for {@code ioOperation}, or throws a + * timeout-flagged exception when the connect deadline is exceeded. + * + * @param ioOperation {@link IOOperation#READ} or {@link IOOperation#WRITE} + */ + void awaitReady(int ioOperation); +} diff --git a/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib b/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib deleted file mode 100644 index 82d21e59..00000000 Binary files a/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib and /dev/null differ diff --git a/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib b/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib deleted file mode 100644 index 647a12cb..00000000 Binary files a/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib and /dev/null differ diff --git a/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so b/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so deleted file mode 100644 index 94ad41c1..00000000 Binary files a/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so and /dev/null differ diff --git a/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so b/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so old mode 100644 new mode 100755 index 15c0135d..82797659 Binary files 
a/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so and b/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so differ diff --git a/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll b/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll deleted file mode 100755 index e95dcecd..00000000 Binary files a/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll and /dev/null differ diff --git a/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java b/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java index 1734360b..5b06513c 100644 --- a/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java +++ b/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java @@ -51,150 +51,50 @@ public void testBuilderCallAfterFromConfigOverridesPoolKeysFromString() { Assert.assertEquals(150L, b.poolConfigSnapshotForTest().get("acquire_timeout_ms")); } - @Test - public void testConflictingIntPoolKeyAcrossSidesRejected() { - // Both sides carry sender_pool_max (an int pool key) with different - // values -> build fails via resolvePoolInt's conflict check. The long - // pool keys are covered by testConflictingPoolKeysAcrossSidesRejected; - // this guards the separate int code path. - try (QuestDB ignored = QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=2;") - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;sender_pool_max=5;") - .build()) { - Assert.fail("expected conflicting pool config"); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage(), e.getMessage().contains("conflicting pool config: sender_pool_max")); - } - } - - @Test - public void testConflictingPoolKeysAcrossSidesRejected() { - // Both sides carry acquire_timeout_ms with different values -> build fails. 
- try (QuestDB ignored = QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;") - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;") - .build()) { - Assert.fail("expected conflicting pool config"); - } catch (IllegalArgumentException e) { - Assert.assertTrue(e.getMessage(), e.getMessage().contains("conflicting pool config: acquire_timeout_ms")); - } - } - - @Test - public void testConnectRejectsNonWsSchemaOnSingleString() { - // QuestDB.connect(single string) must enforce the ws/wss schema, just - // like the builder's fromConfig(). - assertSchemaRejected(() -> QuestDB.connect("http::addr=h:9000;")); - } - - @Test - public void testConnectRejectsNonWsSchemaOnTwoArg() { - // QuestDB.connect(ingest, query) rejects a non-ws schema on either side. - assertSchemaRejected(() -> QuestDB.connect("tcp::addr=h:9009;", "ws::addr=h:9000;")); - assertSchemaRejected(() -> QuestDB.connect("ws::addr=h:9000;", "udp::addr=h:9009;")); - } - @Test public void testConnectSingleStringValidatesAndBuilds() { - // QuestDB.connect(single string) hands the same ws:: string to both the - // ingest and query sides. min=0 on both pools validates both clients - // without connecting, so build() returns a live handle. + // QuestDB.connect(single string) hands the same ws:: cluster string to + // both the ingest and query pools. min=0 on both pools validates both + // clients without connecting, so build() returns a live handle. try (QuestDB ignored = QuestDB.connect( "ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;")) { Assert.assertNotNull(ignored); } } - @Test - public void testConnectStringWithPoolKeysAppliedToBuilder() { - // Pool keys supplied via separate ingest/query strings are accepted; - // min=0 so nothing connects. 
- try (QuestDB ignored = QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=1;") - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;query_pool_max=1;") - .build()) { - Assert.assertNotNull(ignored); - } - } - - @Test - public void testConnectTwoArgValidatesAndBuilds() { - // QuestDB.connect(ingest, query) sets the two sides independently; - // min=0 on each validates both clients without connecting. - try (QuestDB ignored = QuestDB.connect( - "ws::addr=127.0.0.1:1;sender_pool_min=0;", - "ws::addr=127.0.0.1:1;query_pool_min=0;")) { - Assert.assertNotNull(ignored); - } - } - - @Test - public void testExplicitPoolKeyWinsOverConflictingStrings() { - // The two strings disagree on acquire_timeout_ms, but an explicit builder - // call sets it: explicit wins and the conflict check is skipped, whether - // the explicit call comes after or before the config strings. The resolved - // value is the explicit 500, not either string's value. - QuestDBBuilder after = QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;") - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;") - .acquireTimeoutMillis(500); - try (QuestDB ignored = after.build()) { - Assert.assertNotNull(ignored); - } - Assert.assertEquals(500L, after.poolConfigSnapshotForTest().get("acquire_timeout_ms")); - - QuestDBBuilder before = QuestDB.builder() - .acquireTimeoutMillis(500) - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;") - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;"); - try (QuestDB ignored = before.build()) { - Assert.assertNotNull(ignored); - } - Assert.assertEquals(500L, before.poolConfigSnapshotForTest().get("acquire_timeout_ms")); - } - - @Test - public void testHttpIngestConfigRejected() { - assertSchemaRejected(() -> QuestDB.builder().ingestConfig("http::addr=h:9000;")); - } - - @Test - public void 
testHttpSingleConfigRejected() { - assertSchemaRejected(() -> QuestDB.builder().fromConfig("http::addr=h:9000;")); - } - @Test public void testMalformedEgressConfigRejectedAtBuildWithMinZero() { // query_pool_min=0 pre-warms nothing, so build() never constructs a - // QwpQueryClient -- yet it must still reject a malformed query config up - // front via QwpQueryClient.validateConfig, mirroring the ingress side. + // QwpQueryClient -- yet it must still reject a malformed egress key in + // the single cluster config up front, mirroring the ingress side. // Covers a typed enum (compression) and a bounded int (compression_level). - assertEgressBuildRejected( - "ws::addr=127.0.0.1:1;compression=gzip;query_pool_min=0;query_pool_max=2;", "compression"); - assertEgressBuildRejected( - "ws::addr=127.0.0.1:1;compression_level=99;query_pool_min=0;query_pool_max=2;", "compression_level"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;compression=gzip;sender_pool_min=0;query_pool_min=0;query_pool_max=2;", + "compression"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;compression_level=99;sender_pool_min=0;query_pool_min=0;query_pool_max=2;", + "compression_level"); } @Test public void testMalformedIngressConfigRejectedAtBuildWithMinZero() { // sender_pool_min=0 pre-warms nothing, so build() never constructs a - // Sender -- yet it must still reject a malformed ingest config up front, - // matching the egress side. Covers a typed enum (tls_verify), a + // Sender -- yet it must still reject a malformed ingress key in the + // single cluster config up front. Covers a typed enum (tls_verify), a // registry-STRING value that only the real Sender parse validates - // (auto_flush_rows), and WebSocket build-time checks that only the full - // no-connect validation reaches: auto_flush=off and auto_flush_interval=off - // both disable auto-flush (unsupported on WebSocket), and sf_durability=flush - // is not yet supported. 
- assertIngressBuildRejected( - "wss::addr=127.0.0.1:1;tls_verify=strict;sender_pool_min=0;sender_pool_max=2;", "tls_verify"); - assertIngressBuildRejected( - "ws::addr=127.0.0.1:1;auto_flush_rows=abc;sender_pool_min=0;sender_pool_max=2;", "auto_flush_rows"); - assertIngressBuildRejected( - "ws::addr=127.0.0.1:1;auto_flush_interval=off;sender_pool_min=0;sender_pool_max=2;", "auto-flush"); - assertIngressBuildRejected( - "ws::addr=127.0.0.1:1;auto_flush=off;sender_pool_min=0;sender_pool_max=2;", "auto-flush"); - assertIngressBuildRejected( - "ws::addr=127.0.0.1:1;sf_durability=flush;sender_pool_min=0;sender_pool_max=2;", "not yet supported"); + // (auto_flush_rows), and WebSocket build-time checks: auto_flush=off and + // auto_flush_interval=off both disable auto-flush (unsupported on + // WebSocket), and sf_durability=flush is not yet supported. + assertBuildRejected( + "wss::addr=127.0.0.1:1;tls_verify=strict;sender_pool_min=0;query_pool_min=0;", "tls_verify"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;auto_flush_rows=abc;sender_pool_min=0;query_pool_min=0;", "auto_flush_rows"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;auto_flush_interval=off;sender_pool_min=0;query_pool_min=0;", "auto-flush"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;auto_flush=off;sender_pool_min=0;query_pool_min=0;", "auto-flush"); + assertBuildRejected( + "ws::addr=127.0.0.1:1;sf_durability=flush;sender_pool_min=0;query_pool_min=0;", "not yet supported"); } @Test @@ -212,22 +112,12 @@ public void testMalformedPoolValueRejectedAtBuild() { } @Test - public void testMissingIngestConfigThrows() { - try { - QuestDB.builder().queryConfig("ws::addr=h:9000;").build().close(); - Assert.fail(); - } catch (IllegalStateException e) { - Assert.assertTrue(e.getMessage().contains("ingest")); - } - } - - @Test - public void testMissingQueryConfigThrows() { + public void testMissingConfigThrows() { try { - QuestDB.builder().ingestConfig("ws::addr=h:9000;").build().close(); + 
QuestDB.builder().build().close(); Assert.fail(); } catch (IllegalStateException e) { - Assert.assertTrue(e.getMessage().contains("query")); + Assert.assertTrue(e.getMessage(), e.getMessage().contains("configuration")); } } @@ -254,26 +144,37 @@ public void testNegativePoolSizesRejected() { } } + @Test + public void testNonWsSchemaRejected() { + // The single cluster config (and QuestDB.connect) must use ws/wss. + assertSchemaRejected(() -> QuestDB.builder().fromConfig("http::addr=h:9000;")); + assertSchemaRejected(() -> QuestDB.builder().fromConfig("tcp::addr=h:9009;")); + assertSchemaRejected(() -> QuestDB.builder().fromConfig("udp::addr=h:9009;")); + assertSchemaRejected(() -> QuestDB.connect("http::addr=h:9000;").close()); + } + @Test public void testQueryPoolBuildFailureUnwindsSenderPool() throws Exception { - // Sender pool builds against a healthy ws ingest endpoint; the query - // pool fails on a dead address. The handle must close the already-built - // sender pool (its connected senders) rather than leak them. - try (TestWebSocketServer ingest = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { + // One server, one cluster config: the server accepts ingest write-path + // upgrades but rejects egress read-path upgrades, so the sender pool + // connects while the query pool's connect fails. The failed build() must + // close the already-built sender pool (its connected senders) rather than + // leak them. 
+ try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { })) { - ingest.start(); - Assert.assertTrue(ingest.awaitStart(5, TimeUnit.SECONDS)); - int port = ingest.getPort(); + server.setRejectReadUpgrade(true); + server.start(); + Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS)); + int port = server.getPort(); try { QuestDB.builder() - .ingestConfig("ws::addr=localhost:" + port + ";") - .queryConfig("ws::addr=127.0.0.1:1;auth_timeout_ms=200;") + .fromConfig("ws::addr=localhost:" + port + ";auth_timeout_ms=200;") .senderPoolSize(2) .queryPoolSize(2) .acquireTimeoutMillis(500) .build() .close(); - Assert.fail("expected build to fail when query pool cannot connect"); + Assert.fail("expected build to fail when the query pool cannot connect"); } catch (RuntimeException expected) { // The exact exception comes from QwpQueryClient.connect(). The // build failing only proves the query pool gave up; the @@ -284,75 +185,51 @@ public void testQueryPoolBuildFailureUnwindsSenderPool() throws Exception { // saw two ingest handshakes (proving the senders connected and the // assertion below is not vacuous)... awaitTrue("sender pool should have connected two ingest senders", - () -> ingest.handshakeCount() >= 2); + () -> server.handshakeCount() >= 2); // ...and the failed build() must have closed every one of them, so // no sender connection is left live on the server. The server // observes the client-side socket close asynchronously, so poll. awaitTrue("failed build() must close the already-built sender pool, leaving no live connection", - () -> ingest.liveConnectionCount() == 0); - } - } - - @Test - public void testSamePoolKeyValueAcrossSidesOk() { - // The same key at the same value on both sides builds cleanly. 
- try (QuestDB ignored = QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;acquire_timeout_ms=1500;") - .queryConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;acquire_timeout_ms=1500;") - .build()) { - Assert.assertNotNull(ignored); + () -> server.liveConnectionCount() == 0); } } @Test public void testSharedVocabularyConnectsBothPoolsLive() throws Exception { - // The headline use case: one connect-string vocabulary carrying BOTH + // The headline use case: one cluster connect-string carrying BOTH // ingress-only keys (auto_flush_rows, sender_id) and egress-only keys - // (compression, max_batch_rows, target, failover) drives both LIVE - // clients through the facade -- each side applies the keys it owns and - // silently ignores the rest. Other tests cover this validate-only - // (min=0) or on a single side; this one pre-warms min=1 so both pools - // actually connect. - // - // The mock serves ingest (ACK) and query (SERVER_INFO) semantics on - // separate sockets, so ingest and query connect to separate servers. A - // single ws:: address serving both is exercised end-to-end against a - // real server in the parent repo. - try (TestWebSocketServer ingest = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { - }); - TestWebSocketServer query = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { - })) { - ingest.start(); - query.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO - query.start(); - Assert.assertTrue(ingest.awaitStart(5, TimeUnit.SECONDS)); - Assert.assertTrue(query.awaitStart(5, TimeUnit.SECONDS)); - - // Identical vocabulary on both sides, differing only in addr -- the - // same mixed key set a single-string connect() would hand to both - // clients. The pool keys carry the same value on both sides, so the - // builder's cross-string conflict check passes. 
- String shared = "auto_flush_rows=100;sender_id=probe-1;" // ingress-only - + "compression=auto;max_batch_rows=512;target=any;failover=off;" // egress-only - + "auth_timeout_ms=2000;" // COMMON + // (compression, max_batch_rows, target, failover) drives both LIVE pools + // -- each side applies the keys it owns and silently ignores the rest. + // One mock server serves both: an ACK stream on the ingest write path and + // a SERVER_INFO frame on the egress read path (the read path is gated so + // the ingest connection's ACK stream is never disturbed). + try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { + })) { + server.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO + server.start(); + Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS)); + + // A single cluster config carrying the mixed key set. The pools + // pre-warm min=1, so the shared vocabulary connects a live sender AND + // a live query client, not merely validates. + String cfg = "ws::addr=localhost:" + server.getPort() + ";" + + "auto_flush_rows=100;sender_id=probe-1;" // ingress-only + + "compression=auto;max_batch_rows=512;target=any;failover=off;" // egress-only + + "auth_timeout_ms=2000;" // common + "sender_pool_min=1;sender_pool_max=2;query_pool_min=1;query_pool_max=2;"; // pool - try (QuestDB db = QuestDB.builder() - .ingestConfig("ws::addr=localhost:" + ingest.getPort() + ";" + shared) - .queryConfig("ws::addr=localhost:" + query.getPort() + ";" + shared) - .build()) { - // build() returned, so both pools pre-warmed their min=1 slot: - // the shared vocabulary connected a live sender AND a live query - // client, not merely validated. 
+ try (QuestDB db = QuestDB.builder().fromConfig(cfg).build()) { Assert.assertNotNull(db.borrowSender()); - Assert.assertNotNull(db.query()); + try (io.questdb.client.Query q = db.borrowQuery()) { + Assert.assertNotNull(q); + } } } } @Test public void testSharedWsConfigWithPoolKeys() { - // A shared ws:: string carries pool keys; min=0 so build does only - // parse-only validation (no connect). + // A cluster ws:: string carries pool keys for both pools; min=0 so build + // does only parse-only validation (no connect). try (QuestDB ignored = QuestDB.builder() .fromConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=3;" + "query_pool_min=0;query_pool_max=2;acquire_timeout_ms=1234;") @@ -361,41 +238,13 @@ public void testSharedWsConfigWithPoolKeys() { } } - @Test - public void testTcpIngestConfigRejected() { - assertSchemaRejected(() -> QuestDB.builder().ingestConfig("tcp::addr=h:9009;")); - } - - @Test - public void testUdpIngestConfigRejected() { - assertSchemaRejected(() -> QuestDB.builder().queryConfig("udp::addr=h:9009;")); - } - - private static void assertEgressBuildRejected(String query, String expectedFragment) { - try { - QuestDB.builder() - .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=2;") - .queryConfig(query) - .build() - .close(); - Assert.fail("expected build() to reject the malformed query config: " + query); - } catch (RuntimeException e) { - Assert.assertNotNull(e.getMessage()); - Assert.assertTrue(e.getMessage(), e.getMessage().contains(expectedFragment)); - } - } - - private static void assertIngressBuildRejected(String ingest, String expectedFragment) { + private static void assertBuildRejected(String config, String expectedFragment) { try { - QuestDB.builder() - .ingestConfig(ingest) - .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;query_pool_max=2;") - .build() - .close(); - Assert.fail("expected build() to reject the malformed ingest config: " + ingest); + 
QuestDB.builder().fromConfig(config).build().close(); + Assert.fail("expected build() to reject the malformed config: " + config); } catch (RuntimeException e) { - // Ingress value errors surface as LineSenderException; both it and the - // egress IllegalArgumentException are RuntimeException. + // Ingress value errors surface as LineSenderException; egress errors + // as IllegalArgumentException -- both are RuntimeException. Assert.assertNotNull(e.getMessage()); Assert.assertTrue(e.getMessage(), e.getMessage().contains(expectedFragment)); } diff --git a/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java b/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java new file mode 100644 index 00000000..93ecb9be --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java @@ -0,0 +1,122 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +package io.questdb.client.test; + +import io.questdb.client.QuestDB; +import io.questdb.client.SenderConnectionEvent; +import io.questdb.client.SenderConnectionListener; +import io.questdb.client.SenderError; +import io.questdb.client.SenderErrorHandler; +import io.questdb.client.test.cutlass.qwp.client.TestPorts; +import org.jetbrains.annotations.NotNull; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Proves the ingest-side async callbacks exposed on the {@link QuestDB} facade + * ({@link io.questdb.client.QuestDBBuilder#errorHandler}/{@code connectionListener}) + * actually reach the pooled {@link io.questdb.client.Sender}s -- not merely the + * lower-level {@code Sender.builder()}. + * <p>
+ * Each test eagerly prewarms one ingest sender ({@code sender_pool_min=1}) + * pointed at a dead port in {@code initial_connect_retry=async} mode with a + * tight reconnect budget: the pool's I/O thread exhausts the budget in the + * background and surfaces the failure through whichever facade-wired callback is + * under test. No server is required. + */ +public class QuestDBFacadeCallbacksTest { + + @Test + public void testFacadeConnectionListenerReceivesEvents() throws Exception { + int port = TestPorts.findUnusedPort(); + CountDownLatch sawEvent = new CountDownLatch(1); + SenderConnectionListener listener = new SenderConnectionListener() { + @Override + public void onEvent(@NotNull SenderConnectionEvent event) { + sawEvent.countDown(); + } + }; + try (QuestDB ignored = QuestDB.builder() + .fromConfig(config(port)) + .connectionListener(listener) + .build()) { + Assert.assertTrue( + "facade-wired connectionListener must observe at least one connection event", + sawEvent.await(5, TimeUnit.SECONDS)); + } + } + + @Test + public void testFacadeErrorHandlerReceivesAsyncIngestError() throws Exception { + int port = TestPorts.findUnusedPort(); + ErrorInbox inbox = new ErrorInbox(); + try (QuestDB ignored = QuestDB.builder() + .fromConfig(config(port)) + .errorHandler(inbox) + .build()) { + Assert.assertTrue( + "facade-wired errorHandler must receive the async budget-exhaustion SenderError", + inbox.await(5, TimeUnit.SECONDS)); + Assert.assertNotNull("a SenderError must be delivered", inbox.get()); + } + } + + // One cluster config drives both pools. Eagerly prewarm one sender + // (sender_pool_min=1) so build() exercises the production + // buildManagedSlotSender path that applies the facade callbacks; async + a + // tight budget -> the I/O thread fails fast against the dead port. + // query_pool_min=0 -> the query pool never connects, so the test is isolated + // to the ingest callbacks. 
+ private static String config(int port) { + return "ws::addr=localhost:" + port + ";sender_pool_min=1;sender_pool_max=1" + + ";query_pool_min=0;query_pool_max=1" + + ";initial_connect_retry=async;reconnect_max_duration_millis=400" + + ";reconnect_initial_backoff_millis=10;reconnect_max_backoff_millis=50" + + ";close_flush_timeout_millis=0;"; + } + + private static final class ErrorInbox implements SenderErrorHandler { + private final CountDownLatch latch = new CountDownLatch(1); + private final AtomicReference first = new AtomicReference<>(); + + boolean await(long timeout, TimeUnit unit) throws InterruptedException { + return latch.await(timeout, unit); + } + + SenderError get() { + return first.get(); + } + + @Override + public void onError(@NotNull SenderError error) { + first.compareAndSet(null, error); + latch.countDown(); + } + } +} diff --git a/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java b/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java new file mode 100644 index 00000000..47dd5fa8 --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java @@ -0,0 +1,150 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test; + +import io.questdb.client.QuestDB; +import io.questdb.client.QuestDBBuilder; +import io.questdb.client.Sender; +import io.questdb.client.test.cutlass.qwp.client.TestPorts; +import org.junit.Assert; +import org.junit.Test; + +/** + * {@code lazy_connect=true} makes a {@link QuestDB} facade tolerate the server + * being down at startup without disabling reads: the ingest side + * connects asynchronously (writes buffer until the wire is up) and the read pool + * connects lazily on first use. Reads stay enabled and connect once the server + * is up (the recovery lifecycle is covered end-to-end by + * {@link QuestDBServerRecoveryTest}). + * <p>
+ * Because both sides must start non-blocking, a knob that forces a blocking / + * fail-fast startup ({@code initial_connect_retry} other than {@code async}, or + * an explicit {@code query_pool_min > 0}) is a configuration conflict and is + * rejected up front with a clear remedy. + */ +public class QuestDBLazyConnectTest { + + @Test(timeout = 30_000) + public void testLazyConnectStartsAndWritesWhileServerDown() { + int port = TestPorts.findUnusedPort(); + // No server at `port`, sender_pool_min defaults to 1, and the only + // resilience knob is lazy_connect=true. (a) build() must return promptly + // -- the read pool defaults to min=0 and the ingest side goes async, so + // neither side fail-fasts -- and (b) a write must buffer without throwing. + try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port + + ";lazy_connect=true;reconnect_max_duration_millis=200" + + ";reconnect_initial_backoff_millis=10;reconnect_max_backoff_millis=50" + + ";close_flush_timeout_millis=0;")) { + Sender sender = db.borrowSender(); + Assert.assertNotNull("a sender must be available with no server present", sender); + sender.table("t").longColumn("v", 1L).atNow(); + } + } + + @Test(timeout = 30_000) + public void testLazyConnectKeepsReadsEnabledWhileServerDown() { + int port = TestPorts.findUnusedPort(); + // Reads are ENABLED, just deferred: under lazy_connect the read pool + // defaults to min=0, so build() does not eagerly connect or fail-fast + // while the server is down. The read client connects lazily on the + // first borrowQuery() once the server is up (covered end-to-end by + // QuestDBServerRecoveryTest). This is the whole point of lazy_connect + // over the old write-only mode, which disabled reads outright. 
+ try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port + + ";lazy_connect=true;close_flush_timeout_millis=0;")) { + Assert.assertNotNull("the handle must build read-enabled while the server is down", db); + } + } + + @Test + public void testLazyConnectAcceptsOnAndAllowsExplicitAsync() { + int port = TestPorts.findUnusedPort(); + // lazy_connect accepts on/off as well as true/false, and an explicit + // initial_connect_retry=async is consistent with it (no conflict). + try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port + + ";lazy_connect=on;initial_connect_retry=async;query_pool_min=0" + + ";close_flush_timeout_millis=0;")) { + Assert.assertNotNull(db); + } + } + + @Test + public void testLazyConnectConflictsWithBlockingInitialConnectRetry() { + // off/false (OFF) and on/true/sync (SYNC) all block or fail-fast at + // startup, so each conflicts with lazy_connect and must be rejected with + // a clear remedy. + assertLazyConflict("initial_connect_retry=off", "initial_connect_retry", "async"); + assertLazyConflict("initial_connect_retry=sync", "initial_connect_retry", "async"); + assertLazyConflict("initial_connect_retry=on", "initial_connect_retry", "async"); + } + + @Test + public void testLazyConnectConflictsWithExplicitQueryPoolMinInConfig() { + // An explicit query_pool_min > 0 makes the read pool eagerly fail-fast at + // startup, contradicting lazy_connect. + assertLazyConflict("query_pool_min=1", "query_pool_min", "0"); + assertLazyConflict("query_pool_min=2", "query_pool_min", "0"); + // query_pool_min=0 is exactly what lazy_connect wants -- no conflict. 
+ int port = TestPorts.findUnusedPort(); + try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port + + ";lazy_connect=true;query_pool_min=0;close_flush_timeout_millis=0;")) { + Assert.assertNotNull(db); + } + } + + @Test + public void testLazyConnectConflictsWithExplicitQueryPoolMinFromBuilder() { + // The conflict also fires when query_pool_min > 0 comes from an explicit + // builder call (queryPoolMin / queryPoolSize), not just the connect string. + int port = TestPorts.findUnusedPort(); + assertLazyConflict(QuestDB.builder() + .fromConfig("ws::addr=localhost:" + port + ";lazy_connect=true;close_flush_timeout_millis=0;") + .queryPoolMin(1), "query_pool_min", "0"); + assertLazyConflict(QuestDB.builder() + .fromConfig("ws::addr=localhost:" + port + ";lazy_connect=true;close_flush_timeout_millis=0;") + .queryPoolSize(2), "query_pool_min", "0"); + } + + private static void assertLazyConflict(String extraKeys, String... expectedFragments) { + int port = TestPorts.findUnusedPort(); + assertLazyConflict(QuestDB.builder().fromConfig("ws::addr=localhost:" + port + + ";lazy_connect=true;" + extraKeys + ";close_flush_timeout_millis=0;"), expectedFragments); + } + + private static void assertLazyConflict(QuestDBBuilder builder, String... 
expectedFragments) { + try { + builder.build().close(); + Assert.fail("expected lazy_connect configuration conflict"); + } catch (IllegalArgumentException e) { + String msg = e.getMessage(); + Assert.assertNotNull(msg); + Assert.assertTrue(msg, msg.contains("lazy_connect")); + for (int i = 0; i < expectedFragments.length; i++) { + Assert.assertTrue("'" + msg + "' should mention '" + expectedFragments[i] + "'", + msg.contains(expectedFragments[i])); + } + } + } +} diff --git a/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java b/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java new file mode 100644 index 00000000..c68be090 --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java @@ -0,0 +1,114 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + ******************************************************************************/ + +package io.questdb.client.test; + +import io.questdb.client.QuestDB; +import io.questdb.client.Sender; +import io.questdb.client.test.cutlass.qwp.websocket.TestWebSocketServer; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.TimeUnit; +import java.util.function.BooleanSupplier; + +/** + * End-to-end resilience: the facade starts with the server down, the producer + * keeps writing (buffered), and once the server comes up the write side + * reconnects and the read side -- previously deferred so it could not fail-fast + * the build -- can connect. + * <p>

+ * The mock cannot answer a real SELECT (result frames are exercised against a + * real server in the parent repo), so the read step asserts the query client + * connects once the server is up, not the row contents. + */ +public class QuestDBServerRecoveryTest { + + @Test(timeout = 60_000) + public void testFacadeStartsWhileServerDownThenWritesAndReaderConnectsOnRecovery() throws Exception { + // One mock server (the whole "cluster"), bound so the port is known but + // NOT accepting yet: the address is reachable but no WebSocket upgrade + // completes, so the server is effectively "down". It serves ingest ACK + // on the write path and a SERVER_INFO frame on the read path -- the read + // path is gated so the ingest connection's ACK stream is never disturbed. + try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() { + })) { + server.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO + // One cluster config drives both pools: + // lazy_connect=true expands to exactly this resilience: the ingest + // side goes async (the producer never blocks; writes buffer until the + // wire is up) and the read pool defaults to min=0 (the otherwise + // fail-fast reader never sinks the build while the server is down, + // and connects lazily on the first query). 
+ String cfg = "ws::addr=localhost:" + server.getPort() + + ";lazy_connect=true" + + ";sender_pool_min=1;sender_pool_max=1;query_pool_max=1" + + ";auth_timeout_ms=2000;reconnect_initial_backoff_millis=20" + + ";reconnect_max_backoff_millis=100;reconnect_max_duration_millis=600000" + + ";close_flush_timeout_millis=1000;"; + + // (1) server down + (2) client starts: + try (QuestDB db = QuestDB.builder().fromConfig(cfg).build()) { + Assert.assertEquals("no handshake while the server is down", 0, server.handshakeCount()); + + // lazy_connect keeps reads ENABLED, just deferred: the read pool + // defaults to min=0, so nothing connects while the server is + // down. The read client connects lazily on the first + // borrowQuery() once the server is up (step 5). + + // (3) client writes -> buffers in the cursor SF engine; the call + // must not throw even though the server is down. + Sender sender = db.borrowSender(); + sender.table("t").longColumn("v", 1L).atNow(); + + // (4) server starts: + server.start(); + Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS)); + + // The write side reconnects on its own once the server is up. + awaitTrue("ingest must connect after the server comes up", + () -> server.handshakeCount() >= 1); + + // (5) client can now read: the deferred reader connects on the + // first borrowQuery() (the mock does not serve rows, so we + // assert the connection, not the result). 
+ int handshakesBeforeQuery = server.handshakeCount(); + db.borrowQuery().close(); + awaitTrue("query client must connect after the server comes up", + () -> server.handshakeCount() >= handshakesBeforeQuery + 1); + } + } + } + + private static void awaitTrue(String message, BooleanSupplier condition) throws InterruptedException { + long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(15); + while (System.nanoTime() < deadline) { + if (condition.getAsBoolean()) { + return; + } + Thread.sleep(20); + } + Assert.assertTrue(message, condition.getAsBoolean()); + } +} diff --git a/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java b/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java index cf121d8c..7dc2810b 100644 --- a/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java +++ b/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java @@ -31,6 +31,7 @@ import io.questdb.client.cutlass.http.client.WebSocketSendBuffer; import io.questdb.client.network.PlainSocketFactory; import io.questdb.client.network.Socket; +import io.questdb.client.network.SocketReadinessWaiter; import org.junit.Assert; import org.junit.Test; @@ -263,7 +264,7 @@ public int send(long bufferPtr, int bufferLen) { } @Override - public void startTlsSession(CharSequence peerName) { + public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) { throw new UnsupportedOperationException(); } diff --git a/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java b/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java new file mode 100644 index 00000000..e0435b72 --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java @@ -0,0 +1,88 @@ 
+/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test.cutlass.qwp.client; + +import io.questdb.client.cutlass.http.client.HttpClientException; +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Test; + +public class QwpQueryClientConnectTimeoutTest { + + /** + * A connect-phase timeout must be reported as a connect_timeout failure, not + * relabeled as an "exceeded auth_timeout" overage. + *

+ * {@code QwpQueryClient.runUpgradeWithTimeout} used to wrap the {@code connect()} + * and {@code upgrade()} calls in one try block, so the timeout-flagged exception + * thrown by the connect_timeout path was caught by the {@code isTimeout()} + * branch intended for upgrade() and rewritten with the (much larger, and wrong) + * auth_timeout value -- e.g. a connect that bailed after 500 ms reported + * "exceeded auth_timeout=15000ms". The ingest side never had this because it + * routes through {@code QwpUpgradeFailures.classify}, which leaves the + * connect-timeout exception unmodified. + */ + @Test(timeout = 30_000) + public void testConnectTimeoutNotReportedAsAuthTimeout() { + // 192.0.2.0/24 is TEST-NET-1 (RFC 5737): on a normal network the SYN is + // silently dropped, so the TCP connect stalls and our application-level + // connect_timeout (500 ms) fires -- long before auth_timeout_ms (15000 ms). + // The WebSocket upgrade phase is never reached. + try (QwpQueryClient client = QwpQueryClient.fromConfig( + "ws::addr=192.0.2.1:9009;connect_timeout=500;auth_timeout_ms=15000;failover=off;target=any;")) { + long start = System.currentTimeMillis(); + try { + client.connect(); + Assert.fail("expected connect to fail"); + } catch (HttpClientException ex) { + long elapsed = System.currentTimeMillis() - start; + String msg = ex.getMessage(); + + // The connect_timeout path is only exercised when the runner routes + // TEST-NET-1 into a black hole (dropped SYN). Skip -- rather than + // flake -- on the other two outcomes: + // - no route: a fast ENETUNREACH surfaces as "could not connect". + // - (rare) the host accepts the connect: the upgrade then runs the + // full auth_timeout, so elapsed ~ auth_timeout (>5 s). + // Neither gate keys on the connect-vs-auth label, so neither can mask + // the regression: a black-holed connect always bails at ~500 ms with + // a message that is "connect timed out" (fixed) or "...auth_timeout..."
+ // (the bug) -- both reach the assertions below. + Assume.assumeFalse("no route to TEST-NET-1 black hole on this runner: " + msg, + msg.contains("could not connect")); + Assume.assumeTrue("TEST-NET-1 is not a black hole on this runner (elapsed=" + elapsed + "ms): " + msg, + elapsed < 5_000); + + // It bailed at connect_timeout=500 ms, nowhere near auth_timeout=15000 ms. + // Regression: name the connect phase, never auth_timeout. + Assert.assertFalse("connect-phase timeout misreported as auth_timeout: " + msg, + msg.contains("auth_timeout")); + Assert.assertTrue("expected a connect-timeout diagnostic, got: " + msg, + msg.contains("connect timed out")); + } + } + } +} diff --git a/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java b/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java index 806d3750..92e4a648 100644 --- a/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java +++ b/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java @@ -83,6 +83,12 @@ public class TestWebSocketServer implements Closeable { // QwpQueryClient tests enable this; ingress sender tests leave it off so their // connections carry only ACK frames. private volatile boolean sendServerInfo; + // When true, the server fails the WebSocket upgrade on the egress read path + // (/read...) by dropping the connection before the 101, while still serving + // the ingest write path (/write...) normally. Lets one server + one cluster + // config drive a build where the sender pool connects but the query pool + // cannot. Set via setRejectReadUpgrade(). + private volatile boolean rejectReadUpgrade; // When non-null the next handshake responds with HTTP 421 Misdirected // Request + X-QuestDB-Role: <role>, mimicking a server whose // QwpServerInfoProvider reports REPLICA / PRIMARY_CATCHUP.
Set after @@ -208,6 +214,18 @@ public void setRejectWithRole(String role) { this.rejectingRole = role; } + /** + * When enabled, the server fails the WebSocket upgrade on the egress read + * path ({@code /read/...}) while still serving the ingest write path + * ({@code /write/...}) normally. This lets a single server, addressed by a + * single cluster config, accept ingest senders but reject query clients -- + * e.g. to exercise build()'s unwind of an already-built sender pool when the + * query pool fails. + */ + public void setRejectReadUpgrade(boolean rejectReadUpgrade) { + this.rejectReadUpgrade = rejectReadUpgrade; + } + /** * Configure the server to reject the next handshake with an arbitrary * HTTP status code (e.g. 401, 403, 404, 426, 503). Pass {@code 0} to @@ -221,9 +239,12 @@ public void setRejectWithStatus(int statusCode, String reasonPhrase) { /** * When enabled, the server sends a {@code SERVER_INFO} frame immediately - * after a successful 101 upgrade, the way a real egress endpoint does. The - * advertised role follows {@link #setAdvertisedRole}, defaulting to - * {@code STANDALONE}. Leave disabled for ingress (Sender) tests. + * after a successful 101 upgrade on the egress read path ({@code /read/...}), + * the way a real egress endpoint does. Ingest write-path ({@code /write/...}) + * connections never receive it -- their ACK-only response stream would choke + * on an unexpected frame -- so one server can serve both an ingest and a + * query pool from a single cluster config. The advertised role follows + * {@link #setAdvertisedRole}, defaulting to {@code STANDALONE}. 
*/ public void setSendServerInfo(boolean sendServerInfo) { this.sendServerInfo = sendServerInfo; @@ -251,6 +272,10 @@ private static byte[] buildServerInfoFrame(byte role) { return bb.array(); } + private static boolean isReadPath(String path) { + return path != null && path.startsWith("/read"); + } + private static byte roleByte(String role) { if (role == null) { return 0; // ROLE_STANDALONE @@ -313,6 +338,10 @@ public class ClientHandler implements Closeable { private boolean isClosed; private OutputStream out; private Thread readThread; + // Request path from the WebSocket upgrade GET line (e.g. /write/v4, + // /read/v1). Captured during the handshake so the post-upgrade logic can + // distinguish ingest from egress connections. + private String requestPath = ""; ClientHandler(Socket socket) { this.socket = socket; @@ -459,7 +488,15 @@ private boolean performHandshake() throws IOException { } String key = null; - for (String line : request.toString().split("\r\n")) { + String[] lines = request.toString().split("\r\n"); + if (lines.length > 0) { + // GET <path> HTTP/1.1 + String[] parts = lines[0].split(" "); + if (parts.length >= 2) { + requestPath = parts[1]; + } + } + for (String line : lines) { if (line.toLowerCase().startsWith("sec-websocket-key:")) { key = line.substring(18).trim(); break; @@ -470,6 +507,13 @@ private boolean performHandshake() throws IOException { return false; } + // Read-path reject: drop the egress upgrade before the 101 so the + // query pool's connect fails fast, while ingest write-path upgrades + // still complete on this same server. + if (rejectReadUpgrade && isReadPath(requestPath)) { + return false; + } + // Arbitrary-status reject path: tests use setRejectWithStatus // to drive the failover loop's terminal-vs-transient // classification (failover.md §6).
@@ -566,7 +610,11 @@ void start() { liveConnections.incrementAndGet(); try { - if (sendServerInfo) { + // SERVER_INFO is an egress-only frame: send it only on a + // read-path (query) connection. An ingest write-path + // connection parses every inbound frame as an ACK and + // would fail on it. + if (sendServerInfo && isReadPath(requestPath)) { sendBinary(buildServerInfoFrame(roleByte(advertisedRole))); } diff --git a/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java b/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java index bd3e944a..1aa681f4 100644 --- a/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java +++ b/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java @@ -44,11 +44,11 @@ public class QuestDBExamples { public static void main(String[] args) throws Exception { - // 1. Connect with a single configuration string. Both sides run over - // QWP/WebSocket, so one ws:: string configures ingest and egress. - try (QuestDB db = QuestDB.connect("ws::addr=localhost:9000;")) { + // 1. Connect with a single configuration string for the whole cluster. + // Both sides run over QWP/WebSocket, so one ws:: string configures + // ingest and egress; list every node in one addr server list. + try (QuestDB db = QuestDB.connect("ws::addr=node1:9000,node2:9000,node3:9000;")) { ingestWithBorrowedSender(db); - ingestWithThreadAffineSender(db); queryOneShot(db); queryWithBinds(db); cancelExample(db); @@ -59,21 +59,24 @@ public static void main(String[] args) throws Exception { try (QuestDB db = QuestDB.connect( "wss::addr=db.questdb.cloud:9000;token=YOUR_TOKEN_HERE;")) { // ... use db ... - db.executeSql("SELECT 1", new PrintingHandler()).await(); + try (Query q = db.borrowQuery()) { + q.sql("SELECT 1").handler(new PrintingHandler()).submit().await(); + } } - // 3. Custom pool sizing and timeouts via the builder. 
Use this when - // ingest and egress use separate address lists, or when you need to - // override defaults. + // 3. Custom pool sizing and timeouts via the builder. One cluster config + // (a single addr server list) drives both pools; use the builder to + // override pool/timeout defaults. try (QuestDB db = QuestDB.builder() - .ingestConfig("ws::addr=ingest.cluster:9000;") - .queryConfig("ws::addr=read-replica.cluster:9000;") + .fromConfig("ws::addr=node1.cluster:9000,node2.cluster:9000;") .senderPoolSize(8) .queryPoolSize(4) .acquireTimeoutMillis(10_000) .build()) { // ... use db ... - db.executeSql("SELECT 1", new PrintingHandler()).await(); + try (Query q = db.borrowQuery()) { + q.sql("SELECT 1").handler(new PrintingHandler()).submit().await(); + } } } @@ -84,15 +87,17 @@ public static void main(String[] args) throws Exception { * returns normally; either way the Completion reaches a terminal state. */ static void cancelExample(QuestDB db) { - Completion c = db.executeSql( - "SELECT * FROM big_table ORDER BY ts", - new PrintingHandler()); - // ... some condition decides to abort ... - c.cancel(); - try { - c.await(); - } catch (Exception cancelled) { - // expected when cancel won the race + try (Query q = db.borrowQuery()) { + Completion c = q.sql("SELECT * FROM big_table ORDER BY ts") + .handler(new PrintingHandler()) + .submit(); + // ... some condition decides to abort ... + c.cancel(); + try { + c.await(); + } catch (Exception cancelled) { + // expected when cancel won the race + } } } @@ -113,62 +118,42 @@ static void ingestWithBorrowedSender(QuestDB db) { } /** - * Thread-affine Sender: the first call on a thread leases one and pins it; - * subsequent calls on the same thread return the same instance with zero - * borrow overhead. Best for long-lived dedicated producer threads. - *

- * Call {@link QuestDB#releaseSender()} on threads borrowed from pools you - * don't own (Netty event loops, etc.) before they're recycled. - */ - static void ingestWithThreadAffineSender(QuestDB db) { - Sender s = db.sender(); - for (int i = 0; i < 1_000; i++) { - s.table("trades") - .symbol("symbol", "BTC-USD") - .doubleColumn("price", 42_500.50 + i) - .longColumn("size", 100) - .atNow(); - } - s.flush(); - // Not strictly required: db.close() reaps pinned Senders. Call it - // only when handing this thread back to a foreign pool. - // db.releaseSender(); - } - - /** - * One-shot query, no bind parameters. {@link QuestDB#executeSql} returns - * a {@link Completion} that you can {@code await()} synchronously, time + * One-shot query, no bind parameters. Borrow a {@link Query} handle, + * submit, await, and close it (try-with-resources). {@code submit()} + * returns a {@link Completion} you can {@code await()} synchronously, time * out on, or cancel. */ static void queryOneShot(QuestDB db) throws InterruptedException { - Completion c = db.executeSql( - "SELECT price FROM trades WHERE symbol = 'BTC-USD' LIMIT 10", - new PrintingHandler()); - c.await(); + try (Query q = db.borrowQuery()) { + q.sql("SELECT price FROM trades WHERE symbol = 'BTC-USD' LIMIT 10") + .handler(new PrintingHandler()) + .submit() + .await(); + } } /** - * Query with bind parameters. Use {@link QuestDB#query()} to get the - * per-thread Query builder, then set SQL, binds (via QwpBindSetter), and - * handler. + * Query with bind parameters. Borrow a {@link Query} handle, then set SQL, + * binds (via QwpBindSetter), and handler. *

* The same SQL text reuses the server's compiled-factory cache -- bind * values supply the per-call inputs. Interpolating values into the SQL * string defeats that cache. */ static void queryWithBinds(QuestDB db) throws InterruptedException { - Query q = db.query() - .sql("SELECT price FROM trades WHERE symbol = $1 LIMIT $2") - .binds(binds -> { - binds.setVarchar(0, "BTC-USD"); - binds.setLong(1, 10L); - }) - .handler(new PrintingHandler()); - Completion c = q.submit(); - // Optional timeout: returns false if the query is still in flight. - if (!c.await(5, TimeUnit.SECONDS)) { - c.cancel(); - c.await(); + try (Query q = db.borrowQuery()) { + q.sql("SELECT price FROM trades WHERE symbol = $1 LIMIT $2") + .binds(binds -> { + binds.setVarchar(0, "BTC-USD"); + binds.setLong(1, 10L); + }) + .handler(new PrintingHandler()); + Completion c = q.submit(); + // Optional timeout: returns false if the query is still in flight. + if (!c.await(5, TimeUnit.SECONDS)) { + c.cancel(); + c.await(); + } } } diff --git a/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java index 34ba4d1a..82b997c5 100644 --- a/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java @@ -54,6 +54,7 @@ public void testEveryPoolKeyIsHonored() { expected.put("query_pool_min", 0); expected.put("query_pool_max", 5); expected.put("acquire_timeout_ms", 1234L); + expected.put("query_close_timeout_ms", 2468L); expected.put("idle_timeout_ms", 4321L); expected.put("max_lifetime_ms", 98765L); expected.put("housekeeper_interval_ms", 222L); @@ -74,6 +75,12 @@ public void testEveryPoolKeyIsHonored() { // the assertions above, so a new pool key with no assertion trips this. 
for (ConfigSchema.KeySpec spec : ConfigSchema.all()) { if (spec.side() == Side.POOL) { + // lazy_connect is a facade flag (build()'s tolerant-startup + // branch, covered by QuestDBLazyConnectTest), not a numeric + // pool-sizing knob resolved into the snapshot. + if ("lazy_connect".equals(spec.name())) { + continue; + } Assert.assertTrue("registry pool key '" + spec.name() + "' has no honored assertion", expected.containsKey(spec.name())); } diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java index 3994a1d2..fae24fb2 100644 --- a/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java @@ -33,8 +33,6 @@ import org.junit.Assert; import org.junit.Test; -import java.lang.reflect.Constructor; -import java.lang.reflect.Method; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; @@ -44,8 +42,8 @@ // OutOfMemoryError); the old catches let that Error skip cleanup. // // QwpQueryClient is a concrete class with no fake seam, so these tests inject an -// Error at the real connect step via the package-private connectHook constructor -// (reached by reflection -- the main module is declared `open`). fromConfig() +// Error at the real connect step via the public connectHook constructor. +// fromConfig() // still runs for real, committing the NATIVE_DEFAULT scratch the cleanup must // reclaim, so the memory assertions are meaningful. 
public class QueryClientPoolErrorSafetyTest { @@ -232,30 +230,21 @@ private static Consumer alwaysThrowStart() { }; } - private static int inFlightCreations(QueryClientPool pool) throws Exception { - Method m = QueryClientPool.class.getDeclaredMethod("inFlightCreations"); - m.setAccessible(true); - return (int) m.invoke(pool); + private static int inFlightCreations(QueryClientPool pool) { + return pool.inFlightCreations(); } private static QueryClientPool newPool( String cfg, int min, int max, long acquireMs, Consumer connectHook - ) throws Exception { - Constructor c = QueryClientPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, Consumer.class); - c.setAccessible(true); - return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, connectHook); + ) { + return new QueryClientPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, connectHook); } private static QueryClientPool newPool( String cfg, int min, int max, long acquireMs, Consumer connectHook, Consumer startHook - ) throws Exception { - Constructor c = QueryClientPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, - Consumer.class, Consumer.class); - c.setAccessible(true); - return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, + ) { + return new QueryClientPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, connectHook, startHook); } } diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java new file mode 100644 index 00000000..7831c02c --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java @@ -0,0 +1,167 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| 
| |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test.impl; + +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.impl.QueryClientPool; +import io.questdb.client.impl.QueryWorker; +import org.junit.Assert; +import org.junit.Test; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.function.Consumer; + +/** + * Regression tests for the bounded, interruptible {@code Query.close()} drain. + * When a submit is still in flight at close() time, the old drain blocked the + * caller unbounded and uninterruptibly on the terminal event (and could hang + * forever if a racing {@code QuestDB.close()} stranded it). The drain now waits + * at most {@code closeQueryTimeoutMillis}, an interrupt aborts it, and a worker + * that fails to drain in time is discarded -- its connection may still carry + * late frames for the abandoned query -- rather than returned to the pool. + *
<p>
+ * White-box style: a no-op connect hook builds workers without a network, and + * the in-flight state is simulated by setting {@code QueryImpl.done=false} + * reflectively, so no server or real {@code execute()} is needed to exercise + * the close() drain logic. + */ +public class QueryCloseDrainTest { + + private static final String CFG = "ws::addr=127.0.0.1:1;"; + private static final Consumer NO_CONNECT = c -> { + }; + + @Test(timeout = 30_000) + public void testCloseDiscardsWorkerWhenDrainTimesOut() throws Exception { + try (QueryClientPool pool = new QueryClientPool( + CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) { + setCloseQueryTimeout(pool, 150L); + QueryWorker w = pool.acquire(); + long gen = generation(w); + setDone(w, false); // pretend a submit is in flight; nothing will ever signal done + + long startNanos = System.nanoTime(); + closeQuery(w, gen); + long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000; + + Assert.assertTrue("close() must wait about the close budget, elapsed=" + elapsedMs, + elapsedMs >= 120); + Assert.assertTrue("close() must be bounded, not block unbounded, elapsed=" + elapsedMs, + elapsedMs < 5_000); + Assert.assertFalse("a worker that did not drain must be discarded, not returned to the pool", + allWorkers(pool).contains(w)); + Assert.assertEquals("the discarded worker must leave the pool so it can grow a fresh one", + 0, allWorkers(pool).size()); + Assert.assertFalse("the discarded worker's dispatch thread must have exited", + dispatchThread(w).isAlive()); + } + } + + @Test(timeout = 30_000) + public void testCloseIsInterruptible() throws Exception { + try (QueryClientPool pool = new QueryClientPool( + CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) { + // A long budget: the only way close() can return promptly is by + // honoring the caller's interrupt. 
+ setCloseQueryTimeout(pool, 60_000L); + QueryWorker w = pool.acquire(); + long gen = generation(w); + setDone(w, false); + + Thread.currentThread().interrupt(); + long startNanos = System.nanoTime(); + closeQuery(w, gen); + long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000; + + Assert.assertTrue("close() must preserve the caller's interrupt flag", Thread.interrupted()); + Assert.assertTrue("interrupt must abort the drain promptly, elapsed=" + elapsedMs, + elapsedMs < 5_000); + Assert.assertFalse("an interrupted close() must discard the worker", + allWorkers(pool).contains(w)); + } + } + + @Test(timeout = 30_000) + public void testCloseReturnsWorkerWhenAlreadyDrained() throws Exception { + try (QueryClientPool pool = new QueryClientPool( + CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) { + setCloseQueryTimeout(pool, 150L); + QueryWorker w = pool.acquire(); + long gen = generation(w); + // done stays true (no in-flight submit): close() must take the fast + // path and return the worker to the pool for reuse, not discard it. 
+ closeQuery(w, gen); + Assert.assertTrue("an already-drained worker must be returned to the pool, not discarded", + allWorkers(pool).contains(w)); + } + } + + @SuppressWarnings("unchecked") + private static ArrayList allWorkers(QueryClientPool pool) throws Exception { + Field f = QueryClientPool.class.getDeclaredField("all"); + f.setAccessible(true); + return (ArrayList) f.get(pool); + } + + private static void closeQuery(QueryWorker w, long gen) throws Exception { + Object impl = queryImpl(w); + Method close = impl.getClass().getDeclaredMethod("close", long.class); + close.setAccessible(true); + close.invoke(impl, gen); + } + + private static Thread dispatchThread(QueryWorker w) throws Exception { + Field f = QueryWorker.class.getDeclaredField("thread"); + f.setAccessible(true); + return (Thread) f.get(w); + } + + private static long generation(QueryWorker w) throws Exception { + Method m = QueryWorker.class.getDeclaredMethod("generation"); + m.setAccessible(true); + return (long) m.invoke(w); + } + + private static Object queryImpl(QueryWorker w) throws Exception { + Field queryF = QueryWorker.class.getDeclaredField("query"); + queryF.setAccessible(true); + return queryF.get(w); + } + + private static void setCloseQueryTimeout(QueryClientPool pool, long millis) throws Exception { + Field f = QueryClientPool.class.getDeclaredField("closeQueryTimeoutMillis"); + f.setAccessible(true); + f.setLong(pool, millis); + } + + private static void setDone(QueryWorker w, boolean done) throws Exception { + Object impl = queryImpl(w); + Field doneF = impl.getClass().getDeclaredField("done"); + doneF.setAccessible(true); + doneF.setBoolean(impl, done); + } +} diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java index 1ff33b76..f9cd8bc0 100644 --- a/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java +++ 
b/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java @@ -24,11 +24,11 @@ package io.questdb.client.test.impl; -import io.questdb.client.Query; import io.questdb.client.cutlass.qwp.client.QwpBindSetter; import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; import io.questdb.client.cutlass.qwp.client.QwpServerInfo; +import io.questdb.client.std.str.StringSink; import org.junit.Assert; import org.junit.Test; @@ -39,58 +39,52 @@ public class QueryImplResetTest { /** - * Regression test for the state-carryover bug between consecutive - * submits on the per-thread {@code QuestDB#query()} handle. + * The Javadoc on both {@code Query} and {@code QuestDB#borrowQuery()} + * promises the leased handle is handed out "reset to empty". The reset is + * {@code QueryImpl.resetForBorrow()}, invoked from {@code QueryWorker.lease()} + * when {@code borrowQuery()} hands the pre-allocated handle out. It must + * clear the builder state (SQL, binds, handler) so a follow-up + * {@code submit()} cannot silently reuse a prior borrow's handler/binds, + * and it must leave the handle idle (done). *
<p>
- * The Javadoc on both {@code Query} and {@code QuestDB#query()} promises - * that the returned instance is "reset to empty" / "in a reset state". - * Before the fix, {@code QuestDBImpl.query()} returned the bare - * thread-local without nulling {@code userHandler} / {@code userBinds}, - * so the second call below would silently reuse {@code h1}: - *
<pre>
-     *   db.query().sql("SELECT 1").handler(h1).submit().await();
-     *   db.query().sql("SELECT 2").submit();    // no .handler() -- reuses h1
-     * </pre>
- * The {@code if (userHandler == null)} check in {@code submit()} could - * not catch the misuse because the field was still set from the prior - * submit. - *
<p>
- * The fix is {@code QueryImpl.resetIfDone()}, invoked from - * {@code QuestDBImpl.query()} before the per-thread handle is returned. - * This test reaches into {@code QueryImpl} via reflection (the class is - * package-private and lives in a different package from this test) and - * asserts the reset clears all three configured fields when the prior - * run is in a terminal state. + * The reset is unconditional: the leased worker was just acquired from the + * pool, so it is always idle (done) at borrow time. This test reaches into + * {@code QueryImpl} by reflection (the class is package-private and lives + * in a different package from this test). Builder state is seeded directly + * via reflection rather than through the {@code Query} API because the + * lease-generation guard on the setters would dereference the (null) worker. */ @Test - public void testResetIfDoneClearsBuilderStateInTerminalState() throws Exception { + public void testResetForBorrowClearsBuilderState() throws Exception { Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); - Class poolClass = Class.forName("io.questdb.client.impl.QueryClientPool"); + Class workerClass = Class.forName("io.questdb.client.impl.QueryWorker"); - Constructor ctor = queryImplClass.getDeclaredConstructor(poolClass); + Constructor ctor = queryImplClass.getDeclaredConstructor(workerClass); ctor.setAccessible(true); - // QueryImpl never dereferences the pool outside of submit(); a null - // pool is fine for this state-only test. - Query q = (Query) ctor.newInstance(new Object[]{null}); - - // Mirror the post-submit().await() state: builder fields set, - // done flag true (the constructor default). 
- QwpColumnBatchHandler h = new NoopHandler(); - QwpBindSetter b = values -> { - // no-op - }; - q.sql("SELECT 1").binds(b).handler(h); - - Method reset = queryImplClass.getDeclaredMethod("resetIfDone"); - reset.setAccessible(true); - reset.invoke(q); + // resetForBorrow() never dereferences the worker; a null worker is fine + // for this state-only test. + Object q = ctor.newInstance(new Object[]{null}); Field handlerF = queryImplClass.getDeclaredField("userHandler"); Field bindsF = queryImplClass.getDeclaredField("userBinds"); Field sqlBufF = queryImplClass.getDeclaredField("sqlBuffer"); + Field doneF = queryImplClass.getDeclaredField("done"); handlerF.setAccessible(true); bindsF.setAccessible(true); sqlBufF.setAccessible(true); + doneF.setAccessible(true); + + // Seed builder state as a prior borrow would have left it. + handlerF.set(q, new NoopHandler()); + bindsF.set(q, (QwpBindSetter) values -> { + // no-op + }); + ((StringSink) sqlBufF.get(q)).put("SELECT 1"); + doneF.setBoolean(q, false); + + Method reset = queryImplClass.getDeclaredMethod("resetForBorrow"); + reset.setAccessible(true); + reset.invoke(q); Assert.assertNull("userHandler must be cleared so a follow-up submit() without .handler() fails fast", handlerF.get(q)); @@ -99,53 +93,43 @@ public void testResetIfDoneClearsBuilderStateInTerminalState() throws Exception CharSequence sqlBuffer = (CharSequence) sqlBufF.get(q); Assert.assertEquals("sqlBuffer must be empty so a follow-up submit() without .sql() throws 'sql is required'", 0, sqlBuffer.length()); + Assert.assertTrue("done must be true so the handle starts idle, not in flight", + doneF.getBoolean(q)); } /** - * Symmetric guard: when a submit is in flight ({@code done == false}), - * {@code resetIfDone()} must NOT touch the configured fields. The - * dispatched worker thread is reading {@code sqlBuffer} in - * {@code runOn()} and {@code userHandler} via the wrapping handler; - * clearing them mid-flight would race. 
+ * {@code QuestDB#borrowQuery()} returns a thin lease that is freshly + * allocated per borrow, but the heavy state it wraps -- the per-worker + * {@code QueryImpl} -- is pre-allocated once and reused across borrows. This + * pins that contract: two {@code lease()} calls on the same worker return + * distinct lease wrappers that delegate to the same pooled {@code QueryImpl}. + * Reaches both package-private classes by reflection. */ @Test - public void testResetIfDoneIsNoOpWhileSubmitInFlight() throws Exception { - Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); + public void testLeaseWrapsSamePooledQueryImpl() throws Exception { + Class workerClass = Class.forName("io.questdb.client.impl.QueryWorker"); Class poolClass = Class.forName("io.questdb.client.impl.QueryClientPool"); + Class clientClass = Class.forName("io.questdb.client.cutlass.qwp.client.QwpQueryClient"); + Class leaseClass = Class.forName("io.questdb.client.impl.QueryLease"); - Constructor ctor = queryImplClass.getDeclaredConstructor(poolClass); + // lease() never dereferences the client or pool (it only resets the + // reused QueryImpl and stamps the current generation), so nulls are fine + // for this structure-only test -- mirrors the null-worker shortcut above. + Constructor ctor = workerClass.getDeclaredConstructor(clientClass, poolClass, int.class); ctor.setAccessible(true); - Query q = (Query) ctor.newInstance(new Object[]{null}); + Object worker = ctor.newInstance(null, null, 0); - QwpColumnBatchHandler h = new NoopHandler(); - QwpBindSetter b = values -> { - // no-op - }; - q.sql("SELECT 1").binds(b).handler(h); - - // Flip the in-flight flag by setting done=false directly. 
- Field doneF = queryImplClass.getDeclaredField("done"); - doneF.setAccessible(true); - doneF.setBoolean(q, false); + Method leaseM = workerClass.getDeclaredMethod("lease"); + leaseM.setAccessible(true); + Object leaseA = leaseM.invoke(worker); + Object leaseB = leaseM.invoke(worker); - Method reset = queryImplClass.getDeclaredMethod("resetIfDone"); - reset.setAccessible(true); - reset.invoke(q); + Assert.assertNotSame("each borrow must hand back a fresh lease wrapper", leaseA, leaseB); - Field handlerF = queryImplClass.getDeclaredField("userHandler"); - Field bindsF = queryImplClass.getDeclaredField("userBinds"); - Field sqlBufF = queryImplClass.getDeclaredField("sqlBuffer"); - handlerF.setAccessible(true); - bindsF.setAccessible(true); - sqlBufF.setAccessible(true); - - Assert.assertSame("userHandler must survive resetIfDone() while a submit is in flight", - h, handlerF.get(q)); - Assert.assertSame("userBinds must survive resetIfDone() while a submit is in flight", - b, bindsF.get(q)); - CharSequence sqlBuffer = (CharSequence) sqlBufF.get(q); - Assert.assertEquals("sqlBuffer must survive resetIfDone() while a submit is in flight", - "SELECT 1", sqlBuffer.toString()); + Field implF = leaseClass.getDeclaredField("impl"); + implF.setAccessible(true); + Assert.assertSame("both leases must wrap the same pooled QueryImpl (zero-allocation reuse of the heavy state)", + implF.get(leaseA), implF.get(leaseB)); } private static final class NoopHandler implements QwpColumnBatchHandler { diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java new file mode 100644 index 00000000..f9e83fb7 --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java @@ -0,0 +1,273 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| 
__| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test.impl; + +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.impl.QueryClientPool; +import io.questdb.client.impl.QueryWorker; +import org.junit.Assert; +import org.junit.Test; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.ArrayDeque; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Regression tests for M1: a stale {@code Query} lease (held after close, or a + * cached {@code Completion}) must not disturb a later borrow of the same + * worker. The reused per-worker {@code QueryImpl} alone cannot distinguish a + * stale handle from a live one -- the fix stamps each borrow with a monotonic + * generation under the pool lock and validates it on close/cancel/release. + *
<p>
+ * These exercise the package-private internals by reflection (the same + * white-box style as the other tests in this package). They construct workers + * with a non-connected {@code newPlainText} client and never start the worker + * thread, so no network or I/O thread is involved. + */ +public class QueryLeaseGenerationTest { + + /** + * A stale {@code Completion.cancel()} (its lease long since released and the + * worker re-borrowed) must NOT reach the worker's client -- otherwise it + * would cancel whatever query the current borrower is running. We observe + * "reached the client" via the client's pending-cancel latch, which + * {@code QwpQueryClient.cancel()} sets first thing. + */ + @Test + public void testStaleCancelDoesNotReachClient() throws Exception { + Class workerClass = Class.forName("io.questdb.client.impl.QueryWorker"); + Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); + Method bump = workerClass.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Field queryF = workerClass.getDeclaredField("query"); + queryF.setAccessible(true); + Field doneF = queryImplClass.getDeclaredField("done"); + doneF.setAccessible(true); + Method cancel = queryImplClass.getDeclaredMethod("cancel", long.class); + cancel.setAccessible(true); + + // cancel(gen) validates the generation under the pool lock, so the + // worker needs a real pool to lock on (the worker thread is never + // started, so no network or I/O thread is involved). + QueryClientPool pool = new QueryClientPool( + "ws::addr=localhost:9000;", + /*minSize*/ 0, /*maxSize*/ 2, + /*acquireTimeoutMillis*/ 1_000L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE); + try { + // Live lease: generation 1 (one acquire), query in flight -> cancel(1) + // must reach the client. 
+ try (QwpQueryClient live = QwpQueryClient.newPlainText("localhost", 9000)) { + QueryWorker w = new QueryWorker(live, pool, 0); + bump.invoke(w); // generation -> 1 (acquire stamp) + Object impl = queryF.get(w); + doneF.setBoolean(impl, false); // pretend a submit is in flight + cancel.invoke(impl, 1L); + Assert.assertTrue("cancel() on the live lease must reach the client", + live.isPendingCancelForTest()); + } + + // Stale lease: the worker was borrowed (gen 1), released and re-borrowed + // (gen now 3). A cancel from the old lease (gen 1) must be dropped, even + // though the current query is in flight. + try (QwpQueryClient reused = QwpQueryClient.newPlainText("localhost", 9000)) { + QueryWorker w = new QueryWorker(reused, pool, 0); + bump.invoke(w); // -> 1 (first acquire) + bump.invoke(w); // -> 2 (release) + bump.invoke(w); // -> 3 (second acquire by a new borrower) + Object impl = queryF.get(w); + doneF.setBoolean(impl, false); // the new borrower's query is in flight + cancel.invoke(impl, 1L); // stale lease cancels + Assert.assertFalse("a stale lease's cancel() must NOT reach the client and " + + "cancel a different borrower's in-flight query", + reused.isPendingCancelForTest()); + } + } finally { + pool.close(); + } + } + + /** + * The TOCTOU the locked cancel closes: a cross-thread watchdog calls + * {@code cancel(gen)} while its lease is live, but the lease goes stale (the + * worker is released and re-borrowed) before the wire cancel fires. The + * cancel must re-validate the generation atomically with the cancel, under + * the pool lock, or it would abort the new borrower's query. + *
<p>
+ * Driven deterministically: the test thread holds the pool lock, so the + * watchdog's cancel parks inside the pool's generation re-check. We then + * advance the generation (release + re-borrow) under the lock and release + * it. The parked cancel must observe the new generation and drop. An + * unlocked check-then-cancel would not park, would pass its check at the + * still-live generation, and would fire the wire cancel. + */ + @Test + public void testConcurrentCancelDoesNotReachClientAfterReborrow() throws Exception { + Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Field queryF = QueryWorker.class.getDeclaredField("query"); + queryF.setAccessible(true); + Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); + Field doneF = queryImplClass.getDeclaredField("done"); + doneF.setAccessible(true); + Method cancel = queryImplClass.getDeclaredMethod("cancel", long.class); + cancel.setAccessible(true); + Field poolLockF = QueryClientPool.class.getDeclaredField("lock"); + poolLockF.setAccessible(true); + + QueryClientPool pool = new QueryClientPool( + "ws::addr=localhost:9000;", + /*minSize*/ 0, /*maxSize*/ 2, + /*acquireTimeoutMillis*/ 1_000L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE); + QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000); + try { + final QueryWorker w = new QueryWorker(client, pool, 0); + bump.invoke(w); // generation -> 1; the watchdog's lease captured 1 + final Object impl = queryF.get(w); + doneF.setBoolean(impl, false); // a query is in flight + + ReentrantLock poolLock = (ReentrantLock) poolLockF.get(pool); + final CountDownLatch atCancel = new CountDownLatch(1); + final CountDownLatch cancelReturned = new CountDownLatch(1); + final AtomicReference err = new AtomicReference<>(); + + // Hold the pool lock so the watchdog's cancel cannot finish its + // generation re-check + wire cancel until we let go. 
+ poolLock.lock(); + Thread watchdog = new Thread(() -> { + atCancel.countDown(); + try { + cancel.invoke(impl, 1L); // lease generation captured at borrow = 1 + } catch (Throwable t) { + err.set(t); + } finally { + cancelReturned.countDown(); + } + }, "watchdog-cancel"); + watchdog.start(); + Assert.assertTrue("watchdog must start", atCancel.await(5, TimeUnit.SECONDS)); + + // With the locked cancel, cancel() parks on the pool lock and cannot + // return while we hold it. An unlocked check-then-cancel would have + // already fired the wire cancel and returned. + Assert.assertFalse("cancel() must re-check the generation under the pool " + + "lock, so it cannot complete while the lock is held", + cancelReturned.await(200, TimeUnit.MILLISECONDS)); + + // The lease goes stale underneath the parked cancel: released (-> 2) + // and re-borrowed by a new owner (-> 3). + bump.invoke(w); + bump.invoke(w); + poolLock.unlock(); + + Assert.assertTrue("cancel() must return once the pool lock is free", + cancelReturned.await(5, TimeUnit.SECONDS)); + if (err.get() != null) { + throw new AssertionError("cancel() threw", err.get()); + } + Assert.assertFalse("a cancel whose lease went stale while parked on the pool " + + "lock must NOT reach the client and abort the new borrower's query", + client.isPendingCancelForTest()); + } finally { + client.close(); + pool.close(); + } + } + + /** + * The pool-wide blast radius of M1: a stale (duplicate / post-reborrow) + * release must never enqueue a worker that a live borrower owns, otherwise + * the worker sits in {@code available} twice and is handed to two borrowers + * at once. The generation captured at borrow time, re-checked under the pool + * lock, makes this impossible. 
+ */ + @Test + @SuppressWarnings("unchecked") + public void testStaleReleaseDoesNotEnqueueWorkerTwice() throws Exception { + Class poolClass = Class.forName("io.questdb.client.impl.QueryClientPool"); + Method release = poolClass.getDeclaredMethod("release", QueryWorker.class, long.class); + release.setAccessible(true); + Field availableF = poolClass.getDeclaredField("available"); + availableF.setAccessible(true); + Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Method generation = QueryWorker.class.getDeclaredMethod("generation"); + generation.setAccessible(true); + + QueryClientPool pool = new QueryClientPool( + "ws::addr=localhost:9000;", + /*minSize*/ 0, /*maxSize*/ 2, + /*acquireTimeoutMillis*/ 1_000L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE); + QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000); + try { + ArrayDeque available = (ArrayDeque) availableF.get(pool); + QueryWorker w = new QueryWorker(client, pool, 0); + + // acquire #1 stamps generation 1; the lease (A) captures 1. + bump.invoke(w); + Assert.assertEquals(1L, generation.invoke(w)); + + // close A -> release(w, 1): matches, enqueues once. + release.invoke(pool, w, 1L); + Assert.assertEquals("valid release must enqueue the worker once", 1, available.size()); + + // close A again (duplicate, e.g. explicit close + try-with-resources) + // -> release(w, 1): generation already bumped to 2, so it is dropped. + release.invoke(pool, w, 1L); + Assert.assertEquals("duplicate release of the same lease must be dropped", + 1, available.size()); + + // acquire #2 hands the worker to a new borrower (B): pull it out and + // stamp generation 3. + available.pollFirst(); + bump.invoke(w); + Assert.assertEquals(3L, generation.invoke(w)); + + // A stray close from the long-dead lease A -> release(w, 1): dropped, + // so B's worker is NOT re-enqueued while B still owns it. 
+ release.invoke(pool, w, 1L); + Assert.assertEquals("a post-reborrow stale release must NOT enqueue the " + + "worker while another borrower owns it", + 0, available.size()); + + // B's own close -> release(w, 3): matches, enqueues legitimately. + release.invoke(pool, w, 3L); + Assert.assertEquals("the current borrower's release must still work", + 1, available.size()); + } finally { + client.close(); + pool.close(); + } + } +} diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java index e9041448..5a3093a0 100644 --- a/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java @@ -26,16 +26,35 @@ import io.questdb.client.Completion; import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.impl.QueryClientPool; import io.questdb.client.impl.QueryWorker; import org.junit.Assert; import org.junit.Test; import java.lang.reflect.Constructor; import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; +/** + * Unit tests for {@link QueryWorker}. + *
<p>
+ * Coverage boundary: the lost-dispatch fix for the single-flight-reuse race + * (clearing {@code current} under {@code signalLock} at the moment of + * consumption rather than in a post-{@code runOn()} finally) has no + * deterministic unit reproduction here. Reproducing the clobber needs the + * worker to be mid-{@code runOn(client)} when the user thread re-dispatches on + * the same lease, which requires a live query client to drive + * {@code client.execute(...)} to its terminal callback. That regression is + * guarded end-to-end by {@code QuestDBFacadeE2ETest.testSustainedMixedConcurrency} + * in the parent questdb repo (more threads than pool slots, repeated + * submit/await per lease). {@link #testShutdownRacingDispatchMustNotStrandCaller()} + * below covers the adjacent but distinct shutdown-vs-dispatch branch only -- + * reverting the lost-dispatch hunk would not fail it. + */ public class QueryWorkerTest { /** @@ -68,14 +87,14 @@ public void testClientGetterReturnsConstructorInstance() { * state directly: it parks the worker on its condition, then takes the * worker's own {@code signalLock} and atomically sets both * {@code current} and {@code shuttingDown} before signalling. After the - * worker thread exits, the test asserts the {@link Completion} has been - * signalled. Today the assertion fails because the run loop's early - * return strands the {@code QueryImpl}. + * worker thread exits, the test asserts the {@code QueryImpl} was signalled + * to done. Without the fix the assertion fails because the run loop's early + * return strands the {@code QueryImpl} with {@code done==false}, so any + * caller blocked in {@code Completion.await()} would hang forever. 
*/ @Test(timeout = 30_000) public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception { Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); - Class poolClass = Class.forName("io.questdb.client.impl.QueryClientPool"); Field lockF = QueryWorker.class.getDeclaredField("signalLock"); Field condF = QueryWorker.class.getDeclaredField("signalCondition"); @@ -87,9 +106,9 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception { } Field doneF = queryImplClass.getDeclaredField("done"); - Field completionF = queryImplClass.getDeclaredField("completion"); + Field unexpectedF = queryImplClass.getDeclaredField("unexpectedError"); doneF.setAccessible(true); - completionF.setAccessible(true); + unexpectedF.setAccessible(true); // No QwpQueryClient is constructed here: runLoop exits at the // shuttingDown check before reaching the first reference to @@ -123,11 +142,10 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception { // Construct a QueryImpl with done=false, mimicking the state set up // by QueryImpl.submit() just before it calls worker.dispatch(). - Constructor ctor = queryImplClass.getDeclaredConstructor(poolClass); + Constructor ctor = queryImplClass.getDeclaredConstructor(QueryWorker.class); ctor.setAccessible(true); Object queryImpl = ctor.newInstance(new Object[]{null}); doneF.setBoolean(queryImpl, false); - Completion completion = (Completion) completionF.get(queryImpl); // Atomically force the racy state under the worker's own lock: // current set AND shuttingDown set before the worker wakes. @@ -145,20 +163,99 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception { Assert.assertFalse("worker thread did not exit after shuttingDown=true", t.isAlive()); - // The Completion must have been signalled. Without the fix, await(2s) - // returns false because signalDone is never called. 
- boolean completed; - try { - completed = completion.await(2, TimeUnit.SECONDS); - } catch (RuntimeException expectedAfterFix) { - // Once fixed, the worker is expected to call signalUnexpected - // with a QueryException("QuestDB handle is closed") which - // await() rethrows. Either form of "completed" is acceptable; - // the bug is the silent hang. - completed = true; - } + // The QueryImpl must have been signalled to done. Without the fix, + // done stays false because signalDone is never called, so a caller in + // Completion.await() would hang forever. The worker reaches the + // shutdown-race branch and calls signalUnexpected("QuestDB handle is + // closed"), which sets done=true and records the unexpected error. Assert.assertTrue("BUG: QueryWorker.runLoop returned with shuttingDown=true " + "while current!=null, never invoking runOn or signalUnexpected. " - + "The caller's Completion.await() hangs forever.", completed); + + "The caller's Completion.await() hangs forever.", doneF.getBoolean(queryImpl)); + Assert.assertNotNull("signalUnexpected must record the closed-handle error", + unexpectedF.get(queryImpl)); + } + + /** + * Result handlers (onBatch/onEnd/onError) run inline on the worker's + * dispatch thread. The blocking lease ops -- {@code close()} and the two + * {@code await()} variants -- would there wait on a terminal event that + * only this same thread can deliver, a permanent self-deadlock. The + * reentrancy guard must turn that into an immediate IllegalStateException. + *

+ * The guard compares {@code Thread.currentThread()} to the worker's + * dispatch thread, so this test points that field at the test thread (the + * worker is never started) to stand in for a reentrant in-handler call. + * Without the guard, {@code close()}/{@code await()} would park forever and + * the method-level timeout would fail the test. + */ + @Test(timeout = 30_000) + public void testCloseAndAwaitFromWorkerThreadThrowInsteadOfDeadlocking() throws Exception { + Class queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl"); + Field queryF = QueryWorker.class.getDeclaredField("query"); + queryF.setAccessible(true); + Field threadF = QueryWorker.class.getDeclaredField("thread"); + threadF.setAccessible(true); + Field doneF = queryImplClass.getDeclaredField("done"); + doneF.setAccessible(true); + Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Method isWorker = QueryWorker.class.getDeclaredMethod("isCurrentThreadWorker"); + isWorker.setAccessible(true); + Method close = queryImplClass.getDeclaredMethod("close", long.class); + close.setAccessible(true); + Method awaitNoTimeout = queryImplClass.getDeclaredMethod("await", long.class); + awaitNoTimeout.setAccessible(true); + Method awaitTimed = queryImplClass.getDeclaredMethod("await", long.class, long.class, TimeUnit.class); + awaitTimed.setAccessible(true); + + QueryClientPool pool = new QueryClientPool( + "ws::addr=localhost:9000;", + /*minSize*/ 0, /*maxSize*/ 2, + /*acquireTimeoutMillis*/ 1_000L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE); + QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000); + try { + QueryWorker w = new QueryWorker(client, pool, 0); + bump.invoke(w); // generation -> 1: a live lease + Object impl = queryF.get(w); + doneF.setBoolean(impl, false); // a submit is in flight, as during a handler + + // Off the worker thread the guard must NOT fire. 
+ Assert.assertFalse("guard must not fire on a normal caller thread", + (Boolean) isWorker.invoke(w)); + + // Stand in for a reentrant call from inside a result handler: the + // guard compares Thread.currentThread() to the worker's dispatch + // thread, so point that field at this thread. + threadF.set(w, Thread.currentThread()); + Assert.assertTrue((Boolean) isWorker.invoke(w)); + + assertThrowsHandlerReentry("close", () -> close.invoke(impl, 1L)); + assertThrowsHandlerReentry("await", () -> awaitNoTimeout.invoke(impl, 1L)); + assertThrowsHandlerReentry("await(timeout)", + () -> awaitTimed.invoke(impl, 1L, 5L, TimeUnit.SECONDS)); + } finally { + client.close(); + pool.close(); + } + } + + private static void assertThrowsHandlerReentry(String op, ReflectiveCall call) throws Exception { + try { + call.run(); + Assert.fail(op + "() from the worker thread must throw, not block/deadlock"); + } catch (InvocationTargetException e) { + Throwable cause = e.getCause(); + Assert.assertTrue(op + "(): expected IllegalStateException, was " + cause, + cause instanceof IllegalStateException); + Assert.assertTrue(op + "(): message must point at cancel(), was: " + cause.getMessage(), + cause.getMessage().contains("cancel()")); + } + } + + @FunctionalInterface + private interface ReflectiveCall { + void run() throws Exception; } } diff --git a/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java index 93b10301..533360be 100644 --- a/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java @@ -30,8 +30,6 @@ import org.junit.Assert; import org.junit.Test; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Proxy; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; @@ -48,9 
+46,9 @@ // // Sender is an interface, faked with a Proxy whose close() flips a flag, injected // via the SenderPool senderFactory seam. The connect Error is injected via the -// QueryClientPool connectHook seam. Both are passed through the package-private -// QuestDBImpl seam constructor (reached by reflection -- the main module is -// declared `open`); production callers pass null for both. +// QueryClientPool connectHook seam. Both are passed through the @TestOnly public +// QuestDBImpl seam constructor; production uses the public overload that passes +// null for both. public class QuestDBImplErrorSafetyTest { // Non-SF http config: the SenderPool factory replaces the build, but the @@ -122,33 +120,15 @@ private static Sender fakeSender(AtomicBoolean closedFlag) { private static QuestDBImpl newQuestDB( IntFunction senderFactory, Consumer connectHook - ) throws Exception { - Constructor c = QuestDBImpl.class.getDeclaredConstructor( - String.class, String.class, int.class, int.class, int.class, int.class, - long.class, long.class, long.class, long.class, - IntFunction.class, Consumer.class); - c.setAccessible(true); - try { - return c.newInstance( - SENDER_CFG, QUERY_CFG, - /*senderMin*/ 1, /*senderMax*/ 1, - /*queryMin*/ 1, /*queryMax*/ 1, - /*acquireTimeoutMillis*/ 250L, - /*idleTimeoutMillis*/ Long.MAX_VALUE, - /*maxLifetimeMillis*/ Long.MAX_VALUE, - /*housekeeperIntervalMillis*/ Long.MAX_VALUE, - senderFactory, connectHook); - } catch (InvocationTargetException e) { - // Unwrap so the caller sees the real construction failure (Error or - // RuntimeException), matching a direct constructor invocation. 
- Throwable cause = e.getCause(); - if (cause instanceof RuntimeException) { - throw (RuntimeException) cause; - } - if (cause instanceof Error) { - throw (Error) cause; - } - throw e; - } + ) { + return new QuestDBImpl( + SENDER_CFG, QUERY_CFG, + /*senderMin*/ 1, /*senderMax*/ 1, + /*queryMin*/ 1, /*queryMax*/ 1, + /*acquireTimeoutMillis*/ 250L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE, + /*housekeeperIntervalMillis*/ Long.MAX_VALUE, + senderFactory, connectHook); } } diff --git a/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java index c5c5edb7..e6f5eb69 100644 --- a/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java @@ -67,6 +67,7 @@ public void testEveryEgressKeyIsHonored() { assertHonored("zone=us-east", "zone", "us-east"); // COMMON applied by egress. assertHonored("auth_timeout_ms=7777", "auth_timeout_ms", 7777L); + assertHonored("connect_timeout=6000", "connect_timeout", 6000); // Credentials become the Authorization header, including the user/pass aliases. 
String basic = "Basic " + Base64.getEncoder() diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java new file mode 100644 index 00000000..5deeb5ac --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java @@ -0,0 +1,147 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test.impl; + +import io.questdb.client.Sender; +import io.questdb.client.impl.PooledSender; +import io.questdb.client.impl.SenderPool; +import org.junit.Assert; +import org.junit.Test; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.ArrayDeque; + +/** + * Ingest-side mirror of {@code QueryLeaseGenerationTest}: a stale pooled-Sender + * handle (held after close, with the slot since re-borrowed) must not disturb a + * later borrow of the same slot. 
{@code PooledSender} is now a fresh per-borrow + * wrapper carrying the lease generation; the reused {@code SenderSlot} validates + * it under the pool lock so a stale close/write is dropped. + *

+ * Reaches package-private internals by reflection (same white-box style as the + * other tests here); {@code SenderSlot} is constructed with a {@code null} + * delegate, which the paths under test never dereference. + */ +public class SenderLeaseGenerationTest { + + private static final String DEAD_HTTP_CONFIG = + "http::addr=127.0.0.1:1;protocol_version=2;auto_flush=off;"; + + /** + * The pool-wide blast radius: a stale (duplicate / post-reborrow) close must + * never enqueue a slot a live borrower owns, or two borrowers would write + * into one delegate's buffer at once. {@code giveBack} validates the lease + * generation under the pool lock, so this is impossible. + */ + @Test + @SuppressWarnings("unchecked") + public void testStaleGiveBackDoesNotEnqueueSlotTwice() throws Exception { + Class slotClass = Class.forName("io.questdb.client.impl.SenderSlot"); + Constructor slotCtor = slotClass.getDeclaredConstructor(Sender.class, SenderPool.class, int.class); + slotCtor.setAccessible(true); + Method bump = slotClass.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Method generation = slotClass.getDeclaredMethod("generation"); + generation.setAccessible(true); + Constructor leaseCtor = + PooledSender.class.getDeclaredConstructor(slotClass, long.class); + leaseCtor.setAccessible(true); + Field availableF = SenderPool.class.getDeclaredField("available"); + availableF.setAccessible(true); + + try (SenderPool pool = new SenderPool( + DEAD_HTTP_CONFIG, /*minSize*/ 0, /*maxSize*/ 2, + /*acquireTimeoutMillis*/ 1_000L, + /*idleTimeoutMillis*/ Long.MAX_VALUE, + /*maxLifetimeMillis*/ Long.MAX_VALUE)) { + ArrayDeque available = (ArrayDeque) availableF.get(pool); + Object slot = slotCtor.newInstance(null, pool, -1); + + // borrow #1 stamps generation 1; lease A captures 1. + bump.invoke(slot); + Assert.assertEquals(1L, generation.invoke(slot)); + PooledSender leaseA = leaseCtor.newInstance(slot, 1L); + + // close A -> giveBack(A): matches, enqueues once. 
+ pool.giveBack(leaseA); + Assert.assertEquals("valid close must enqueue the slot once", 1, available.size()); + + // duplicate close A (e.g. explicit close + try-with-resources) + // -> giveBack(A): generation already bumped to 2, so it is dropped. + pool.giveBack(leaseA); + Assert.assertEquals("duplicate close of the same lease must be dropped", + 1, available.size()); + + // borrow #2 hands the slot to a new borrower B: pull it out, stamp 3. + available.pollFirst(); + bump.invoke(slot); + Assert.assertEquals(3L, generation.invoke(slot)); + PooledSender leaseB = leaseCtor.newInstance(slot, 3L); + + // A stray close from the long-dead lease A -> dropped, so B's slot is + // NOT re-enqueued while B still owns it. + pool.giveBack(leaseA); + Assert.assertEquals("a post-reborrow stale close must NOT enqueue the slot " + + "while another borrower owns it", 0, available.size()); + + // B's own close -> giveBack(B): matches, enqueues legitimately. + pool.giveBack(leaseB); + Assert.assertEquals("the current borrower's close must still work", + 1, available.size()); + } + } + + /** + * A stale lease's data write must be rejected (not silently land in a slot a + * later borrower now owns). The generation guard in + * {@code SenderSlot.live()} throws before the delegate is touched. 
+ */ + @Test + public void testStaleWriteIsRejected() throws Exception { + Class slotClass = Class.forName("io.questdb.client.impl.SenderSlot"); + Constructor slotCtor = slotClass.getDeclaredConstructor(Sender.class, SenderPool.class, int.class); + slotCtor.setAccessible(true); + Method bump = slotClass.getDeclaredMethod("bumpGeneration"); + bump.setAccessible(true); + Constructor leaseCtor = + PooledSender.class.getDeclaredConstructor(slotClass, long.class); + leaseCtor.setAccessible(true); + + Object slot = slotCtor.newInstance(null, null, -1); + bump.invoke(slot); // generation -> 1, lease A captures 1 + PooledSender leaseA = leaseCtor.newInstance(slot, 1L); + bump.invoke(slot); // released + bump.invoke(slot); // re-borrowed -> generation 3 + + try { + leaseA.table("x"); + Assert.fail("a stale lease's write must throw, not reach the re-borrowed slot"); + } catch (IllegalStateException expected) { + Assert.assertTrue(expected.getMessage(), expected.getMessage().contains("closed")); + } + } +} diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java index b7b56e7a..6c4ae2d5 100644 --- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java @@ -29,7 +29,6 @@ import org.junit.Assert; import org.junit.Test; -import java.lang.reflect.Constructor; import java.lang.reflect.Proxy; import java.nio.file.Paths; import java.util.concurrent.atomic.AtomicBoolean; @@ -246,10 +245,7 @@ private static Sender fakeSender(AtomicBoolean closedFlag) { private static SenderPool newPool( String cfg, int min, int max, long acquireMs, IntFunction senderFactory - ) throws Exception { - Constructor c = SenderPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, IntFunction.class); - c.setAccessible(true); - return 
c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory); + ) { + return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory); } } diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java index e4b2b49a..2c76997d 100644 --- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java @@ -43,7 +43,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.lang.reflect.Constructor; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.nio.ByteBuffer; @@ -207,7 +206,10 @@ public void testReturnedSenderReusesSameSlot() throws Exception { first.close(); PooledSender second = pool.borrow(); try { - Assert.assertSame("returned slot must be recycled", first, second); + // borrow() now returns a fresh wrapper each time; the + // recycled thing is the underlying slot. 
+ Assert.assertSame("returned slot must be recycled", + getField(first, "slot"), getField(second, "slot")); Assert.assertEquals("no new slot dir on recycle", 1, countSlotDirs()); Assert.assertTrue(Files.exists(slot("default-0"))); } finally { @@ -1883,9 +1885,12 @@ private static void rmDir(String dir) { } private static Sender getDelegate(PooledSender ps) throws Exception { - Field f = PooledSender.class.getDeclaredField("delegate"); + Field slotF = PooledSender.class.getDeclaredField("slot"); + slotF.setAccessible(true); + Object slot = slotF.get(ps); + Field f = slot.getClass().getDeclaredField("delegate"); f.setAccessible(true); - return (Sender) f.get(ps); + return (Sender) f.get(slot); } // Invokes one of the pool's private managed-slot delegate factories @@ -1931,27 +1936,20 @@ private static void invokeDiscardBroken(SenderPool pool, PooledSender ps) throws m.invoke(pool, ps); } - // Reaches the package-private senderFactory test seam by reflection so a - // test can inject a fake/forged delegate (mirrors SenderPoolErrorSafetyTest). + // Uses the @TestOnly senderFactory seam so a test can inject a fake/forged + // delegate (mirrors SenderPoolErrorSafetyTest). private static SenderPool newPoolWithFactory( String cfg, int min, int max, long acquireMs, IntFunction senderFactory - ) throws Exception { - Constructor c = SenderPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, IntFunction.class); - c.setAccessible(true); - return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory); + ) { + return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory); } - // Reaches the package-private 8-arg constructor (deferStartupRecovery=true) - // by reflection so a test can build a pool whose SF startup recovery is NOT - // run inline -- mirroring the pooled QuestDB handle, which defers it to the - // housekeeper. 
senderFactory=null -> the real defaultSender(). - private static SenderPool newDeferredPool(String cfg, int min, int max, long acquireMs) throws Exception { - Constructor c = SenderPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, - IntFunction.class, boolean.class); - c.setAccessible(true); - return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, null, true); + // Uses the @TestOnly 8-arg constructor (deferStartupRecovery=true) so a test + // can build a pool whose SF startup recovery is NOT run inline -- mirroring + // the pooled QuestDB handle, which defers it to the housekeeper. + // senderFactory=null -> the real defaultSender(). + private static SenderPool newDeferredPool(String cfg, int min, int max, long acquireMs) { + return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, null, true); } // Drives a deferred pool's startup recovery to completion (the housekeeper @@ -1982,12 +1980,8 @@ private static void invokeMarkClosing(SenderPool pool) throws Exception { // test can drive the housekeeper recovery path against fully controlled // (fake) recoverers. 
private static SenderPool newDeferredPoolWithFactory( - String cfg, int min, int max, long acquireMs, IntFunction factory) throws Exception { - Constructor c = SenderPool.class.getDeclaredConstructor( - String.class, int.class, int.class, long.class, long.class, long.class, - IntFunction.class, boolean.class); - c.setAccessible(true); - return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, factory, true); + String cfg, int min, int max, long acquireMs, IntFunction factory) { + return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, factory, true); } // Fake Sender whose drain() (for slot 0 only) parks until released, opening a diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java index 85952f85..3f16b965 100644 --- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java @@ -34,10 +34,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Proxy; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; /** * Unit tests for the {@link SenderPool} borrow/return semantics. 
Uses the @@ -57,26 +54,36 @@ public class SenderPoolTest { "http::addr=127.0.0.1:1;protocol_version=2;auto_flush=off;"; @Test - public void testBorrowReturnRecyclesSameDecorator() { + public void testBorrowReturnRecyclesSameDecorator() throws Exception { try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) { Sender first = pool.borrow(); first.close(); Sender second = pool.borrow(); - Assert.assertSame("returned decorator should be reused after close()", first, second); + // Each borrow is a fresh PooledSender wrapper; what the pool recycles + // is the underlying slot, so compare those rather than the handles. + Assert.assertSame("returned slot should be recycled after close()", + slotOf(first), slotOf(second)); second.close(); } } + private static Object slotOf(Sender pooledWrapper) throws Exception { + Field f = PooledSender.class.getDeclaredField("slot"); + f.setAccessible(true); + return f.get(pooledWrapper); + } + @Test - public void testBrokenSenderIsNotReturnedToPool() { + public void testBrokenSenderIsNotReturnedToPool() throws Exception { // Borrowing, buffering a row, and then closing forces flush() against - // the unreachable address, which throws. The broken wrapper must not - // be returned to the pool: its delegate's buffer still holds the - // failed row, and on transports with terminal-failure semantics the - // delegate is also unusable. Either way, the next borrower must get - // a fresh wrapper. + // the unreachable address, which throws. The broken slot must not be + // returned to the pool: its delegate's buffer still holds the failed + // row, and on transports with terminal-failure semantics the delegate + // is also unusable. Either way, the next borrower must get a fresh + // slot, not the broken one. 
try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) { Sender first = pool.borrow(); + Object firstSlot = slotOf(first); first.table("t").longColumn("v", 1).atNow(); try { first.close(); @@ -86,11 +93,23 @@ public void testBrokenSenderIsNotReturnedToPool() { } Sender second = pool.borrow(); try { - Assert.assertNotSame("broken sender must not be handed back to next borrower", - first, second); + // borrow() always hands out a FRESH PooledSender wrapper, so + // assertNotSame(first, second) on the wrappers is vacuously + // true and proves nothing -- it stays true whether or not the + // broken slot was discarded. What the pool recycles is the + // underlying slot, so a broken slot leaking back to the next + // borrower shows up as the SAME slot. Assert the slot differs. + Assert.assertNotSame("broken slot must not be handed back to next borrower", + firstSlot, slotOf(second)); } finally { - if (second != first) { + // On the failing path (broken slot recycled) second.close() + // re-throws, since its delegate's buffer still holds the + // failed row; swallow it so the assertion above is what + // surfaces rather than this incidental close() failure. + try { second.close(); + } catch (LineSenderException ignored) { + // expected only when the regression is present } } } @@ -319,180 +338,6 @@ public void testReapIdleRespectsMinSize() throws InterruptedException { } } - @Test - public void testPinAfterCloseRejectsStaleEntry() throws Exception { - // Pin from a worker thread, close the pool from main. The worker's - // ThreadLocal still references its PooledSender, but the underlying - // delegate has been closed. The next pinToCurrentThread() on the - // worker must reject the stale entry instead of handing it back. 
- SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE); - CountDownLatch pinned = new CountDownLatch(1); - CountDownLatch closed = new CountDownLatch(1); - AtomicReference secondCallError = new AtomicReference<>(); - Thread worker = new Thread(() -> { - try { - pool.pinToCurrentThread(); - pinned.countDown(); - Assert.assertTrue(closed.await(2, TimeUnit.SECONDS)); - try { - pool.pinToCurrentThread(); - secondCallError.set(new AssertionError("pinToCurrentThread after close must throw")); - } catch (LineSenderException e) { - // expected - } - } catch (Throwable t) { - secondCallError.set(t); - } - }); - worker.start(); - Assert.assertTrue(pinned.await(2, TimeUnit.SECONDS)); - pool.close(); - closed.countDown(); - worker.join(2_000); - if (secondCallError.get() != null) { - throw new AssertionError(secondCallError.get()); - } - } - - @Test - public void testPinAfterUserCloseDoesNotShareWrapper() { - // Same-thread reproducer for the pinToCurrentThread() sharing bug. - // The user closes a pinned Sender (the natural try-with-resources - // idiom on the public Sender API), then another consumer borrows - // the slot. pinToCurrentThread() must not hand that wrapper back: - // it is now owned by the second consumer. - // - // Pool size 1 collapses the race window into a linear sequence: - // the second borrower deterministically receives the same slot - // that was just returned, so the bug is observable at the - // wrapper-identity level without timing. 
- try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 100, Long.MAX_VALUE, Long.MAX_VALUE)) { - Sender pinned = pool.pinToCurrentThread(); - pinned.close(); // pool slot returned; ThreadLocal still points at it - Sender stolen = pool.borrow(); // pollFirst hands the same wrapper to a new consumer - try { - Sender repinned = pool.pinToCurrentThread(); - Assert.fail("pinToCurrentThread() returned wrapper " + repinned - + " already borrowed by another consumer " + stolen); - } catch (LineSenderException expected) { - // After fix: TL cleared (or owner-thread invalidated) on close; - // re-pin tries to borrow, pool is empty, acquireTimeout fires. - } finally { - stolen.close(); - } - } - } - - @Test - public void testPinAfterUserCloseDoesNotShareWrapperCrossThread() throws InterruptedException { - // Cross-thread variant of the same bug, mirroring the originally - // reported trigger: Thread A pins, closes, then re-pins while - // Thread B has borrowed the slot in between. A's ThreadLocal still - // references the wrapper, and pinToCurrentThread() hands it back -- - // so A and B end up writing to the same underlying Sender. - try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 100, Long.MAX_VALUE, Long.MAX_VALUE)) { - CountDownLatch aClosed = new CountDownLatch(1); - CountDownLatch bBorrowed = new CountDownLatch(1); - AtomicReference bSender = new AtomicReference<>(); - AtomicReference failure = new AtomicReference<>(); - - Thread a = new Thread(() -> { - try { - Sender s = pool.pinToCurrentThread(); - s.close(); - aClosed.countDown(); - Assert.assertTrue(bBorrowed.await(2, TimeUnit.SECONDS)); - try { - Sender repinned = pool.pinToCurrentThread(); - failure.compareAndSet(null, new AssertionError( - "pinToCurrentThread() returned wrapper " + repinned - + " already borrowed by another thread " + bSender.get())); - } catch (LineSenderException expected) { - // After fix: re-pin tries to borrow, pool is empty, times out. 
- } - } catch (Throwable t) { - failure.compareAndSet(null, t); - } - }); - Thread b = new Thread(() -> { - try { - Assert.assertTrue(aClosed.await(2, TimeUnit.SECONDS)); - bSender.set(pool.borrow()); - } catch (Throwable t) { - failure.compareAndSet(null, t); - } finally { - bBorrowed.countDown(); - } - }); - - a.start(); - b.start(); - a.join(4_000); - b.join(4_000); - - if (bSender.get() != null) { - bSender.get().close(); - } - if (failure.get() != null) { - throw new AssertionError(failure.get()); - } - } - } - - @Test - public void testReleaseAfterCloseIsSafe() throws Exception { - // Same setup as the pin test, but exercise releaseCurrentThread() - // instead. With a closed delegate underneath, the release path must - // not invoke flush() on the dead Sender. - SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE); - CountDownLatch pinned = new CountDownLatch(1); - CountDownLatch closed = new CountDownLatch(1); - AtomicReference releaseError = new AtomicReference<>(); - Thread worker = new Thread(() -> { - try { - pool.pinToCurrentThread(); - pinned.countDown(); - Assert.assertTrue(closed.await(2, TimeUnit.SECONDS)); - pool.releaseCurrentThread(); - } catch (Throwable t) { - releaseError.set(t); - } - }); - worker.start(); - Assert.assertTrue(pinned.await(2, TimeUnit.SECONDS)); - pool.close(); - closed.countDown(); - worker.join(2_000); - if (releaseError.get() != null) { - throw new AssertionError(releaseError.get()); - } - } - - @Test - public void testThreadAffinityIsPerThread() throws InterruptedException { - try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 2, 2, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) { - Sender mainPinned = pool.pinToCurrentThread(); - Assert.assertSame("re-pin on same thread returns same instance", - mainPinned, pool.pinToCurrentThread()); - - AtomicReference otherPinned = new AtomicReference<>(); - CountDownLatch done = new CountDownLatch(1); - Thread t = new Thread(() -> { - try { - 
otherPinned.set(pool.pinToCurrentThread()); - } finally { - done.countDown(); - } - }); - t.start(); - Assert.assertTrue(done.await(2, TimeUnit.SECONDS)); - Assert.assertNotSame("different threads must get different pinned Senders", - mainPinned, otherPinned.get()); - - pool.releaseCurrentThread(); - } - } - // ---------------------------------------------------------------------- // Teardown robustness: a delegate close() can throw an Error (e.g. an // -ea AssertionError), not just a RuntimeException. The pool's best-effort @@ -578,9 +423,12 @@ public void testCloseSurvivesDelegateCloseError() throws Exception { * while the test does not leak native memory. */ private static void installFailingCloseDelegate(PooledSender ps, AtomicInteger closeAttempts) throws Exception { - Field f = PooledSender.class.getDeclaredField("delegate"); + Field slotF = PooledSender.class.getDeclaredField("slot"); + slotF.setAccessible(true); + Object slot = slotF.get(ps); + Field f = slot.getClass().getDeclaredField("delegate"); f.setAccessible(true); - Sender real = (Sender) f.get(ps); + Sender real = (Sender) f.get(slot); Sender failing = (Sender) Proxy.newProxyInstance( Sender.class.getClassLoader(), new Class[]{Sender.class}, @@ -601,6 +449,6 @@ private static void installFailingCloseDelegate(PooledSender ps, AtomicInteger c } return method.invoke(real, args); }); - f.set(ps, failing); + f.set(slot, failing); } } diff --git a/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java index 69453c77..51003bfc 100644 --- a/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java +++ b/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java @@ -77,6 +77,7 @@ public void testEveryIngressKeyIsHonored() { assertHonored("connection_listener_inbox_capacity=64", "connection_listener_inbox_capacity", 64); assertHonored("token=ey.abc", "token", 
"ey.abc"); assertHonored("auth_timeout_ms=4321", "auth_timeout_ms", 4321L); + assertHonored("connect_timeout=7000", "connect_timeout", 7000); // username/password together (both-or-neither), and the user/pass aliases. Map creds = snapshot("ws::addr=h:9000;username=alice;password=secret;"); diff --git a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java index 25b138bd..8d4ca755 100644 --- a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java +++ b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java @@ -81,7 +81,8 @@ public void testHandshakeWrapOverflowWithNonEmptyBufferShouldNotLoopForever() th CountDownLatch done = new CountDownLatch(1); t = new Thread(() -> { try { - socket.startTlsSession("test.host"); + socket.startTlsSession("test.host", op -> { + }); } catch (Throwable ignored) { // Expected: a healthy handshake loop should fail loudly here, // not spin forever. 
Any exception (AssertionError, SSLException, diff --git a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java index 506ce783..af5de346 100644 --- a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java +++ b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java @@ -25,9 +25,11 @@ package io.questdb.client.test.network; import io.questdb.client.ClientTlsConfiguration; +import io.questdb.client.network.IOOperation; import io.questdb.client.network.JavaTlsClientSocket; import io.questdb.client.network.NetworkFacade; import io.questdb.client.network.NetworkFacadeImpl; +import io.questdb.client.network.SocketReadinessWaiter; import io.questdb.client.std.MemoryTag; import io.questdb.client.std.Unsafe; import io.questdb.client.test.tools.TestUtils; @@ -40,9 +42,11 @@ import javax.net.ssl.SSLParameters; import javax.net.ssl.SSLSession; import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.ByteBuffer; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; import static org.junit.Assert.assertEquals; @@ -190,6 +194,89 @@ public void testRecvProcessesBufferedRecordAfterEmptyOkUnwrap() throws Exception } } + /** + * Regression test for the TLS handshake busy-spin / unbounded handshake. + * On a non-blocking socket, a peer that completes TCP but stalls before + * sending its half of the handshake leaves the engine in NEED_UNWRAP with + * the socket returning "would block" (recv == 0). The handshake must hand + * control to the readiness waiter -- which in production parks on the event + * loop bounded by the connect deadline -- instead of re-reading in a tight + * loop. 
Here the waiter stands in for that deadline: it records the wait + * and then throws, exactly as the bounded ioWait() does once the budget is + * spent. The method-level timeout fails the test if the handshake ever + * busy-spins past the waiter (i.e. if the deadline-aware wait is removed). + */ + @Test(timeout = 30_000) + public void testHandshakeWaitsForReadabilityInsteadOfBusySpinning() throws Exception { + TestUtils.assertMemoryLeak(() -> { + try (JavaTlsClientSocket socket = newSocket()) { + invoke(socket, "prepareInternalBuffers"); + setField(socket, "sslEngine", new StallingUnwrapSslEngine()); + + Method runHandshake = JavaTlsClientSocket.class.getDeclaredMethod( + "runHandshake", SocketReadinessWaiter.class); + runHandshake.setAccessible(true); + + AtomicInteger readWaits = new AtomicInteger(); + AtomicInteger writeWaits = new AtomicInteger(); + SocketReadinessWaiter waiter = op -> { + if (op == IOOperation.READ) { + readWaits.incrementAndGet(); + } else { + writeWaits.incrementAndGet(); + } + // Stand in for the connect deadline firing inside ioWait(). + throw new DeadlineReached(); + }; + + try { + runHandshake.invoke(socket, waiter); + Assert.fail("runHandshake must not complete the handshake against a stalled peer"); + } catch (InvocationTargetException e) { + Assert.assertTrue( + "handshake must surface the readiness waiter's deadline, was: " + e.getCause(), + e.getCause() instanceof DeadlineReached); + } + + Assert.assertEquals( + "handshake must wait for the socket to become readable instead of busy-spinning", + 1, readWaits.get()); + Assert.assertEquals( + "a NEED_UNWRAP stall must not trigger a write wait", 0, writeWaits.get()); + } + }); + } + + /** + * Happy-path guard for the refactor: when the engine makes progress (a + * complete record is available, unwrap returns OK and the handshake + * finishes), runHandshake must complete without ever parking on socket + * readiness. 
The would-block waits only fire on recv/send == 0, so a + * responsive peer never triggers them. + */ + @Test(timeout = 30_000) + public void testHandshakeCompletesWithoutWaitingWhenEngineMakesProgress() throws Exception { + TestUtils.assertMemoryLeak(() -> { + try (JavaTlsClientSocket socket = newSocket()) { + invoke(socket, "prepareInternalBuffers"); + setField(socket, "sslEngine", new ProgressingUnwrapSslEngine()); + + Method runHandshake = JavaTlsClientSocket.class.getDeclaredMethod( + "runHandshake", SocketReadinessWaiter.class); + runHandshake.setAccessible(true); + + AtomicInteger waits = new AtomicInteger(); + SocketReadinessWaiter waiter = op -> waits.incrementAndGet(); + + runHandshake.invoke(socket, waiter); // must return normally (handshake finished) + + Assert.assertEquals( + "a handshake that makes progress must not wait on socket readiness", + 0, waits.get()); + } + }); + } + private static void assertBytes(String expected, long ptr, int len) { Assert.assertEquals(expected.length(), len); for (int i = 0; i < len; i++) { @@ -333,6 +420,48 @@ public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int } } + private static final class DeadlineReached extends RuntimeException { + } + + private static final class ProgressingUnwrapSslEngine extends StubSslEngine { + @Override + public SSLEngineResult.HandshakeStatus getHandshakeStatus() { + return SSLEngineResult.HandshakeStatus.NEED_UNWRAP; + } + + @Override + public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int length) { + // A complete record was available: consume it and finish the + // handshake, so the loop exits without waiting. 
+ return new SSLEngineResult( + SSLEngineResult.Status.OK, + SSLEngineResult.HandshakeStatus.FINISHED, + 0, + 0 + ); + } + } + + private static final class StallingUnwrapSslEngine extends StubSslEngine { + @Override + public SSLEngineResult.HandshakeStatus getHandshakeStatus() { + return SSLEngineResult.HandshakeStatus.NEED_UNWRAP; + } + + @Override + public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int length) { + // No complete TLS record buffered yet: ask for more bytes from the + // socket. The stalled peer never sends them, so the handshake must + // wait on readability rather than spin. + return new SSLEngineResult( + SSLEngineResult.Status.BUFFER_UNDERFLOW, + SSLEngineResult.HandshakeStatus.NEED_UNWRAP, + 0, + 0 + ); + } + } + private static abstract class StubSslEngine extends SSLEngine { @Override public void beginHandshake() { diff --git a/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java b/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java new file mode 100644 index 00000000..b5d2c5d0 --- /dev/null +++ b/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java @@ -0,0 +1,118 @@ +/*+***************************************************************************** + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (c) 2014-2019 Appsicle + * Copyright (c) 2019-2026 QuestDB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ + +package io.questdb.client.test.network; + +import io.questdb.client.network.NetworkFacade; +import io.questdb.client.network.NetworkFacadeImpl; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Test; + +import java.net.InetSocketAddress; +import java.net.ServerSocket; + +/** + * Exercises the native non-blocking connect-with-timeout primitive + * ({@link NetworkFacade#connectAddrInfoTimeout}). + */ +public class NetConnectTimeoutTest { + + private static final NetworkFacade NF = NetworkFacadeImpl.INSTANCE; + + @Test + public void testConnectRefusedReturnsErrorNotTimeout() throws Exception { + // Bind then immediately close to obtain a port with no listener; a + // connect to it is refused (RST) rather than timed out. 
+ int port; + try (ServerSocket ss = new ServerSocket()) { + ss.bind(new InetSocketAddress("127.0.0.1", 0)); + port = ss.getLocalPort(); + } + + long addrInfo = NF.getAddrInfo("127.0.0.1", port); + Assert.assertNotEquals(-1, addrInfo); + int fd = NF.socketTcp(true); + try { + int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 5_000); + Assert.assertNotEquals("refused connect must not report success", 0, rc); + Assert.assertNotEquals("refused connect must not be reported as a timeout", + NetworkFacade.CONNECT_TIMEOUT, rc); + } finally { + NF.freeAddrInfo(addrInfo); + NF.close(fd); + } + } + + @Test + public void testConnectSucceedsWithinTimeout() throws Exception { + try (ServerSocket ss = new ServerSocket()) { + ss.bind(new InetSocketAddress("127.0.0.1", 0)); + int port = ss.getLocalPort(); + + long addrInfo = NF.getAddrInfo("127.0.0.1", port); + Assert.assertNotEquals(-1, addrInfo); + int fd = NF.socketTcp(true); + try { + int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 5_000); + Assert.assertEquals("loopback connect should succeed", 0, rc); + } finally { + NF.freeAddrInfo(addrInfo); + NF.close(fd); + } + } + } + + @Test + public void testConnectToBlackholeTimesOut() { + // 192.0.2.0/24 is TEST-NET-1 (RFC 5737); packets are silently dropped on + // a normal network, so the SYN goes unanswered and the timeout fires + // instead of the (much longer) OS connect timeout. + long addrInfo = NF.getAddrInfo("192.0.2.1", 9009); + Assert.assertNotEquals(-1, addrInfo); + int fd = NF.socketTcp(true); + try { + long start = System.nanoTime(); + int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 500); + long elapsedMs = (System.nanoTime() - start) / 1_000_000L; + + // Whatever the outcome, the key guarantee is that we never blocked + // on the (multi-minute) OS connect timeout. 
+ Assert.assertTrue("connect must return near the budget, was " + elapsedMs + "ms", elapsedMs < 5_000); + + // The deterministic outcome depends on the runner's routing for + // TEST-NET-1: a dropped SYN yields a real timeout (the path under + // test), while a runner with no route to 192.0.2.0/24 fails fast + // with ENETUNREACH/EHOSTUNREACH (rc == -1) and a rare appliance may + // even accept it (rc == 0). Only the timeout case is assertable; the + // others can't exercise the timeout, so skip rather than flake. + Assume.assumeTrue("no route to blackhole on this runner (rc=" + rc + ")", + rc == NetworkFacade.CONNECT_TIMEOUT); + Assert.assertEquals("blackhole connect should time out", NetworkFacade.CONNECT_TIMEOUT, rc); + } finally { + NF.freeAddrInfo(addrInfo); + NF.close(fd); + } + } +} diff --git a/design/qwp-client-ergonomics-issues.md b/design/qwp-client-ergonomics-issues.md new file mode 100644 index 00000000..fb5ed951 --- /dev/null +++ b/design/qwp-client-ergonomics-issues.md @@ -0,0 +1,214 @@ +# QWP client startup/failover — ergonomics issues + +Tracked sharp edges surfaced while reviewing +[`qwp-client-startup-failover-behavior.md`](./qwp-client-startup-failover-behavior.md). +Each entry is grounded in source. "Candidate" = likely defect worth changing; +"Intended (revisit)" = deliberate contract that may still deserve reconsideration. + +Severity legend: **P1** user-visible footgun likely to cause an outage or hang · +**P2** confusing/surprising but recoverable · **P3** polish. + +--- + +## ERG-1 — `initial_connect_retry` is implicitly promoted to SYNC (P1, Candidate) + +**Symptom.** A user sets `reconnect_max_duration_millis` for resilience and, +without setting `initial_connect_retry`, their application now **blocks** on +startup for the entire budget when the server is down. + +**Source.** `Sender.java` (~line 1451): if `initialConnectMode == null` and any +`reconnect_*` knob is set, the mode resolves to `SYNC`. 
+ +**Why it's bad.** Mode is inferred from an unrelated knob. The "make me more +resilient" action produces a "hang my boot" side effect. The code comment itself +acknowledges the knob "reads as a generic retry budget but the underlying path +only governs reconnects." + +**Proposed fix.** +- Make initial-connect mode an explicit, independent choice; stop inferring it. +- If inference must stay for back-compat, log a `WARN` when a `reconnect_*` knob + flips startup to `SYNC`, naming the knob and the resulting blocking behavior. + +**Acceptance.** With only `reconnect_max_duration_millis` set and the server +down, `build()` either returns promptly (OFF default) or logs an explicit +warning before blocking. A test asserts the warning / non-blocking default. + +--- + +## ERG-2 — `reconnect_max_duration_millis`: misleading name + inconsistent `0` (P2, Candidate) + +**Symptom.** Two confusions: +1. The name implies "reconnect only" but it also bounds the **initial** connect + in SYNC/ASYNC modes. +2. `reconnect_max_duration_millis=0` means **give up immediately**, whereas + `idle_timeout_ms=0` and `max_lifetime_ms=0` in the same config surface mean + **infinite**. There is no infinite-retry mode at all. + +**Source.** `CursorWebSocketSendLoop.java:827` — `deadlineNanos = start + dur*1e6`, +loop `while (now < deadline)`; `0` ⇒ zero iterations. Contrast +`QuestDBBuilder.idleTimeoutMillis/maxLifetimeMillis` (`0 ⇒ Long.MAX_VALUE`). + +**Why it's bad.** Same `0` token, opposite semantics depending on the knob; +tolerating a long maintenance window forces magic numbers like `86400000`. + +**Proposed fix.** +- Adopt one `0` convention. Recommended: `0 ⇒ infinite`, matching the pool + knobs, which also gives a real infinite-retry mode. +- Consider an alias `connect_retry_budget_ms` that reflects it covers initial + + reconnect; keep the old key as a deprecated alias. 
+ +**Acceptance.** Documented, consistent `0` semantics across the config surface; +test covering `=0` behavior and (if added) infinite mode. + +--- + +## ERG-3 — `failover` does not cover startup; queries have no async connect (P2, Candidate) + +**Symptom.** Users expect `failover=on` to make startup resilient. It does not — +it only governs reconnect+replay during `execute()` after a connection exists. +Query initial connect is always synchronous and blocking, with no async/lazy +mode (unlike ingest). + +**Source.** `QwpQueryClient.connect()` is synchronous; `failover_*` defaults at +`QwpQueryClient.java:139-141`; spec "Query client behavior". + +**Why it's bad.** Expectation mismatch on a safety-critical knob; asymmetry +between ingest (3 modes) and query (1 mode) forces two mental models. + +**Proposed fix.** +- Document `failover`'s scope prominently (done in the rewrite). +- Evaluate an async/lazy initial-connect mode for the query client to match + ingest, or a unified `initial_connect` setting shared by both sides. + +**Acceptance.** Either query supports a documented non-blocking initial-connect +mode, or the docs make the scope unambiguous and the limitation is explicitly +accepted. + +--- + +## ERG-4 — No first-class write-only facade (P2, Candidate) + +**Symptom.** A write-only user of `QuestDB` must still supply a query config they +never use **and** remember `query_pool_min=0` to avoid a build-time query +connection. + +**Source.** `QuestDBBuilder.build()` hard-requires both `ingestConfig` and +`queryConfig`; no write-only path. + +**Why it's bad.** Leaky and error-prone; the doc's own recommendation is "prefer +direct `Sender`," which is an admission the facade is awkward here. + +**Proposed fix.** +- Add `QuestDB.builder().ingestConfig(...).writeOnly()` (or a `writeOnly()` + shortcut) that skips the query pool entirely. +- Symmetric `readOnly()` is a natural follow-up. 
+ +**Acceptance.** A write-only facade builds with no query config and creates no +query pool; documented and tested. + +--- + +## ERG-5 — A single endpoint's `401`/`403` aborts the whole walk (P2, Intended, revisit) + +**Symptom.** One misconfigured endpoint returning `401`/`403` blocks startup +even when other listed endpoints would accept the credentials. Applies to both +ingest and query walks, including at startup. + +**Source.** Ingest/query endpoint matrices; `CursorWebSocketSendLoop` treats +`QwpAuthFailedException` as terminal across all endpoints. + +**Why it's debatable.** "Fail fast on bad credentials" is reasonable, but it is +asymmetric with how every *transport* failure is tolerated, and surprising +during rolling credential rotation or a single bad node. + +**Proposed fix (revisit).** +- Keep terminal-on-auth as the contract, but make it a deliberately documented + contract (done in the rewrite). +- Consider an opt-in (e.g. `auth_failure=continue`) that demotes auth failure to + a per-endpoint skip for heterogeneous fleets. + +**Acceptance.** Behavior documented as intentional; decision recorded on whether +an opt-in continue mode is warranted. + +--- + +## ERG-6 — Facade can't reach error handler / connection listener / serverInfoTimeout (P2, Candidate) + +**Symptom.** Through the `QuestDB` facade you cannot install a +`SenderErrorHandler` or `SenderConnectionListener` (ingest), nor set +`serverInfoTimeoutMs` (query). The latter has no config key at all. + +**Source.** `LineSenderBuilder.errorHandler()/connectionListener()` exist only on +the direct sender builder; `serverInfoTimeoutMs` is a `QwpQueryClient` builder +field with no `ConfigSchema` key (`ConfigSchema.java` EGRESS section). + +**Why it's bad.** The facade is the recommended high-level entry point, yet it +cannot configure observability hooks or a documented query timeout. 
+ +**Proposed fix.** +- Expose ingest error handler / connection listener on `QuestDBBuilder` + (per-pool or shared). +- Add a `server_info_timeout_ms` config key so it is reachable from any conn + string (and therefore the facade). + +**Acceptance.** Both hooks and the timeout are reachable from the facade; +documented in the knob-availability matrix. + +--- + +## ERG-7 — Simplest API has the worst error visibility (P1, Candidate) + +**Symptom.** `Sender.fromConfig(cfg)` with `initial_connect_retry=async` swallows +terminal startup failures — they surface only on a later producer call or at +`close()`. The visible path requires switching to `Sender.builder(...)` and +installing a handler. + +**Source.** Async terminal `SenderError` delivered to a configured +`SenderErrorHandler`; "even without a handler they are surfaced by later producer +calls or close-time safety net behavior." + +**Why it's bad.** The nicest ergonomics and the worst observability are +inversely correlated for the single most important question: "did my writer ever +connect?" + +**Proposed fix.** +- Default to a sane error sink (e.g. `WARN`/`ERROR` log on terminal async + failure) even without a registered handler. +- Provide a lightweight status accessor (e.g. `wasEverConnected()` / + `lastError()`) on the public `Sender` surface for poll-based checks. + +**Acceptance.** A terminal async failure is observable without installing a +custom handler; documented and tested. + +--- + +## ERG-8 — No client-side TCP connect timeout (P2, Intended, revisit) + +**Symptom.** A black-holed host in the `addr` list blocks the endpoint walk +until the OS connect timeout, undercutting the resilience value of listing +multiple endpoints. + +**Source.** `auth_timeout_ms` bounds only the post-connect upgrade/auth phase; +no separate application-level TCP connect timeout in the transport. 
+ +**Why it's debatable.** It is a transport limitation, but it directly defeats the +multi-endpoint failover use case at startup. + +**Proposed fix (revisit).** +- Add a client-side connect timeout so the walk can abandon black-holed hosts + and proceed to the next endpoint. + +**Acceptance.** A black-holed first endpoint no longer blocks past a configurable +bound before the walk advances; documented and tested. + +--- + +## Suggested sequencing + +1. **ERG-1** and **ERG-7** (both P1) — they cause hangs and silent failures. +2. **ERG-2**, **ERG-4**, **ERG-6** (P2 Candidate) — naming/consistency and + facade completeness. +3. **ERG-3**, **ERG-8** (P2, need design) — async query connect and connect + timeout. +4. **ERG-5** — confirm/record the auth-terminal contract; opt-in continue mode + only if a concrete fleet use case justifies it. diff --git a/design/qwp-client-startup-failover-behavior.md b/design/qwp-client-startup-failover-behavior.md new file mode 100644 index 00000000..01d53233 --- /dev/null +++ b/design/qwp-client-startup-failover-behavior.md @@ -0,0 +1,443 @@ +# QWP client startup, pooling, failover, and store-and-forward + +This document describes how the Java QWP client behaves at **startup**, under +**connection loss**, and with **store-and-forward (SF)** durability. It is +written for client *users* first: the [Quick start](#quick-start) and +[Mental model](#mental-model) sections are enough to configure a correct client. +The [Reference](#reference) section is the exhaustive behavior matrix. The +[Implementation appendix](#implementation-appendix) documents internals for +maintainers. + +It is descriptive — it records what the code does today, including current +sharp edges. Where a behavior is a likely footgun, it is marked +**⚠ Sharp edge** and tracked in +[`qwp-client-ergonomics-issues.md`](./qwp-client-ergonomics-issues.md). 
+ +--- + +## Quick start + +### Write-only client that tolerates the server being down at startup + +Use the direct `Sender` API (not the `QuestDB` facade — see +[sharp edge #4](#known-sharp-edges)). + +```java +String cfg = "ws::addr=db-a:9000,db-b:9000;" + + "sf_dir=/var/lib/my-app/questdb-sf;" // opt into disk durability + + "sender_id=writer-1;" // unique per process per sf_dir + + "initial_connect_retry=async;" // non-blocking startup + + "reconnect_max_duration_millis=86400000;" // outage budget (24h) + + "sf_max_total_bytes=100g;"; + +// For production, prefer the builder so you can install an error handler: +try (Sender sender = Sender.builder(cfg) + .errorHandler(myErrorHandler) // see "Error visibility" below + .connectionListener(myConnectionListener) + .build()) { + sender.table("telemetry").longColumn("v", 42).atNow(); + sender.flush(); // persists to SF storage; wire ACK is asynchronous +} +``` + +Why each line matters: + +- `sf_dir` is the **only** SF enable switch — there is no boolean flag. +- `initial_connect_retry=async` is what makes `build()` return without a live + socket. Without it, startup is blocking (see [Mental model](#mental-model)). +- `reconnect_max_duration_millis` is the outage budget for **both** the initial + connect and later reconnects. If it expires, the sender latches terminal and + stops; data already in `sf_dir` survives for a future sender on the same slot. + +**Error visibility ⚠:** the simplest path (`Sender.fromConfig(...)` + async) +surfaces terminal async failures only *later*, through a producer call or at +`close()`. For production, use `Sender.builder(...)` and install a +`SenderErrorHandler` / `SenderConnectionListener` +([sharp edge #7](#known-sharp-edges)). 
+ +### Read client that only reads from replicas + +```java +String cfg = "ws::addr=replica-a:9000,replica-b:9000,replica-c:9000;" + + "target=replica;" // without this, the client may bind a primary + + "failover=on;"; // default; affects execute()-time recovery only + +try (QuestDB db = QuestDB.connect(cfg)) { + try (Query q = db.borrowQuery()) { + q.sql("select * from telemetry limit 10").handler(myBatchHandler).submit().await(); + } +} +``` + +Why each line matters: + +- `target=replica` is required to avoid binding a primary/standalone server. + The default `target=any` will accept any role. +- `failover=on` is the default. It does **not** affect startup; it only governs + reconnect+replay after a query connection that was already established later + fails during `execute()`. + +--- + +## Mental model + +### Three independent "connect" models live in one client + +A `QuestDB` facade owns an **ingest pool** and a **query pool**. They do not +share a startup model. You must hold all three in mind: + +| Concern | Controlled by | Startup is... | +| --- | --- | --- | +| Ingest sender initial connect | `initial_connect_retry` = `off` / `sync` / `async` | one-shot / blocking-retry / background-retry | +| Query client initial connect | (no mode; always synchronous) | always blocking | +| Facade prewarm (how many of each connect at `build()`) | `sender_pool_min`, `query_pool_min` | eager if `min>0`, lazy if `min=0` | + +`failover=on` (query default) is **not** a startup setting — it only affects +query execution after a connection exists. This naming trips people up +([sharp edge #3](#known-sharp-edges)). 
+ +### Ingest initial-connect modes + +| `initial_connect_retry` | Mode | `build()` behavior on a down server | +| --- | --- | --- | +| `off` / `false` | `OFF` | one attempt on caller thread; throws immediately | +| `on` / `true` / `sync` | `SYNC` | retry loop on caller thread, bounded by `reconnect_max_duration_millis` (blocks) | +| `async` | `ASYNC` | returns immediately; I/O thread retries in background | + +**Default resolution ⚠:** if you don't set `initial_connect_retry` explicitly but +you *do* set any `reconnect_*` knob, the mode becomes `SYNC` — so a "resilience" +knob silently turns startup into a multi-minute **blocking** retry. If no +`reconnect_*` knob is set either, the mode is `OFF`. Always set +`initial_connect_retry` explicitly to avoid this ([sharp edge #1](#known-sharp-edges)). + +### Facade prewarm + +`QuestDBBuilder.build()` validates both configs (without connecting), then +eagerly creates `min` connections per pool. Consequences: + +| Configuration | Build-time network behavior | +| --- | --- | +| defaults (`min=1` both) | creates one sender + one query client; build fails if either cannot connect — unless ingest uses `initial_connect_retry=async` | +| `sender_pool_min=0` | no sender at build; first `borrowSender()` creates it (then follows the ingest initial-connect mode) | +| `query_pool_min=0` | no query client at build; first query `submit()` creates it | +| both mins `0` | config-only validation at build; all network work is lazy | + +After prewarm, both pools grow lazily up to `max` on demand, and shrink back to +`min` when idle. Growth uses the same real connect path as prewarm. At `max`, +callers block up to `acquire_timeout_ms` then throw. 
+ +--- + +## Defaults (single source of truth) + +### Pool (facade only) + +| Key / builder | Default | +| --- | ---: | +| `sender_pool_min` | `1` | +| `sender_pool_max` | `4` | +| `query_pool_min` | `1` | +| `query_pool_max` | `4` | +| `acquire_timeout_ms` | `5000` | +| `idle_timeout_ms` | `60000` (`0` ⇒ infinite) | +| `max_lifetime_ms` | `1800000` (`0` ⇒ infinite) | +| `housekeeper_interval_ms` | `5000` | + +### Ingest sender (SF + reconnect) + +| Key | Default | +| --- | ---: | +| `sender_id` | `default` | +| `sf_max_bytes` (segment size) | `4 MiB` | +| `sf_max_total_bytes` (SF mode) | `10 GiB` | +| `sf_durability` | `MEMORY` | +| `sf_append_deadline_millis` | `30000` | +| `reconnect_max_duration_millis` | `300000` (`0` ⇒ **give up immediately**, not infinite ⚠) | +| `reconnect_initial_backoff_millis` | `100` | +| `reconnect_max_backoff_millis` | `5000` | +| `close_flush_timeout_millis` | `60000` | +| `auth_timeout_ms` | `15000` | + +### Query client + +| Key | Default | +| --- | ---: | +| `target` | `any` | +| `failover` | `on` | +| `failover_max_attempts` | `8` (incl. original) | +| `failover_max_duration_ms` | `30000` (`0` disables the duration cap) | +| `failover_backoff_initial_ms` | `50` | +| `failover_backoff_max_ms` | `1000` | +| `auth_timeout_ms` | `15000` | +| `serverInfoTimeoutMs` | `5000` (builder API only — no config key ⚠) | + +Note the inconsistent `0` convention: `idle_timeout_ms=0`/`max_lifetime_ms=0` +mean *infinite*, but `reconnect_max_duration_millis=0` means *give up now* +([sharp edge #2](#known-sharp-edges)). + +--- + +## Knob availability by surface + +Three configuration surfaces exist. Not every knob is reachable from every +surface — this matrix shows where each lives. + +- **Conn string**: a `ws`/`wss` config string. Works for `Sender.fromConfig`, + `QwpQueryClient.fromConfig`, and `QuestDB.connect(...)`. +- **Sender builder**: `Sender.builder(...)` (`LineSenderBuilder`) — direct + ingest only. 
+- **Facade builder**: `QuestDB.builder()` (`QuestDBBuilder`) — pool knobs only; + query/ingest behavior must come from the conn string. + +| Knob | Conn string | Sender builder | Facade builder | +| --- | :---: | :---: | :---: | +| `addr` | ✅ | ✅ `address()/port()` | via conn string | +| `username`/`password`/`token` | ✅ | ✅ | via conn string | +| `tls_verify`/`tls_roots` | ✅ | ✅ | via conn string | +| `auth_timeout_ms` | ✅ | ✅ | via conn string | +| `initial_connect_retry` | ✅ | ✅ `initialConnectMode()` | via conn string | +| `reconnect_*` | ✅ | ✅ | via conn string | +| `sf_dir`/`sender_id`/`sf_*` | ✅ | ✅ | via conn string | +| `request_durable_ack` | ✅ | ✅ | via conn string | +| `close_flush_timeout_millis` | ✅ | ✅ | via conn string | +| `SenderErrorHandler` | ❌ | ✅ `errorHandler()` | ❌ (not reachable) | +| `SenderConnectionListener` | ❌ | ✅ `connectionListener()` | ❌ (not reachable) | +| `target` | ✅ | n/a | via conn string | +| `failover`/`failover_*` | ✅ | n/a | via conn string | +| `serverInfoTimeoutMs` | ❌ | n/a | ❌ (QwpQueryClient builder only) | +| `sender_pool_*`/`query_pool_*` | ✅ | n/a | ✅ | +| `acquire_timeout_ms`/`idle_timeout_ms`/`max_lifetime_ms` | ✅ | n/a | ✅ | + +⚠ Gaps worth noting: the ingest **error handler / connection listener** cannot +be installed through the facade at all, and **`serverInfoTimeoutMs`** has no +config key, so a facade query client cannot tune it +([sharp edge #6](#known-sharp-edges)). + +--- + +## Known sharp edges + +Each item links to a tracked issue in +[`qwp-client-ergonomics-issues.md`](./qwp-client-ergonomics-issues.md). +"Intended" means it is a deliberate contract; "Candidate" means it is a likely +ergonomic defect worth changing. + +| # | Sharp edge | Status | +| --- | --- | --- | +| 1 | `initial_connect_retry` is implicitly promoted to `SYNC` when any `reconnect_*` knob is set — a resilience knob silently makes startup block. 
| Candidate | +| 2 | `reconnect_max_duration_millis` name implies "reconnect only" but also governs initial connect; `0` means "give up now" while sibling `0`s mean "infinite"; no infinite mode exists. | Candidate | +| 3 | `failover` sounds like it covers startup but only affects post-connect query `execute()`. Queries have no async/lazy initial connect at all. | Candidate | +| 4 | No first-class write-only facade: a write-only user must still supply a query config and remember `query_pool_min=0`. | Candidate | +| 5 | A single endpoint returning `401`/`403` is treated as cluster-wide terminal and aborts the whole endpoint walk, even at startup, even if other endpoints would accept the credentials. | Intended (documented), revisit | +| 6 | Ingest `errorHandler`/`connectionListener` and query `serverInfoTimeoutMs` are unreachable from the facade. | Candidate | +| 7 | The simplest API (`fromConfig` + async) has the worst error visibility — terminal async failures surface only on later producer calls or at `close()`. | Candidate | +| 8 | No client-side TCP connect timeout: a black-holed host in `addr` blocks the endpoint walk until the OS connect timeout. | Intended (transport limitation), revisit | + +--- + +## Reference + +### Store-and-forward semantics + +`sf_dir=...` enables SF. There is no separate boolean enable flag. + +- The sender owns one slot: `<sf_dir>/<sender_id>/`. Default `sender_id` is + `default`. +- Multiple independent senders sharing one `sf_dir` must use distinct + `sender_id` values, else the second fails because the slot lock is held. +- In pooled `QuestDB` usage, `SenderPool` derives per-slot IDs from the base: + `<sender_id>-0`, `<sender_id>-1`, … so pooled senders never collide. +- On restart, the cursor engine opens existing segment files and replays + unacknowledged frames; acknowledged/truncated frames are not replayed. + +`flush()` semantics (QWP sender): + +- Encodes pending rows into the cursor engine. 
+- In SF mode, data is persisted to mmap-backed segment files before `flush()` + returns. +- `flush()` does **not** wait for server ACKs unless backpressure requires + space. The I/O thread sends frames and trims ACKed frames asynchronously. +- `drain(timeoutMillis)` flushes and waits for the server to ACK all currently + published frames, up to the timeout. +- `close()` flushes then waits up to `close_flush_timeout_millis` for ACKs, + unless that timeout is `<= 0`. + +### Async initial connect (ingest) + +With `initial_connect_retry=async`: + +- `build()` returns without a live socket; `wasEverConnected()` is `false`. +- Producer calls and `flush()` can run before the server exists; frames + accumulate in the cursor engine (and on disk with `sf_dir`). +- The I/O thread retries in the background using the same loop used after wire + failure. +- If a server appears before the budget expires, buffered frames are + sent/replayed and ACK-driven trimming begins. +- If the budget expires before any connection, the sender latches a terminal + `SenderError` whose message contains `never-connected-budget-exhausted`. +- If it connected at least once and a later outage exhausts the budget, the + message contains `connection-lost-budget-exhausted`. +- Terminal async errors go to a configured `SenderErrorHandler`; without one + they surface on later producer calls or at close-time. + +There is no infinite-retry mode. For long maintenance windows, set a large +`reconnect_max_duration_millis`. On budget exhaustion the current sender stops; +persisted `sf_dir` data remains for a future sender on the same slot. 
+ +### Ingest endpoint walk (`addr=a:9000,b:9000,...`) + +| Per-endpoint result | Sender behavior | +| --- | --- | +| DNS failure | transport error; try next endpoint | +| TCP connect failure | transport error; try next endpoint | +| TLS session/certificate failure | transport error; try next endpoint | +| HTTP upgrade timeout / non-auth transport error | try next endpoint | +| `421` with `X-QuestDB-Role: REPLICA` | role reject; try next endpoint | +| `401` / `403` auth failure | **terminal**; do not try later endpoints ⚠ | +| durable-ack requested but unsupported | terminal mismatch | +| successful write upgrade | bind this endpoint | +| all endpoints fail transport | throw / retry per initial/reconnect mode | +| all endpoints role-reject as replicas | `QwpRoleMismatchException` | + +### Query client initial connect + +`QwpQueryClient.connect()` is synchronous. Per endpoint it: opens TCP/TLS, +performs the WebSocket upgrade to `/read/v1`, reads the initial `SERVER_INFO` +frame, applies the `target=` role filter, and starts the egress I/O thread on +the first match. If no endpoint can be used, it throws. There is no async +initial-connect mode for queries. 
+ +`target=` matching: + +| Target | Accepted roles | +| --- | --- | +| `any` | any role | +| `primary` | `PRIMARY`, `PRIMARY_CATCHUP`, `STANDALONE` | +| `replica` | `REPLICA` only | + +Query initial-connect endpoint matrix: + +| Per-endpoint result | Behavior | +| --- | --- | +| DNS / TCP / TLS failure | record transport error; try next endpoint | +| HTTP upgrade timeout | transport error; try next endpoint | +| HTTP `401` / `403` | **terminal** `QwpAuthFailedException`; do not try later ⚠ | +| HTTP `421` + role header | role reject; try next endpoint | +| upgrade ok but no `SERVER_INFO` before timeout | transport error; try next | +| `SERVER_INFO` role ≠ `target` | role reject; try next endpoint | +| endpoint matches target | bind and return success | +| all endpoints transport-fail | `HttpClientException: all QWP endpoints unreachable ...` | +| all endpoints role-reject | `QwpRoleMismatchException` | + +`auth_timeout_ms` bounds the upgrade/auth phase **after** TCP connect. There is +no separate client-side TCP connect timeout, so a black-holed connect blocks +until the OS timeout before the walk advances ⚠. + +### Query execution-time failover + +With `failover=on`: + +- A transport/protocol terminal failure during `execute()` is intercepted; the + client reconnects via the host tracker and re-submits. +- The handler receives `onFailoverReset(...)` before replayed batches. +- Bounded by `failover_max_attempts` (default `8`, incl. original) **and** + `failover_max_duration_ms` (default `30000`; `0` disables the duration cap). +- Backoff: `failover_backoff_initial_ms=50`, `failover_backoff_max_ms=1000`. +- Auth failure during failover reconnect is terminal and reported to the handler. + +With `failover=off`, a transport failure is reported to the handler with no +reconnect/replay. 
+ +### Scenario matrix + +#### Facade startup + +| Scenario | Config | Result | +| --- | --- | --- | +| Default `connect`, all servers down | default mins | build fails | +| Default `connect`, first endpoint down, second works | multi-addr | build can succeed; each prewarmed client walks endpoints | +| Write-only-ish startup while down | `query_pool_min=0` + sender async | build returns | +| Fully lazy startup | both mins `0` | build returns after validation only | +| Query first use after lazy startup while down | `query_pool_min=0` | first `submit()` throws | +| Sender first use after lazy startup while down | `sender_pool_min=0` | first sender creation follows ingest initial mode | + +#### Direct sender startup + +| Scenario | Config | Result | +| --- | --- | --- | +| server down, default mode | no `reconnect_*`, no async | one attempt; build throws | +| server down, reconnect duration set, no mode | `reconnect_max_duration_millis=...` | **synchronous** retry; build blocks ⚠ | +| server down, async | `initial_connect_retry=async` | build returns; I/O thread retries | +| server returns `401`/`403` | any mode | terminal auth failure; no endpoint continuation | +| server appears before async budget | async + budget | buffered frames sent and ACKed | +| server appears after async budget | async + exhausted | sender terminal; new sender/restart needed | + +#### Read-replica startup (one bad endpoint, another replica works) + +| Bad endpoint type | Continue to working replica? 
| Notes | +| --- | --- | --- | +| DNS failure | Yes | transport error | +| TCP refused/unreachable | Yes | transport error; black-hole waits for OS timeout | +| TLS handshake failure | Yes | transport error | +| HTTP upgrade timeout | Yes | after `auth_timeout_ms` | +| upgrades but no `SERVER_INFO` | Yes | after `serverInfoTimeoutMs` (builder only) | +| primary/standalone while `target=replica` | Yes | role mismatch | +| `421` role reject | Yes | try next | +| `401`/`403` | **No** | auth treated as cluster-wide terminal ⚠ | +| broken shared TLS/trust store | No | every endpoint fails | +| all endpoints down | No | `all QWP endpoints unreachable` | +| reachable but none match `target` | No | `QwpRoleMismatchException` | + +--- + +## Implementation appendix + +For maintainers. Primary source areas: + +- `io.questdb.client.QuestDB` / `QuestDBBuilder` +- `io.questdb.client.impl.SenderPool` / `QueryClientPool` / `PoolHousekeeper` +- `io.questdb.client.Sender.LineSenderBuilder` +- `io.questdb.client.cutlass.qwp.client.QwpWebSocketSender` +- `io.questdb.client.cutlass.qwp.client.QwpQueryClient` +- `io.questdb.client.cutlass.qwp.client.sf.cursor.CursorSendEngine` +- `io.questdb.client.cutlass.qwp.client.sf.cursor.CursorWebSocketSendLoop` +- `io.questdb.client.cutlass.qwp.client.QwpHostHealthTracker` +- `io.questdb.client.impl.ConfigSchema` (the single key registry) + +### `QuestDBBuilder.build()` steps + +1. Require both ingest and query configs. +2. Parse + validate both configs without connecting (runs even when mins are + `0`; malformed pool/ingest/query/TLS/auth/enum/range values fail here). +3. Resolve pool keys: explicit builder setters override conn-string keys; + conflicting pool values across the two conn strings fail. +4. Construct `SenderPool` and `QueryClientPool`. +5. Eagerly create `min` connections per pool. +6. Start the `PoolHousekeeper`. 
+ +### Initial-connect mode resolution (`Sender.java`) + +```text +if initialConnectMode set explicitly -> use it (incl. OFF + tuned budget) +else if any reconnect_* set -> SYNC +else -> OFF +``` + +### Pooled SF startup recovery nuance + +- Live/prewarmed sender slots recover their own unacked data via their + `CursorSendEngine`. +- Non-live managed slots are scanned by the housekeeper startup recovery path, + so `build()` does not block on stranded slots. +- Recovery of non-live stranded slots is best-effort and bounded: a build/drain + failure aborts that scan; data stays durable for a later attempt, but the + current process does not retry the aborted scan indefinitely. +- For immediate background drain of all slots, keep enough `sender_pool_min` + slots warm or construct direct senders for the slots that must actively retry. + +### Reconnect deadline (`CursorWebSocketSendLoop`) + +`deadlineNanos = outageStartNanos + reconnect_max_duration_millis * 1e6`; the +loop runs `while (running && now < deadline)`. Hence `0` ⇒ no iterations ⇒ +immediate give-up. `QwpAuthFailedException` / `WebSocketUpgradeException` inside +the loop are terminal across all endpoints.