diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5ccfaa64..b1856df8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,10 +17,10 @@ defaults:
 jobs:
   # JDK 8 is the source of truth: the client ships as a Java 8 artifact
   # (io.questdb:questdb-client) and is released from JDK 8, so on JDK 8 it must
-  # compile, the full test suite must pass against the committed native
-  # libraries, and the javadoc jar must build (-P javadoc attaches it at the
-  # package phase). The committed native .so/.dylib/.dll are enough -- the only
-  # git submodule (zstd) is needed solely for C++ native rebuilds, not here.
+  # compile, the full test suite must pass, and the javadoc jar must build
+  # (-P javadoc attaches it at the package phase). The native libraries are no
+  # longer committed, so this job compiles libquestdb.so from source (hence the
+  # zstd submodule + cmake/nasm/build-essential toolchain) before the tests run.
   build-jdk8:
     name: Build, test & javadoc (JDK 8)
     runs-on: ubuntu-latest
@@ -28,6 +28,9 @@ jobs:
     steps:
       - name: Check out
         uses: actions/checkout@v4
+        with:
+          # zstd is required to compile the native library.
+          submodules: recursive
 
       - name: Set up JDK 8
         uses: actions/setup-java@v4
@@ -36,6 +39,23 @@ jobs:
           java-version: "8"
           cache: maven
 
+      - name: Install native build toolchain
+        run: sudo apt-get update && sudo apt-get install -y cmake nasm build-essential
+
+      - name: Build native libquestdb.so
+        # The library is compiled against the JNI headers of the JDK 8 that
+        # JAVA_HOME points at (set up above) -- the artifact's floor. It is copied
+        # into src resources rather than target/ so the next step's `mvn clean`
+        # leaves it in place and it is packaged + loaded via bin/<platform>.
+        run: |
+          cd core
+          cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S.
+          cmake --build cmake-build-release --config Release
+          built=target/classes/io/questdb/client/bin-local/libquestdb.so
+          test -f "$built"
+          mkdir -p src/main/resources/io/questdb/client/bin/linux-x86-64
+          cp "$built" src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so
+
       - name: Compile, test, and build javadoc
         run: mvn -B -ntp -P javadoc clean install
 
diff --git a/.github/workflows/rebuild_native_libs.yml b/.github/workflows/rebuild_native_libs.yml
index 026d3c3e..26b25dbc 100644
--- a/.github/workflows/rebuild_native_libs.yml
+++ b/.github/workflows/rebuild_native_libs.yml
@@ -68,57 +68,38 @@ jobs:
           key: nativelibs-osx-${{ github.sha }}
   build-all-linux-x86-64:
     runs-on: ubuntu-latest
-    # manylinux2014 is a container with new-ish compilers and tools, but old glibc - 2.17
-    # 2.17 is old enough to be compatible with most Linux distributions out there
+    # manylinux_2_28 (glibc 2.28) replaces the previous manylinux2014 (glibc
+    # 2.17) container: GitHub Actions now forces actions (checkout, cache) onto
+    # Node 24, whose binary requires glibc >= 2.27, so it can no longer run
+    # inside the glibc-2.17 image (the old Node-20-glibc-217 override hack only
+    # patched /__e/node20, not /__e/node24). 2.28 still runs stock Node 24 and
+    # matches the linux-aarch64 job, which already ships glibc-2.28 binaries.
+    #
+    # NOTE: the build container's glibc (2.28) does NOT dictate the artifact's
+    # runtime glibc floor. clock_gettime is pinned back to GLIBC_2.2.5 via
+    # src/main/c/share/glibc_compat.h so the linux-x86-64 .so keeps loading on
+    # glibc 2.14+ (its floor is memcpy@GLIBC_2.14), unchanged from before the
+    # container move. If you add a symbol with a higher version node here, the
+    # floor will rise -- check with: objdump -T libquestdb.so | grep GLIBC_.
     container:
-      image: quay.io/pypa/manylinux2014_x86_64
-      volumes:
-        - /node20217:/node20217
-        - /node20217:/__e/node20
+      image: quay.io/pypa/manylinux_2_28_x86_64
     steps:
-      - name: Install tools, most are needed to build nasm
-        run: |
-          ldd --version
-          yum update -y
-          yum install 'perl(Env)' perl-Font-TTF perl-Sort-Versions gcc wget perf asciidoc xmlto ghostscript adobe-source-sans-pro-fonts adobe-source-code-pro-fonts rpm-build zstd curl -y
-      - name: Build nasm
-        # we need nasm 2.14+ due to this bug https://bugzilla.nasm.us/show_bug.cgi?id=3392205
-        # manylinux2014 distribution includes nasm 2.10
-        # the nasm project itself provides RPMs, but they built against a newer glibc and other dependencies too
-        # thus we take src.rpm from nasm project and rebuild it in the manylinux2014 container
-        # this way we get a nasm binary that is compatible with the manylinux2014 environment
-        run: |
-          wget https://www.nasm.us/pub/nasm/releasebuilds/2.16.03/linux/nasm-2.16.03-0.fc39.src.rpm
-          rpmbuild --rebuild ./nasm-2.16.03-0.fc39.src.rpm
-          rpm -i ~/rpmbuild/RPMS/x86_64/nasm-2.16.03-0.el7.x86_64.rpm
-      - name: Install Node.js 20 glibc2.17
-        # A hack to override default nodejs 20 to a build compatible with older glibc.
-        # Inspired by https://github.com/pytorch/test-infra/pull/5959 If it's good for pytorch, it's good for us too! :)
-        # Q: Why do we need this hack at all? A: Because many github actions, include action/checkout@v4, depend on nodejs 20.
-        # GitHub Actions runner provides a build of nodejs 20 that requires a newer glibc than manylinux2014 has.
-        # Thus we download a build of nodejs 20 that is compatible with manylinux2014 and override the default one.
-        run: |
-          curl -LO https://unofficial-builds.nodejs.org/download/release/v20.9.0/node-v20.9.0-linux-x64-glibc-217.tar.xz
-          tar -xf node-v20.9.0-linux-x64-glibc-217.tar.xz --strip-components 1 -C /node20217
-          ldd /__e/node20/bin/node
       - uses: actions/checkout@v4
         with:
           submodules: true
-      - name: Install up-to-date CMake
+      - name: Install tooling
         run: |
-          wget -nv https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2-linux-x86_64.tar.gz
-          tar -zxf cmake-3.29.2-linux-x86_64.tar.gz
-          echo "PATH=`pwd`/cmake-3.29.2-linux-x86_64/bin/:$PATH" >> "$GITHUB_ENV"
+          yum update -y
+          yum install wget nasm zstd -y
       - name: Install GraalVM JDK 25 (for jni.h)
         run: |
-          wget -nv -O graalvm.tar.gz https://download.oracle.com/graalvm/25/latest/graalvm-jdk-25_linux-x64_bin.tar.gz
+          wget -v --timeout=180 -O graalvm.tar.gz https://download.oracle.com/graalvm/25/latest/graalvm-jdk-25_linux-x64_bin.tar.gz
           mkdir graalvm
           tar xfz graalvm.tar.gz -C graalvm --strip-components=1
           echo "JAVA_HOME=`pwd`/graalvm" >> "$GITHUB_ENV"
       - name: Generate Makefiles
         run: |
           cd ./core
-          # git submodule update --init
           cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S.
       - name: Build linux-x86-64 CXX Library
         run: |
diff --git a/ci/build_native.yaml b/ci/build_native.yaml
new file mode 100644
index 00000000..a831e58d
--- /dev/null
+++ b/ci/build_native.yaml
@@ -0,0 +1,92 @@
+# Builds the native libquestdb shared library on the test runner itself.
+#
+# The native binaries (Linux .so, Windows .dll, macOS .dylib) are no longer
+# committed to the repository -- they are produced and committed only by the
+# release "Build and Push Release CXX Libraries" GitHub Action. So the test CI
+# has to compile them locally before running the tests.
+#
+# Each platform's library is built on its own native runner, by the matching
+# step below (selected through its Agent.OS condition).
+#
+# CMake writes the artifact to:
+#   core/target/classes/io/questdb/client/bin-local/libquestdb.<ext>
+# which io.questdb.client.std.Os loads first (the "dev CXX lib" path), so the
+# client tests pick it up directly. We additionally copy it into
+#   core/src/main/resources/io/questdb/client/bin/<platform>/libquestdb.<ext>
+# so that `mvn install` packages it into the client jar exactly like the
+# committed binary used to be -- this is what the downstream QuestDB OSS server
+# tests load from the installed jar.
+#
+# JAVA_HOME (set to GraalVM JDK 25 by setup.yaml) provides jni.h / jni_md.h:
+#   - Linux:   $JAVA_HOME/include + $JAVA_HOME/include/linux
+#   - macOS:   $JAVA_HOME/include + $JAVA_HOME/include/darwin
+#   - Windows: %JAVA_HOME%\include + %JAVA_HOME%\include\win32
+steps:
+  - bash: |
+      set -eux
+      git submodule update --init --recursive core/src/main/c/share/zstd
+    displayName: "Init zstd submodule"
+
+  - bash: |
+      set -eux
+      sudo apt-get update
+      sudo apt-get install -y cmake nasm build-essential
+      cd core
+      cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S.
+      cmake --build cmake-build-release --config Release
+      lib="target/classes/io/questdb/client/bin-local/libquestdb.so"
+      test -f "$lib"
+      # Fail fast if the linker left an unresolved dependency in the .so.
+      if ldd "$lib" | grep -i "not found"; then
+        echo "libquestdb.so has unresolved dependencies"
+        exit 1
+      fi
+      mkdir -p src/main/resources/io/questdb/client/bin/linux-x86-64
+      cp "$lib" src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so
+    displayName: "Build native libquestdb.so (Linux x86-64)"
+    condition: eq(variables['Agent.OS'], 'Linux')
+
+  - bash: |
+      set -eux
+      command -v cmake >/dev/null 2>&1 || brew install cmake
+      command -v nasm  >/dev/null 2>&1 || brew install nasm
+      # darwin-aarch64 on Apple silicon agents, darwin-x86-64 on Intel agents.
+      case "$(uname -m)" in
+        arm64)  platform="darwin-aarch64" ;;
+        x86_64) platform="darwin-x86-64" ;;
+        *) echo "unsupported macOS arch: $(uname -m)"; exit 1 ;;
+      esac
+      cd core
+      # Pin the dylib's minimum macOS version so the artifact stays loadable on
+      # older macOS, matching the release build.
+      export MACOSX_DEPLOYMENT_TARGET=13.0
+      cmake -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S.
+      cmake --build cmake-build-release --config Release
+      lib="target/classes/io/questdb/client/bin-local/libquestdb.dylib"
+      test -f "$lib"
+      mkdir -p "src/main/resources/io/questdb/client/bin/${platform}"
+      cp "$lib" "src/main/resources/io/questdb/client/bin/${platform}/libquestdb.dylib"
+    displayName: "Build native libquestdb.dylib (macOS)"
+    condition: eq(variables['Agent.OS'], 'Darwin')
+
+  - powershell: |
+      $ErrorActionPreference = "Stop"
+      # The CMake build is GCC/MinGW based (gcc flags, -static-libgcc/-static-libstdc++),
+      # so build the Windows DLL with the MinGW-w64 toolchain + NASM, not MSVC.
+      choco install -y --no-progress nasm mingw
+      Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
+      refreshenv
+      # choco's nasm package does not put nasm on PATH; add it explicitly.
+      $env:PATH = "C:\Program Files\NASM;" + $env:PATH
+      gcc --version
+      mingw32-make --version
+      nasm --version
+      cd core
+      cmake -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release -B cmake-build-release -S .
+      cmake --build cmake-build-release --config Release
+      $lib = "target/classes/io/questdb/client/bin-local/libquestdb.dll"
+      if (!(Test-Path $lib)) { throw "native build produced no $lib" }
+      New-Item -ItemType Directory -Force -Path "src/main/resources/io/questdb/client/bin/windows-x86-64" | Out-Null
+      Copy-Item $lib "src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll" -Force
+    displayName: "Build native libquestdb.dll (Windows x86-64)"
+    condition: eq(variables['Agent.OS'], 'Windows_NT')
diff --git a/ci/run_tests_pipeline.yaml b/ci/run_tests_pipeline.yaml
index 3268313b..86d65410 100644
--- a/ci/run_tests_pipeline.yaml
+++ b/ci/run_tests_pipeline.yaml
@@ -54,10 +54,6 @@ stages:
               imageName: "macos-15-arm64"
               poolName: "Azure Pipelines"
               jdkArch: "arm64"
-            mac-x64:
-              imageName: "macos-15"
-              poolName: "Azure Pipelines"
-              jdkArch: "x64"
             windows-msvc-2022-x64:
               imageName: "windows-2022"
               poolName: "Azure Pipelines"
@@ -82,6 +78,13 @@ stages:
                 maven | "$(Agent.OS)"
               path: $(HOME)/.m2/repository
             displayName: "Cache Maven repository"
+          # Compile the native libquestdb shared library on the runner; no
+          # platform's binary is committed anymore. Must run before the client
+          # jar is installed so the freshly built lib is packaged into it. The
+          # template builds the right artifact for the current native agent --
+          # Linux (.so), Windows (.dll), and macOS (.dylib) alike (see
+          # build_native.yaml).
+          - template: build_native.yaml
           - bash: |
               BRANCH="${SYSTEM_PULLREQUEST_SOURCEBRANCH:-$BUILD_SOURCEBRANCHNAME}"
               BRANCH="${BRANCH#refs/heads/}"
@@ -149,6 +152,9 @@ stages:
                 maven | "$(Agent.OS)"
               path: $(HOME)/.m2/repository
             displayName: "Cache Maven repository"
+          # Native binaries are no longer committed; compile libquestdb.so on the
+          # runner so the coverage test run can load it (same as BuildAndTest).
+          - template: build_native.yaml
           - task: Maven@3
             displayName: "Run tests with coverage"
             inputs:
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 3538aa7f..29611089 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -48,6 +48,7 @@ set(
         src/main/c/share/files.h
         src/main/c/share/net.h
         src/main/c/share/os.h
+        src/main/c/share/glibc_compat.h
         src/main/c/share/ooo.cpp
         src/main/c/share/cpprt_overrides.h
         src/main/c/share/cpprt_overrides.cpp
diff --git a/core/src/main/c/share/glibc_compat.h b/core/src/main/c/share/glibc_compat.h
new file mode 100644
index 00000000..24ea6211
--- /dev/null
+++ b/core/src/main/c/share/glibc_compat.h
@@ -0,0 +1,53 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#ifndef QUESTDB_GLIBC_COMPAT_H
+#define QUESTDB_GLIBC_COMPAT_H
+
+// Pin clock_gettime() to its original GLIBC_2.2.5 symbol version.
+//
+// glibc 2.17 moved clock_gettime() out of librt and into libc, exporting it
+// under a NEW version node: clock_gettime@GLIBC_2.17. The release binaries are
+// built in a modern toolchain container (CI uses manylinux_2_28 / glibc 2.28),
+// so without this pin the linker binds our calls to clock_gettime@GLIBC_2.17.
+// That single symbol raises the whole library's glibc floor to 2.17 and makes
+// it fail to LOAD on hosts running glibc 2.14-2.16 with:
+//
+//     version `GLIBC_2.17' not found (required by libquestdb.so)
+//
+// The original clock_gettime@GLIBC_2.2.5 symbol is still exported as a compat
+// symbol by librt.so.1 on every glibc since (and by libc after the 2.34 librt
+// merge), so forcing the reference back to it keeps the library loadable down
+// to the previous floor (glibc 2.14, set by memcpy@GLIBC_2.14) with no change
+// in runtime behaviour. librt is already a NEEDED dependency (CMake links rt).
+//
+// Scope: x86-64 glibc only. aarch64 glibc started at 2.17 and has only ever
+// shipped clock_gettime in libc@GLIBC_2.17 -- there is no GLIBC_2.2.5 version
+// there, so emitting the pin on aarch64 would fail the link with an undefined
+// clock_gettime@GLIBC_2.2.5. On macOS/Windows the guard below compiles it out.
+#if defined(__linux__) && defined(__GLIBC__) && defined(__x86_64__)
+__asm__(".symver clock_gettime,clock_gettime@GLIBC_2.2.5");
+#endif
+
+#endif // QUESTDB_GLIBC_COMPAT_H
diff --git a/core/src/main/c/share/net.c b/core/src/main/c/share/net.c
index 05660f2b..3b0162fc 100644
--- a/core/src/main/c/share/net.c
+++ b/core/src/main/c/share/net.c
@@ -33,6 +33,9 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
+#include <poll.h>
+#include <time.h>
+#include "glibc_compat.h"
 #include "net.h"
 #include <netdb.h>
 #include "sysutil.h"
@@ -298,6 +301,100 @@ JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfo
     return handleEintrInConnect(fd, result);
 }
 
+// Waits up to timeout_millis (<= 0 times out at once) for an in-progress
+// non-blocking connect on fd to finish. Returns 0 on success, -1 on connection
+// failure (errno set so the caller can read it via Os.errno()), or
+// com_questdb_network_Net_ECONNTIMEOUT on timeout (errno set to ETIMEDOUT).
+static jint awaitConnectComplete(int fd, jint timeout_millis) {
+    // Fix a single absolute deadline up front. Recomputing the remaining budget
+    // against a moving baseline on each EINTR (reset start = now, then subtract
+    // whole milliseconds) lets a high-frequency signal storm extend the timeout:
+    // under sub-millisecond interrupts every interval truncates to 0 ms, the
+    // budget never decrements, and poll is re-armed with the full budget each
+    // time. A fixed deadline is immune to interrupt frequency -- the remaining
+    // time can only ever decrease.
+    struct timespec deadline;
+    clock_gettime(CLOCK_MONOTONIC, &deadline);
+    long budget_millis = timeout_millis > 0 ? timeout_millis : 0;
+    deadline.tv_sec += budget_millis / 1000L;
+    deadline.tv_nsec += (budget_millis % 1000L) * 1000000L;
+    if (deadline.tv_nsec >= 1000000000L) {
+        deadline.tv_sec += 1;
+        deadline.tv_nsec -= 1000000000L;
+    }
+
+    for (;;) {
+        struct timespec now;
+        clock_gettime(CLOCK_MONOTONIC, &now);
+        // Remaining time in whole ms for poll(). Subtract in nanoseconds FIRST: a
+        // negative tv_nsec delta truncated on its own rounds UP and overshoots.
+        long long remaining_nanos = (long long) (deadline.tv_sec - now.tv_sec) * 1000000000LL
+                                    + (deadline.tv_nsec - now.tv_nsec);
+        long remaining_millis = (long) (remaining_nanos / 1000000LL);
+        if (remaining_millis <= 0) {
+            errno = ETIMEDOUT;
+            return com_questdb_network_Net_ECONNTIMEOUT;
+        }
+
+        struct pollfd pfd;
+        pfd.fd = fd;
+        pfd.events = POLLOUT;
+        pfd.revents = 0;
+
+        int rc = poll(&pfd, 1, (int) remaining_millis);
+        if (rc > 0) {
+            // The connect attempt has finished one way or another; the only
+            // authoritative result is SO_ERROR (POLLOUT alone does not mean
+            // success -- a refused connection is also reported as writable).
+            int so_error = 0;
+            socklen_t len = sizeof(so_error);
+            if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &len) < 0) {
+                return -1;
+            }
+            if (so_error != 0) {
+                errno = so_error;
+                return -1;
+            }
+            return 0;
+        }
+        if (rc == 0) {
+            errno = ETIMEDOUT;
+            return com_questdb_network_Net_ECONNTIMEOUT;
+        }
+        if (errno != EINTR) {
+            return -1;
+        }
+        // Interrupted by a signal: loop and recompute the remaining time against
+        // the fixed deadline. EINTR storms cannot extend the timeout.
+    }
+}
+
+JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfoTimeout
+        (JNIEnv *e, jclass cl, jint fd, jlong lpAddrInfo, jint timeoutMillis) {
+    // Timed connect of fd to lpAddrInfo: 0 once connected, -1 on failure (errno
+    // set), otherwise whatever awaitConnectComplete() reports for the wait.
+    const int sock = (int) fd;
+    struct addrinfo *target = (struct addrinfo *) lpAddrInfo;
+
+    // O_NONBLOCK has to be in place BEFORE connect(): the call then comes back
+    // at once with EINPROGRESS rather than blocking on the OS connect timeout.
+    // The flag stays set on success, matching the configureNonBlocking() the
+    // callers already perform after connecting.
+    const int flags = fcntl(sock, F_GETFL, 0);
+    if (flags < 0 || fcntl(sock, F_SETFL, flags | O_NONBLOCK) < 0) {
+        return -1;
+    }
+
+    if (connect(sock, target->ai_addr, (int) target->ai_addrlen) == 0) {
+        return 0; // connected immediately (e.g. loopback)
+    }
+    const int pending = errno == EINPROGRESS || errno == EINTR || errno == EWOULDBLOCK;
+    if (!pending) {
+        return -1; // immediate failure, errno set
+    }
+    return awaitConnectComplete(sock, timeoutMillis);
+}
+
 JNIEXPORT void JNICALL Java_io_questdb_client_network_Net_freeAddrInfo0
         (JNIEnv *e, jclass cl, jlong address) {
     if (address != 0) {
diff --git a/core/src/main/c/share/net.h b/core/src/main/c/share/net.h
index 13adafcb..27143639 100644
--- a/core/src/main/c/share/net.h
+++ b/core/src/main/c/share/net.h
@@ -13,6 +13,8 @@ extern "C" {
 #define com_questdb_network_Net_EPEERDISCONNECT -1L
 #undef com_questdb_network_Net_EOTHERDISCONNECT
 #define com_questdb_network_Net_EOTHERDISCONNECT -2L
+#undef com_questdb_network_Net_ECONNTIMEOUT
+#define com_questdb_network_Net_ECONNTIMEOUT -3L
 
 /*
  * Class:     io_questdb_client_network_Net
diff --git a/core/src/main/c/share/os.c b/core/src/main/c/share/os.c
index 7262e3f4..ee0b1f69 100644
--- a/core/src/main/c/share/os.c
+++ b/core/src/main/c/share/os.c
@@ -30,6 +30,7 @@
 #include <string.h>
 #include <sys/time.h>
 #include <time.h>
+#include "glibc_compat.h"
 #include "../share/os.h"
 
 #ifdef __APPLE__
diff --git a/core/src/main/c/windows/net.c b/core/src/main/c/windows/net.c
index c32957d4..fd290629 100644
--- a/core/src/main/c/windows/net.c
+++ b/core/src/main/c/windows/net.c
@@ -160,6 +160,66 @@ JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfo
     return res;
 }
 
+// Timed connect: returns 0 once connected, com_questdb_network_Net_ECONNTIMEOUT
+// on timeout, or -1 on failure. Failures record their code via SaveLastError().
+JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_connectAddrInfoTimeout
+        (JNIEnv *e, jclass cl, jint fd, jlong lpAddrInfo, jint timeoutMillis) {
+    struct addrinfo *addr = (struct addrinfo *) lpAddrInfo;
+    SOCKET s = (SOCKET) fd;
+
+    // Switch to non-blocking BEFORE connect so it returns immediately with
+    // WSAEWOULDBLOCK instead of blocking on the OS connect timeout.
+    u_long mode = 1;
+    if (ioctlsocket(s, FIONBIO, &mode) != 0) {
+        SaveLastError();
+        return -1;
+    }
+
+    if (connect(s, addr->ai_addr, (int) addr->ai_addrlen) == 0) {
+        return 0; // connected immediately (e.g. loopback)
+    }
+    if (WSAGetLastError() != WSAEWOULDBLOCK) {
+        SaveLastError();
+        return -1;
+    }
+
+    fd_set writefds, exceptfds;
+    FD_ZERO(&writefds);
+    FD_ZERO(&exceptfds);
+    FD_SET(s, &writefds);
+    FD_SET(s, &exceptfds);
+    // select() rejects a negative timeval (WSAEINVAL): clamp to 0, as on POSIX.
+    long budget_millis = timeoutMillis > 0 ? timeoutMillis : 0;
+    struct timeval tv;
+    tv.tv_sec = budget_millis / 1000;
+    tv.tv_usec = (budget_millis % 1000) * 1000;
+
+    // Winsock signals a failed non-blocking connect via the exception set.
+    int sel = select(0, NULL, &writefds, &exceptfds, &tv);
+    if (sel == 0) {
+        WSASetLastError(WSAETIMEDOUT);
+        SaveLastError();
+        return com_questdb_network_Net_ECONNTIMEOUT;
+    }
+    if (sel == SOCKET_ERROR) {
+        SaveLastError();
+        return -1;
+    }
+
+    // SO_ERROR is the authoritative result. A failed getsockopt() must not be
+    // read as "connected": fall through so its own error is what gets saved.
+    int failed = FD_ISSET(s, &exceptfds) || !FD_ISSET(s, &writefds);
+    int so_error = 0, len = sizeof(so_error);
+    if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &so_error, &len) == 0) {
+        if (!failed && so_error == 0) {
+            return 0;
+        }
+        WSASetLastError(so_error != 0 ? so_error : WSAECONNREFUSED);
+    }
+    SaveLastError();
+    return -1;
+}
+
 JNIEXPORT jint JNICALL Java_io_questdb_client_network_Net_configureNonBlocking
         (JNIEnv *e, jclass cl, jint fd) {
     u_long mode = 1;
diff --git a/core/src/main/java/io/questdb/client/Completion.java b/core/src/main/java/io/questdb/client/Completion.java
index 0888370d..615799e0 100644
--- a/core/src/main/java/io/questdb/client/Completion.java
+++ b/core/src/main/java/io/questdb/client/Completion.java
@@ -36,15 +36,22 @@
  * {@link #await(long, TimeUnit)} returning {@code true}, or an explicit
  * {@link #cancel()} that races to terminal).
  * <p>
- * Signaling: the Completion is signaled from the I/O thread of the pooled
- * query client when the handler's terminal callback ({@code onEnd},
- * {@code onError}, or {@code onExecDone}) returns.
+ * Signaling: the Completion is signaled on the worker (dispatch) thread of the
+ * pooled query client when the handler's terminal callback ({@code onEnd},
+ * {@code onError}, or {@code onExecDone}) returns -- that callback runs inline
+ * on the worker thread, not on the I/O thread. Because of this, {@code await()}
+ * must never be called from inside a handler (it would self-deadlock on the
+ * worker thread); use {@link #cancel()} to stop a query from inside a handler.
  */
 public interface Completion {
 
     /**
      * Blocks until the query completes. Rethrows any server-reported failure
      * as a {@link QueryException}. Returns normally on success.
+     * <p>
+     * Must NOT be called from a result handler (it runs on the worker thread
+     * and would self-deadlock); calling it there throws
+     * {@link IllegalStateException}. Use {@link #cancel()} instead.
      *
      * @throws QueryException       if the server reported an error or
      *                              {@link #cancel()} won the race
diff --git a/core/src/main/java/io/questdb/client/HttpClientConfiguration.java b/core/src/main/java/io/questdb/client/HttpClientConfiguration.java
index c644f698..587b8111 100644
--- a/core/src/main/java/io/questdb/client/HttpClientConfiguration.java
+++ b/core/src/main/java/io/questdb/client/HttpClientConfiguration.java
@@ -38,6 +38,15 @@ default boolean fixBrokenConnection() {
         return true;
     }
 
+    /**
+     * Upper bound, in milliseconds, on establishing the TCP connection.
+     * @return the connect timeout in milliseconds; when {@code <= 0} (the default) no
+     *         application-level timeout is applied and the OS-level TCP timeout governs
+     */
+    default int getConnectTimeout() {
+        return 0;
+    }
+
     default EpollFacade getEpollFacade() {
         return EpollFacadeImpl.INSTANCE;
     }
diff --git a/core/src/main/java/io/questdb/client/Query.java b/core/src/main/java/io/questdb/client/Query.java
index f6832e84..c2a752f7 100644
--- a/core/src/main/java/io/questdb/client/Query.java
+++ b/core/src/main/java/io/questdb/client/Query.java
@@ -27,19 +27,29 @@
 import io.questdb.client.cutlass.qwp.client.QwpBindSetter;
 import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler;
 
+import java.io.Closeable;
+
 /**
- * Per-thread, reusable builder for one query. Obtained from
- * {@link QuestDB#query()}: every call on the same thread returns the same
- * instance, reset to empty.
+ * A query handle leased from the {@link QuestDB} pool via
+ * {@link QuestDB#borrowQuery()}. The handle holds one pooled query client (one
+ * WebSocket + I/O thread) for the lifetime of the borrow; the caller MUST
+ * {@link #close()} it to release the client back to the pool (typically via
+ * try-with-resources).
+ * <p>
+ * Allocation: the per-submit path is allocation-free -- the heavy query state
+ * is pre-allocated on the leased pool slot and reused, and {@link #submit()}
+ * returns this same handle as its {@link Completion}. {@code borrowQuery()}
+ * creates one small lease handle per borrow (often scalar-replaced by the JIT
+ * when used with try-with-resources).
  * <p>
  * Lifecycle: configure with {@link #sql}, optional {@link #binds}, and
- * {@link #handler}, then call {@link #submit()} to obtain a {@link Completion}.
- * After the Completion terminates, the next {@code QuestDB.query()} call on
- * the same thread returns this same instance with its state reset.
+ * {@link #handler}, then call {@link #submit()} to obtain a {@link Completion}
+ * and {@code await()} it before the next {@link #submit()}.
  * <p>
- * Thread safety: not thread-safe. One in-flight query per thread.
+ * Thread safety: not thread-safe and single-flight -- one in-flight query per
+ * handle. To run queries concurrently, borrow one handle per concurrent query.
  */
-public interface Query {
+public interface Query extends Closeable {
 
     /** Discards the current configuration without submitting. */
     void abandon();
@@ -53,9 +63,39 @@ public interface Query {
     Query binds(QwpBindSetter binds);
 
     /**
-     * Sets the result-batch handler. The handler is invoked on the pooled
-     * query client's I/O thread; if it touches caller state, it is
-     * responsible for its own synchronization.
+     * Releases the leased pooled query client back to the pool. The caller
+     * MUST call this (typically via try-with-resources). A real disconnect only
+     * happens at {@link QuestDB#close()}. Idempotent.
+     * <p>
+     * If a submit is still in flight (the caller never awaited, or its
+     * {@code await(timeout)} expired), {@code close()} cancels it and waits for
+     * the terminal event so the client is idle before it returns to the pool.
+     * That wait is bounded by {@code query_close_timeout_ms} (default 5000ms,
+     * see {@link QuestDBBuilder#queryCloseTimeoutMillis(long)}) and is
+     * interruptible -- interrupting the calling thread aborts it. If the query
+     * does not drain within the budget, the client is discarded rather than
+     * returned (its connection may carry late frames for the abandoned query),
+     * and the pool grows a fresh one on the next borrow. {@code close()}
+     * therefore never blocks the caller unbounded, even when the server is slow
+     * to honor the cancel.
+     * <p>
+     * Must NOT be called from a result handler: handlers run on the worker
+     * thread, so {@code close()} would block waiting for a terminal event that
+     * only that thread can deliver. Calling it there throws
+     * {@link IllegalStateException}. Use {@link Completion#cancel()} (non-blocking) to
+     * stop a query from inside a handler.
+     */
+    @Override
+    void close();
+
+    /**
+     * Sets the result-batch handler. The handler is invoked on the worker
+     * (dispatch) thread that drives {@code execute()} -- it consumes the pooled
+     * query client's I/O-thread event queue inline, it does NOT run on the I/O
+     * thread. If it touches caller state, it is responsible for its own
+     * synchronization. A handler must not call the blocking {@link #close()} or
+     * {@link Completion#await()} (they would self-deadlock on the worker
+     * thread); use {@link Completion#cancel()} to stop from inside a handler.
      */
     Query handler(QwpColumnBatchHandler handler);
 
@@ -65,11 +105,12 @@ public interface Query {
     Query sql(CharSequence sql);
 
     /**
-     * Submits the query for execution. Returns the {@link Completion} field
-     * cached on this instance; never allocates. Blocks up to the builder's
-     * configured acquire timeout if the query pool is exhausted.
+     * Submits the query for execution on the leased client. Returns this handle
+     * as its own {@link Completion}; never allocates. The handle is
+     * single-flight: {@code await()} the returned Completion before the next
+     * {@code submit()}.
      *
-     * @return the single-flight Completion bound to this Query instance
+     * @return the single-flight Completion bound to this Query handle
      */
     Completion submit();
 }
diff --git a/core/src/main/java/io/questdb/client/QuestDB.java b/core/src/main/java/io/questdb/client/QuestDB.java
index a608e12f..ee93afcf 100644
--- a/core/src/main/java/io/questdb/client/QuestDB.java
+++ b/core/src/main/java/io/questdb/client/QuestDB.java
@@ -24,8 +24,6 @@
 
 package io.questdb.client;
 
-import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler;
-
 import java.io.Closeable;
 
 /**
@@ -34,37 +32,42 @@
  * share across threads.
  * <p>
  * Steady-state allocation is zero: pooled instances are pre-allocated and
- * reused, the per-thread {@link Query} handle is cached in a {@code ThreadLocal},
- * and the {@link Completion} associated with each query is a field on that
- * cached handle.
+ * reused, each borrowed {@link Query} handle is a pre-allocated front bound to
+ * its pool slot, and the {@link Completion} associated with each query is a
+ * field on that handle.
  * <p>
- * Configuration: use {@link #connect(CharSequence)} when the same address list
- * and credentials serve both ingest and egress -- the most common case.
- * Use {@link #connect(CharSequence, CharSequence)} or {@link #builder()} when
- * ingest and egress endpoints differ.
+ * Configuration: one {@code ws}/{@code wss} string describes the whole cluster
+ * (a single {@code addr} server list) and both the ingest and query pools
+ * connect across it. Use {@link #connect(CharSequence)} for the common case, or
+ * {@link #builder()} for pool sizing and the ingest callbacks. To tolerate the
+ * server being down at startup, set {@code lazy_connect=true} in the config
+ * (async ingest + lazy reads; reads stay enabled and connect once the server
+ * is up).
  * <p>
  * Thread safety: instances are safe to share. {@link #borrowSender()} and
- * {@link #query()} may be called concurrently from any thread; the pool
+ * {@link #borrowQuery()} may be called concurrently from any thread; the pool
  * guarantees mutual exclusion of pooled resources.
  */
 public interface QuestDB extends Closeable {
 
     /**
      * Builder for advanced configuration (pool sizes, acquisition timeouts,
-     * differing ingest/egress configs).
+     * ingest callbacks).
      */
     static QuestDBBuilder builder() {
         return new QuestDBBuilder();
     }
 
     /**
-     * Connects with a single configuration string used for both ingest and
-     * egress. The schema must be {@code ws} or {@code wss}: QuestDB ingests and
-     * queries over QWP (the QuestDB WebSocket protocol), so one string
-     * configures both clients.
+     * Connects with a single configuration string for the whole QuestDB cluster,
+     * used for both ingest and egress. The schema must be {@code ws} or
+     * {@code wss}: QuestDB ingests and queries over QWP (the QuestDB WebSocket
+     * protocol), so one string configures both clients. List every cluster node
+     * in a single {@code addr} server list and both pools connect across it.
      * <p>
-     * Use {@link #connect(CharSequence, CharSequence)} or {@link #builder()}
-     * when ingest and egress use different addresses or credentials.
+     * Use {@link #builder()} for pool sizing and the ingest callbacks. To
+     * tolerate the server being down at startup, set {@code lazy_connect=true}
+     * in the config (async ingest + lazy reads, reads still enabled).
      *
      * @param configurationString a {@code ws}/{@code wss} config string (see
      *                            {@link Sender#fromConfig} or
@@ -76,20 +79,29 @@ static QuestDB connect(CharSequence configurationString) {
     }
 
     /**
-     * Connects with explicit ingest and egress configuration strings.
+     * Borrows a {@link Query} handle from the pool. The caller MUST call
+     * {@link Query#close()} on the returned instance to release it back to the
+     * pool (typically via try-with-resources). The handle leases one pooled
+     * query client (one WebSocket + I/O thread) for the borrow's lifetime;
+     * submit one or more queries on it, then close it.
+     * <p>
+     * Allocation: zero at steady state -- the returned instance is a
+     * pre-allocated handle bound to the leased pool slot.
+     * <p>
+     * Blocking: blocks up to the builder's
+     * {@link QuestDBBuilder#acquireTimeoutMillis(long) acquire timeout} when
+     * the pool is exhausted; throws on timeout.
+     * <p>
+     * Concurrency: a single handle is single-flight. To run queries
+     * concurrently, borrow one handle per concurrent query (up to
+     * {@code query_pool_max}).
      *
-     * @param ingestConfigurationString config for the {@link Sender} pool
-     *                                  ({@link Sender#fromConfig} format)
-     * @param queryConfigurationString  config for the query pool
-     *                                  ({@link io.questdb.client.cutlass.qwp.client.QwpQueryClient#fromConfig} format)
-     * @return a connected QuestDB handle
+     * @return a Query handle leased from the pool; release with
+     * {@link Query#close()}
+     * @throws QueryException if the pool is exhausted beyond the acquire
+     *                        timeout, or if this handle is closed
      */
-    static QuestDB connect(CharSequence ingestConfigurationString, CharSequence queryConfigurationString) {
-        return builder()
-                .ingestConfig(ingestConfigurationString)
-                .queryConfig(queryConfigurationString)
-                .build();
-    }
+    Query borrowQuery();
 
     /**
      * Borrows a {@link Sender} from the pool. The caller MUST call
@@ -125,61 +137,4 @@ static QuestDB connect(CharSequence ingestConfigurationString, CharSequence quer
      */
     @Override
     void close();
-
-    /**
-     * One-shot convenience for queries with no bind parameters. Equivalent to
-     * {@code query().sql(sql).handler(handler).submit()}. Returns the same
-     * thread-local {@link Completion} instance that {@link #query()} would,
-     * so this method is also zero-allocation at steady state.
-     *
-     * @param sql     the SQL text; the buffer is not retained after submit
-     * @param handler the result-batch handler; invoked on the pooled query
-     *                client's I/O thread
-     * @return a single-flight handle for the in-flight query
-     */
-    Completion executeSql(CharSequence sql, QwpColumnBatchHandler handler);
-
-    /**
-     * Allocates a fresh {@link Query} handle. Unlike {@link #query()}, this
-     * does NOT return the per-thread cached instance; every call allocates.
-     * <p>
-     * Use this when one thread needs to hold multiple in-flight queries
-     * concurrently (each {@code submit()} acquires its own worker from the
-     * query pool, so up to {@code queryPoolSize} concurrent queries on a
-     * single thread is fine). For the common case of one query at a time,
-     * prefer {@link #query()} -- it is allocation-free.
-     */
-    Query newQuery();
-
-    /**
-     * Opens a query builder for the calling thread. Returns the same
-     * thread-local instance on every call: callers do not need to cache it
-     * themselves. The returned {@code Query} is in a reset state and is not
-     * thread-safe -- one in-flight query per thread.
-     * <p>
-     * For multiple concurrent in-flight queries from a single thread, use
-     * {@link #newQuery()} instead.
-     */
-    Query query();
-
-    /**
-     * Releases the thread-affine {@link Sender} (if any) currently attached
-     * to the calling thread back to the pool. Call this on threads borrowed
-     * from pools you do not own (for example, Netty event loops) before they
-     * are recycled, to avoid pinning a {@link Sender} for the lifetime of
-     * a thread that no longer needs it.
-     */
-    void releaseSender();
-
-    /**
-     * Returns a {@link Sender} pinned to the calling thread. First call on
-     * a thread takes one from the pool and pins it; subsequent calls on the
-     * same thread return the same instance. The pin is released by
-     * {@link #releaseSender()} or by {@link #close()} on this handle.
-     * <p>
-     * Use this for long-lived, dedicated producer threads where borrow/return
-     * overhead would dominate. For short-lived or event-loop callers, prefer
-     * {@link #borrowSender()}.
-     */
-    Sender sender();
 }
diff --git a/core/src/main/java/io/questdb/client/QuestDBBuilder.java b/core/src/main/java/io/questdb/client/QuestDBBuilder.java
index cae00942..71f78c0a 100644
--- a/core/src/main/java/io/questdb/client/QuestDBBuilder.java
+++ b/core/src/main/java/io/questdb/client/QuestDBBuilder.java
@@ -35,14 +35,20 @@
 
 /**
  * Builder for {@link QuestDB}. Most callers use {@link QuestDB#connect(CharSequence)};
- * this builder is for pool sizing, idle/lifetime knobs, acquire timeout,
- * and the case where ingest and egress configs differ.
+ * this builder adds pool sizing, idle/lifetime knobs, the acquire timeout, and
+ * the ingest callbacks.
  * <p>
- * Both configs must use the {@code ws} or {@code wss} schema (QWP over
- * WebSocket). A pool key (e.g. {@code sender_pool_min}) may be carried in the
- * connect string or set with an explicit builder call; an explicit call always
- * wins. When both connect strings carry the same pool key with different values,
- * {@link #build()} fails.
+ * To tolerate the server being down at startup, set {@code lazy_connect=true}
+ * in the config: the ingest side connects asynchronously (writes buffer until
+ * the wire is up) and the read pool connects lazily on first use. Reads stay
+ * fully enabled -- they just connect once the server is available.
+ * <p>
+ * One configuration string describes the whole QuestDB cluster (see
+ * {@link #fromConfig}): list every node in a single {@code addr} server list and
+ * both the ingest and query pools connect across it. The schema must be
+ * {@code ws} or {@code wss} (QWP over WebSocket). A pool key (e.g.
+ * {@code sender_pool_min}) may be carried in the connect string or set with an
+ * explicit builder call; an explicit call always wins.
  */
 public final class QuestDBBuilder {
 
@@ -52,6 +58,7 @@ public final class QuestDBBuilder {
     static final long DEFAULT_MAX_LIFETIME_MILLIS = 30 * 60_000L;
     static final int DEFAULT_POOL_MAX = 4;
     static final int DEFAULT_POOL_MIN = 1;
+    static final long DEFAULT_QUERY_CLOSE_TIMEOUT_MILLIS = 5_000;
 
     // Every valid pool value is >= 0, so -1 unambiguously marks "not set
     // explicitly". The public pool setters are the only writers of these
@@ -59,11 +66,15 @@ public final class QuestDBBuilder {
     private static final int UNSET = -1;
 
     private long acquireTimeoutMillis = UNSET;
+    // Optional ingest-side async callbacks. Null -> each pooled Sender uses its
+    // loud-not-silent default. Applied to every Sender the pool builds.
+    private SenderConnectionListener connectionListener;
+    private SenderErrorHandler errorHandler;
     private long housekeeperIntervalMillis = UNSET;
+    private String config;
     private long idleTimeoutMillis = UNSET;
-    private String ingestConfig;
     private long maxLifetimeMillis = UNSET;
-    private String queryConfig;
+    private long queryCloseTimeoutMillis = UNSET;
     private int queryPoolMax = UNSET;
     private int queryPoolMin = UNSET;
     private int senderPoolMax = UNSET;
@@ -85,6 +96,54 @@ public QuestDBBuilder acquireTimeoutMillis(long millis) {
         return this;
     }
 
+    /**
+     * Maximum time {@link Query#close()} waits for an in-flight query to drain
+     * (after issuing a cancel) before discarding the leased query client and
+     * letting the pool grow a fresh one -- e.g. when the caller's own
+     * {@code await(timeout)} expired and they gave up. Defaults to 5000ms.
+     * @throws IllegalArgumentException if {@code millis} is negative
+     */
+    public QuestDBBuilder queryCloseTimeoutMillis(long millis) {
+        if (millis < 0) {
+            throw new IllegalArgumentException("queryCloseTimeoutMillis must be >= 0");
+        }
+        this.queryCloseTimeoutMillis = millis;
+        return this;
+    }
+
+    /**
+     * Sets the async connection-event listener applied to every pooled ingest
+     * {@link Sender}. The listener observes connect / disconnect / failover
+     * transitions across the whole sender pool; events are delivered on the
+     * senders' I/O threads, so the listener must be thread-safe and must not
+     * block. Pass {@code null} (the default) to keep each sender's
+     * loud-not-silent default listener.
+     *
+     * @param listener the shared connection listener, or {@code null} for the default
+     * @return this instance for method chaining
+     */
+    public QuestDBBuilder connectionListener(SenderConnectionListener listener) {
+        this.connectionListener = listener;
+        return this;
+    }
+
+    /**
+     * Sets the async error handler applied to every pooled ingest
+     * {@link Sender}. The handler receives terminal/async ingest errors
+     * (connect-budget exhaustion, terminal upgrade failures, write errors)
+     * from across the whole sender pool; notifications are delivered on the
+     * senders' I/O threads, so the handler must be thread-safe and must not
+     * block. Pass {@code null} (the default) to keep each sender's
+     * loud-not-silent default handler.
+     *
+     * @param handler the shared error handler, or {@code null} for the default
+     * @return this instance for method chaining
+     */
+    public QuestDBBuilder errorHandler(SenderErrorHandler handler) {
+        this.errorHandler = handler;
+        return this;
+    }
+
     /**
      * Builds the {@link QuestDB} handle. Validates both connect strings up
      * front -- so a malformed config fails here even when both pools have
@@ -101,39 +160,45 @@ public QuestDBBuilder acquireTimeoutMillis(long millis) {
      * and is delivered once the server acks; until then it stays preserved.
      */
     public QuestDB build() {
-        if (ingestConfig == null) {
-            throw new IllegalStateException("ingest configuration is required; call fromConfig() or ingestConfig()");
+        if (config == null) {
+            throw new IllegalStateException("configuration is required; call fromConfig()");
         }
-        if (queryConfig == null) {
-            throw new IllegalStateException("query configuration is required; call fromConfig() or queryConfig()");
+        ConfigString cs = ConfigString.parse(config);
+        ConfigView view = new ConfigView(cs);
+        // Validate the single cluster config exactly as both pools will, but
+        // without connecting: the full Sender parse plus validateParameters
+        // (ingress value keys are registry-STRING, so only the real parse
+        // validates their values), then the typed egress validateConfig. Each
+        // side applies the keys it owns and silently ignores the rest, so one
+        // string drives both. A malformed config therefore fails here even when
+        // a pool min is 0 and nothing connects.
+        Sender.LineSenderBuilder.validateWsConfigString(config);
+        QwpQueryClient.validateConfig(view, "wss".equals(cs.schema()));
+
+        // lazy_connect: tolerate a down server at startup without disabling
+        // reads. The ingest side connects asynchronously (writes buffer until the
+        // wire is up) and the read pool defaults to min=0 -- it connects lazily
+        // on the first query once the server is up. Reads stay enabled.
+        boolean lazyConnect = view.getBool("lazy_connect", false);
+        String ingestConfig = config;
+        if (lazyConnect) {
+            ingestConfig = resolveLazyConnect(view);
         }
-        ConfigString ingestCs = ConfigString.parse(ingestConfig);
-        ConfigString queryCs = ConfigString.parse(queryConfig);
-        ConfigView ingestView = new ConfigView(ingestCs);
-        ConfigView queryView = new ConfigView(queryCs);
-        // Validate both connect strings exactly as the pools will, but without
-        // connecting. The ingest string runs the full Sender parse plus
-        // validateParameters -- ingress value keys are registry-STRING, so only
-        // the real parse validates their values. The egress string runs the
-        // typed validateConfig. A malformed config therefore fails here even
-        // when a pool min is 0 and nothing connects.
-        Sender.LineSenderBuilder.validateWsConfigString(ingestConfig);
-        QwpQueryClient.validateConfig(queryView, "wss".equals(queryCs.schema()));
-
-        // A view carries no side; getInt/getLong read any key, so the ingest
-        // and query views also serve the POOL reads.
-        resolvePoolInt(senderPoolMin, "sender_pool_min", ingestView, queryView, DEFAULT_POOL_MIN, this::senderPoolMin);
-        resolvePoolInt(senderPoolMax, "sender_pool_max", ingestView, queryView, DEFAULT_POOL_MAX, this::senderPoolMax);
-        resolvePoolInt(queryPoolMin, "query_pool_min", ingestView, queryView, DEFAULT_POOL_MIN, this::queryPoolMin);
-        resolvePoolInt(queryPoolMax, "query_pool_max", ingestView, queryView, DEFAULT_POOL_MAX, this::queryPoolMax);
-        resolvePoolLong(acquireTimeoutMillis, "acquire_timeout_ms", ingestView, queryView, DEFAULT_ACQUIRE_TIMEOUT_MILLIS, this::acquireTimeoutMillis);
-        resolvePoolLong(idleTimeoutMillis, "idle_timeout_ms", ingestView, queryView, DEFAULT_IDLE_TIMEOUT_MILLIS, this::idleTimeoutMillis);
-        resolvePoolLong(maxLifetimeMillis, "max_lifetime_ms", ingestView, queryView, DEFAULT_MAX_LIFETIME_MILLIS, this::maxLifetimeMillis);
-        resolvePoolLong(housekeeperIntervalMillis, "housekeeper_interval_ms", ingestView, queryView, DEFAULT_HOUSEKEEPER_INTERVAL_MILLIS, this::housekeeperIntervalMillis);
+
+        resolvePoolInt(senderPoolMin, "sender_pool_min", view, DEFAULT_POOL_MIN, this::senderPoolMin);
+        resolvePoolInt(senderPoolMax, "sender_pool_max", view, DEFAULT_POOL_MAX, this::senderPoolMax);
+        // lazy_connect makes the read pool lazy (min=0); without it the default min is 1.
+        resolvePoolInt(queryPoolMin, "query_pool_min", view, lazyConnect ? 0 : DEFAULT_POOL_MIN, this::queryPoolMin);
+        resolvePoolInt(queryPoolMax, "query_pool_max", view, DEFAULT_POOL_MAX, this::queryPoolMax);
+        resolvePoolLong(acquireTimeoutMillis, "acquire_timeout_ms", view, DEFAULT_ACQUIRE_TIMEOUT_MILLIS, this::acquireTimeoutMillis);
+        resolvePoolLong(queryCloseTimeoutMillis, "query_close_timeout_ms", view, DEFAULT_QUERY_CLOSE_TIMEOUT_MILLIS, this::queryCloseTimeoutMillis);
+        resolvePoolLong(idleTimeoutMillis, "idle_timeout_ms", view, DEFAULT_IDLE_TIMEOUT_MILLIS, this::idleTimeoutMillis);
+        resolvePoolLong(maxLifetimeMillis, "max_lifetime_ms", view, DEFAULT_MAX_LIFETIME_MILLIS, this::maxLifetimeMillis);
+        resolvePoolLong(housekeeperIntervalMillis, "housekeeper_interval_ms", view, DEFAULT_HOUSEKEEPER_INTERVAL_MILLIS, this::housekeeperIntervalMillis);
 
         return new QuestDBImpl(
                 ingestConfig,
-                queryConfig,
+                config,
                 senderPoolMin,
                 senderPoolMax,
                 queryPoolMin,
@@ -141,19 +206,62 @@ public QuestDB build() {
                 acquireTimeoutMillis,
                 idleTimeoutMillis,
                 maxLifetimeMillis,
-                housekeeperIntervalMillis
+                housekeeperIntervalMillis,
+                queryCloseTimeoutMillis,
+                errorHandler,
+                connectionListener
         );
     }
 
+    // Validates the lazy_connect contract and returns the ingest config to use:
+    // the original string, with initial_connect_retry=async injected when the
+    // user set none. lazy_connect requires BOTH sides to start non-blocking, so
+    // an explicit knob that forces a blocking / fail-fast startup is rejected
+    // with an IllegalArgumentException that names the remedy.
+    private String resolveLazyConnect(ConfigView view) {
+        // (1) ingest side: only initial_connect_retry=async is non-blocking;
+        // off/false/on/true/sync all block or fail fast at startup.
+        String mode = view.getStr("initial_connect_retry");
+        if (mode != null && !"async".equalsIgnoreCase(mode)) {
+            throw new IllegalArgumentException(
+                    "conflicting configuration: lazy_connect=true needs a non-blocking startup, but "
+                            + "initial_connect_retry=" + mode + " makes the initial connect block / fail-fast. "
+                            + "Resolve by removing initial_connect_retry (lazy_connect implies "
+                            + "initial_connect_retry=async) or setting initial_connect_retry=async.");
+        }
+        // (2) read side: lazy_connect requires query_pool_min=0 so the read pool
+        // does not eagerly fail fast at startup. An explicit query_pool_min > 0
+        // (builder call or connect string) contradicts that.
+        int explicitQueryMin;
+        if (queryPoolMin != UNSET) {
+            explicitQueryMin = queryPoolMin; // explicit builder call
+        } else if (view.has("query_pool_min")) {
+            explicitQueryMin = view.getInt("query_pool_min", UNSET); // connect string
+        } else {
+            explicitQueryMin = 0; // unset -> lazy default of 0
+        }
+        if (explicitQueryMin > 0) {
+            throw new IllegalArgumentException(
+                    "conflicting configuration: lazy_connect=true needs query_pool_min=0 (the read pool "
+                            + "connects lazily on first use and must not fail-fast at startup), but query_pool_min="
+                            + explicitQueryMin + " was set. Resolve by removing query_pool_min (lazy_connect "
+                            + "defaults it to 0) or setting query_pool_min=0.");
+        }
+        // No explicit initial_connect_retry -> inject async so the ingest build
+        // is non-blocking. An explicit async needs no injection.
+        return mode == null ? withDefaultAsyncConnect(config) : config;
+    }
+
     /**
-     * Sets a single configuration string used for both ingest and egress. The
-     * schema must be {@code ws} or {@code wss}.
+     * Sets the single configuration string for the whole QuestDB cluster --
+     * used for both ingest and egress. List every cluster node in one
+     * {@code addr} (comma-separated, or by repeating the key); the ingest and
+     * query pools each connect across that one server list. The schema must be
+     * {@code ws} or {@code wss}.
      */
     public QuestDBBuilder fromConfig(CharSequence configurationString) {
-        requireWebSocketSchema(configurationString, "connection");
-        String s = configurationString.toString();
-        this.ingestConfig = s;
-        this.queryConfig = s;
+        requireWebSocketSchema(configurationString, "cluster");
+        this.config = configurationString.toString();
         return this;
     }
 
@@ -183,16 +291,6 @@ public QuestDBBuilder idleTimeoutMillis(long millis) {
         return this;
     }
 
-    /**
-     * Sets the ingest-side configuration. The schema must be {@code ws} or
-     * {@code wss}.
-     */
-    public QuestDBBuilder ingestConfig(CharSequence configurationString) {
-        requireWebSocketSchema(configurationString, "ingest");
-        this.ingestConfig = configurationString.toString();
-        return this;
-    }
-
     /**
      * Maximum age of a pooled connection before the housekeeper recycles it
      * (next time it is idle). Useful for picking up DNS / load-balancer
@@ -206,16 +304,6 @@ public QuestDBBuilder maxLifetimeMillis(long millis) {
         return this;
     }
 
-    /**
-     * Sets the query-side configuration. The schema must be {@code ws} or
-     * {@code wss}.
-     */
-    public QuestDBBuilder queryConfig(CharSequence configurationString) {
-        requireWebSocketSchema(configurationString, "query");
-        this.queryConfig = configurationString.toString();
-        return this;
-    }
-
     /**
      * Maximum query-pool size. Defaults to 4.
      */
@@ -303,12 +391,24 @@ public java.util.Map<String, Object> poolConfigSnapshotForTest() {
         m.put("query_pool_min", queryPoolMin);
         m.put("query_pool_max", queryPoolMax);
         m.put("acquire_timeout_ms", acquireTimeoutMillis);
+        m.put("query_close_timeout_ms", queryCloseTimeoutMillis);
         m.put("idle_timeout_ms", idleTimeoutMillis);
         m.put("max_lifetime_ms", maxLifetimeMillis);
         m.put("housekeeper_interval_ms", housekeeperIntervalMillis);
         return m;
     }
 
+    // Inject a non-blocking async initial connect right after the schema
+    // separator so lazy_connect's build never blocks or fails fast on a down
+    // server. Only used when the user set no initial_connect_retry of their own
+    // (resolveLazyConnect rejects an explicit blocking mode rather than silently
+    // overriding it), so placement is immaterial -- there is no competing value.
+    private static String withDefaultAsyncConnect(String config) {
+        int sep = config.indexOf("::");
+        // sep >= 0: fromConfig() validated a ws/wss schema, so "::" is present.
+        return config.substring(0, sep + 2) + "initial_connect_retry=async;" + config.substring(sep + 2);
+    }
+
     private static void requireWebSocketSchema(CharSequence config, String role) {
         String schema = ConfigString.parse(config).schema();
         if (!"ws".equals(schema) && !"wss".equals(schema)) {
@@ -317,53 +417,17 @@ private static void requireWebSocketSchema(CharSequence config, String role) {
         }
     }
 
-    private void resolvePoolInt(int current, String key, ConfigView ingest, ConfigView query, int dflt, IntConsumer setter) {
+    private void resolvePoolInt(int current, String key, ConfigView view, int dflt, IntConsumer setter) {
         if (current != UNSET) {
-            return; // explicit builder call wins; skip the conflict check
-        }
-        boolean inIngest = ingest.has(key);
-        boolean inQuery = query.has(key);
-        int value;
-        if (inIngest && inQuery) {
-            int vi = ingest.getInt(key, UNSET);
-            int vq = query.getInt(key, UNSET);
-            if (vi != vq) {
-                throw new IllegalArgumentException(
-                        "conflicting pool config: " + key + " (ingest=" + vi + ", query=" + vq + ")");
-            }
-            value = vi;
-        } else if (inIngest) {
-            value = ingest.getInt(key, UNSET);
-        } else if (inQuery) {
-            value = query.getInt(key, UNSET);
-        } else {
-            value = dflt;
+            return; // explicit builder call wins
         }
-        setter.accept(value);
+        setter.accept(view.has(key) ? view.getInt(key, UNSET) : dflt);
     }
 
-    private void resolvePoolLong(long current, String key, ConfigView ingest, ConfigView query, long dflt, LongConsumer setter) {
+    private void resolvePoolLong(long current, String key, ConfigView view, long dflt, LongConsumer setter) {
         if (current != UNSET) {
-            return; // explicit builder call wins; skip the conflict check
-        }
-        boolean inIngest = ingest.has(key);
-        boolean inQuery = query.has(key);
-        long value;
-        if (inIngest && inQuery) {
-            long vi = ingest.getLong(key, UNSET);
-            long vq = query.getLong(key, UNSET);
-            if (vi != vq) {
-                throw new IllegalArgumentException(
-                        "conflicting pool config: " + key + " (ingest=" + vi + ", query=" + vq + ")");
-            }
-            value = vi;
-        } else if (inIngest) {
-            value = ingest.getLong(key, UNSET);
-        } else if (inQuery) {
-            value = query.getLong(key, UNSET);
-        } else {
-            value = dflt;
+            return; // explicit builder call wins
         }
-        setter.accept(value);
+        setter.accept(view.has(key) ? view.getLong(key, UNSET) : dflt);
     }
 }
diff --git a/core/src/main/java/io/questdb/client/Sender.java b/core/src/main/java/io/questdb/client/Sender.java
index 604f45d5..dc94f42b 100644
--- a/core/src/main/java/io/questdb/client/Sender.java
+++ b/core/src/main/java/io/questdb/client/Sender.java
@@ -1011,6 +1011,9 @@ final class LineSenderBuilder {
         private int autoFlushRows = PARAMETER_NOT_SET_EXPLICITLY;
         private int bufferCapacity = PARAMETER_NOT_SET_EXPLICITLY;
         private long closeFlushTimeoutMillis = CLOSE_FLUSH_TIMEOUT_NOT_SET;
+        // Upper bound (ms) on the TCP connect. PARAMETER_NOT_SET_EXPLICITLY ->
+        // 0 (no application-level connect timeout; OS connect timeout applies).
+        private int connectTimeoutMillis = PARAMETER_NOT_SET_EXPLICITLY;
         // Optional user-supplied async connection-event listener. When null,
         // the sender uses DefaultSenderConnectionListener.INSTANCE
         // (loud-not-silent log of every transition).
@@ -1078,6 +1081,11 @@ public String getSettingsPath() {
             public int getTimeout() {
                 return httpTimeout == PARAMETER_NOT_SET_EXPLICITLY ? DEFAULT_HTTP_TIMEOUT : httpTimeout;
             }
+
+            @Override
+            public int getConnectTimeout() {
+                return connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 0 : connectTimeoutMillis;
+            }
         };
         private long minRequestThroughput = PARAMETER_NOT_SET_EXPLICITLY;
         private int multicastTtl = PARAMETER_NOT_SET_EXPLICITLY;
@@ -1199,6 +1207,28 @@ public AdvancedTlsSettings advancedTls() {
             return new AdvancedTlsSettings();
         }
 
+        /**
+         * Upper bound, in milliseconds, on establishing the TCP connection to a
+         * QuestDB endpoint; a connect exceeding it is aborted instead of riding
+         * the longer OS-level connect timeout. Applies to both the HTTP and the
+         * WebSocket transports. Unset (0) falls back to the OS connect timeout.
+         *
+         * @param millis connect timeout in milliseconds; must be &gt; 0
+         * @return this instance for method chaining
+         * @throws LineSenderException if already configured or {@code millis <= 0}
+         */
+        public LineSenderBuilder connectTimeoutMillis(int millis) {
+            if (this.connectTimeoutMillis != PARAMETER_NOT_SET_EXPLICITLY) {
+                throw new LineSenderException("connect timeout was already configured ")
+                        .put("[connect_timeout=").put(this.connectTimeoutMillis).put("]");
+            }
+            if (millis <= 0) {
+                throw new LineSenderException("connect_timeout must be > 0: ").put(millis);
+            }
+            this.connectTimeoutMillis = millis;
+            return this;
+        }
+
         /**
          * Per-endpoint timeout on the WebSocket upgrade response read. Default
          * {@value QwpWebSocketSender#DEFAULT_AUTH_TIMEOUT_MS} ms.
@@ -1531,6 +1561,7 @@ public Sender build() {
                             actualErrorInboxCapacity,
                             actualDurableAckKeepaliveIntervalMillis,
                             authTimeoutMillis,
+                            connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 0 : connectTimeoutMillis,
                             connectionListener,
                             actualConnectionListenerInboxCapacity
                     );
@@ -3166,6 +3197,9 @@ private LineSenderBuilder fromConfig(CharSequence configurationString) {
                     pos = getValue(configurationString, pos, sink, "request_timeout");
                     int requestTimeout = parseIntValue(sink, "request_timeout");
                     httpTimeoutMillis(requestTimeout);
+                } else if (Chars.equals("connect_timeout", sink)) {
+                    pos = getValue(configurationString, pos, sink, "connect_timeout");
+                    connectTimeoutMillis(parseIntValue(sink, "connect_timeout"));
                 } else if (Chars.equals("request_min_throughput", sink)) {
                     pos = getValue(configurationString, pos, sink, "request_min_throughput");
                     int requestMinThroughput = parseIntValue(sink, "request_min_throughput");
@@ -3446,6 +3480,9 @@ private LineSenderBuilder fromConfigWebSocket(CharSequence configurationString)
                 if (view.has("auth_timeout_ms")) {
                     authTimeoutMillis(view.getLong("auth_timeout_ms", 0));
                 }
+                if (view.has("connect_timeout")) { // clamp: the schema admits values beyond int range
+                    connectTimeoutMillis((int) Math.min(view.getLong("connect_timeout", 0), Integer.MAX_VALUE));
+                }
 
                 s = view.getStr("auto_flush_rows");
                 if (s != null) {
@@ -3701,6 +3738,7 @@ public java.util.Map<String, Object> wsConfigSnapshotForTest() {
             m.put("connection_listener_inbox_capacity", connectionListenerInboxCapacity);
             m.put("token", httpToken);
             m.put("auth_timeout_ms", authTimeoutMillis);
+            m.put("connect_timeout", connectTimeoutMillis == PARAMETER_NOT_SET_EXPLICITLY ? 0 : connectTimeoutMillis);
             m.put("username", username);
             m.put("password", password);
             m.put("tls_verify", tlsValidationMode == null ? null : tlsValidationMode.name());
diff --git a/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java b/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java
index 94562663..0175ad6c 100644
--- a/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java
+++ b/core/src/main/java/io/questdb/client/cutlass/http/client/HttpClient.java
@@ -66,6 +66,7 @@ public abstract class HttpClient implements QuietCloseable {
     protected final NetworkFacade nf;
     protected final Socket socket;
     private final ObjectPool<DirectUtf8String> csPool = new ObjectPool<>(DirectUtf8String.FACTORY, 64);
+    private final int connectTimeout;
     private final int defaultTimeout;
     private final boolean fixBrokenConnection;
     private final int maxBufferSize;
@@ -84,6 +85,7 @@ public HttpClient(HttpClientConfiguration configuration, SocketFactory socketFac
         this.nf = configuration.getNetworkFacade();
         this.socket = socketFactory.newInstance(nf, LOG);
         this.defaultTimeout = configuration.getTimeout();
+        this.connectTimeout = configuration.getConnectTimeout();
         this.bufferSize = configuration.getInitialRequestBufferSize();
         this.maxBufferSize = configuration.getMaximumRequestBufferSize();
         this.responseParserBufSize = configuration.getResponseBufferSize();
@@ -617,10 +619,16 @@ private void connect(CharSequence host, int port) {
                 throw new HttpClientException("could not resolve host ").put("[host=").put(host).put("]");
             }
 
-            if (nf.connectAddrInfo(fd, addrInfo) != 0) {
+            final int connectResult = connectTimeout > 0
+                    ? nf.connectAddrInfoTimeout(fd, addrInfo, connectTimeout)
+                    : nf.connectAddrInfo(fd, addrInfo);
+            if (connectResult != 0) {
                 int errno = nf.errno();
                 nf.freeAddrInfo(addrInfo);
                 disconnect();
+                if (connectResult == NetworkFacade.CONNECT_TIMEOUT) {
+                    throw new HttpClientException("connect timed out ").put("[host=").put(host).put(", port=").put(port).put(", timeout=").put(connectTimeout).put(']').flagAsTimeout();
+                }
                 throw new HttpClientException("could not connect to host ").put("[host=").put(host).put(", port=").put(port).put(", errno=").put(errno).put(']');
             }
             nf.freeAddrInfo(addrInfo);
@@ -631,9 +639,20 @@ private void connect(CharSequence host, int port) {
                 throw new HttpClientException("could not configure socket to be non-blocking [fd=").put(fd).put(", errno=").put(errno).put(']');
             }
 
+            // Register the fd with the event loop before the TLS handshake so the
+            // handshake can park on socket readiness via ioWait() instead of
+            // busy-spinning on the non-blocking socket.
+            setupIoWait();
+
             if (socket.supportsTls()) {
+                // Bound the TLS handshake by the connect budget (falling back to
+                // the request timeout when connect_timeout is unset), so a peer
+                // that completes TCP but stalls mid-handshake cannot hang or pin a
+                // CPU.
+                final long tlsHandshakeStartNanos = System.nanoTime();
+                final int tlsHandshakeBudgetMillis = connectTimeout > 0 ? connectTimeout : defaultTimeout;
                 try {
-                    socket.startTlsSession(host);
+                    socket.startTlsSession(host, op -> ioWait(remainingTime(tlsHandshakeBudgetMillis, tlsHandshakeStartNanos), op));
                 } catch (TlsSessionInitFailedException e) {
                     int errno = nf.errno();
                     disconnect();
@@ -641,9 +660,15 @@ private void connect(CharSequence host, int port) {
                             .put(", error=").put(e.getFlyweightMessage())
                             .put(", errno=").put(errno)
                             .put(']');
+                } catch (Throwable t) {
+                    // ioWait() throws a timeout-flagged HttpClientException when the
+                    // handshake budget is exhausted; any other error can also surface
+                    // mid-handshake. Disconnect so the fd and native buffers do not
+                    // leak, then propagate.
+                    disconnect();
+                    throw t;
                 }
             }
-            setupIoWait();
         }
 
         private void doSend(long lo, long hi, int timeoutMillis) {
diff --git a/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java b/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java
index 81ad7c86..5bdb6fe1 100644
--- a/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java
+++ b/core/src/main/java/io/questdb/client/cutlass/http/client/WebSocketClient.java
@@ -101,6 +101,10 @@ public abstract class WebSocketClient implements QuietCloseable {
     private final WebSocketSendBuffer sendBuffer;
     // volatile: written by user thread in close(), read by I/O thread in checkConnected()/sendFrame()/receiveFrame()
     private volatile boolean closed;
+    // Upper bound (ms) on the TCP connect. <= 0 disables the application-level
+    // timeout and falls back to the OS connect timeout. Seeded from the
+    // configuration; the QWP sender may override it via setConnectTimeout().
+    private int connectTimeoutMillis;
     private int fragmentBufPos;
     private long fragmentBufPtr;       // native buffer for accumulating fragment payloads
     private int fragmentBufSize;
@@ -168,6 +172,7 @@ public WebSocketClient(HttpClientConfiguration configuration, SocketFactory sock
         this.nf = configuration.getNetworkFacade();
         this.socket = socketFactory.newInstance(nf, LOG);
         this.defaultTimeout = configuration.getTimeout();
+        this.connectTimeoutMillis = configuration.getConnectTimeout();
 
         int sendBufSize = Math.max(configuration.getInitialRequestBufferSize(), DEFAULT_SEND_BUFFER_SIZE);
         int maxSendBufSize = Math.max(configuration.getMaximumRequestBufferSize(), sendBufSize);
@@ -481,6 +486,16 @@ public void sendPing(int timeout) {
         }
     }
 
+    /**
+     * Overrides the TCP connect timeout seeded from the configuration. Only
+     * {@link #connect} calls made after this setter observe the new value.
+     *
+     * @param connectTimeoutMillis budget in ms; {@code <= 0} falls back to the OS connect timeout
+     */
+    public void setConnectTimeout(int connectTimeoutMillis) {
+        this.connectTimeoutMillis = connectTimeoutMillis;
+    }
+
     /**
      * Sets the value sent as the {@code X-QWP-Accept-Encoding} upgrade header,
      * e.g. {@code "zstd;level=1,raw"}. Pass {@code null} to omit the header
@@ -922,10 +937,18 @@ private void doConnect(CharSequence host, int port) {
             throw new HttpClientException("could not resolve host [host=").put(host).put(']');
         }
 
-        if (nf.connectAddrInfo(fd, addrInfo) != 0) {
+        final int connectResult = connectTimeoutMillis > 0
+                ? nf.connectAddrInfoTimeout(fd, addrInfo, connectTimeoutMillis)
+                : nf.connectAddrInfo(fd, addrInfo);
+        if (connectResult != 0) {
             int errno = nf.errno();
             nf.freeAddrInfo(addrInfo);
             disconnect();
+            if (connectResult == NetworkFacade.CONNECT_TIMEOUT) {
+                throw new HttpClientException("connect timed out [host=").put(host)
+                        .put(", port=").put(port)
+                        .put(", timeout=").put(connectTimeoutMillis).put(']').flagAsTimeout();
+            }
             throw new HttpClientException("could not connect [host=").put(host)
                     .put(", port=").put(port)
                     .put(", errno=").put(errno).put(']');
@@ -939,19 +962,35 @@ private void doConnect(CharSequence host, int port) {
                     .put(", errno=").put(errno).put(']');
         }
 
+        // Register the fd with the event loop before the TLS handshake so the
+        // handshake can park on socket readiness via ioWait() instead of
+        // busy-spinning on the non-blocking socket.
+        setupIoWait();
+
         if (socket.supportsTls()) {
+            // Bound the TLS handshake by the connect budget (falling back to the
+            // request timeout when connect_timeout is unset), so a peer that
+            // completes TCP but stalls mid-handshake cannot hang or pin a CPU.
+            final long tlsHandshakeStartNanos = System.nanoTime();
+            final int tlsHandshakeBudgetMillis = connectTimeoutMillis > 0 ? connectTimeoutMillis : defaultTimeout;
             try {
-                socket.startTlsSession(host);
+                socket.startTlsSession(host, op -> ioWait(getRemainingTimeOrThrow(tlsHandshakeBudgetMillis, tlsHandshakeStartNanos), op));
             } catch (TlsSessionInitFailedException e) {
                 int errno = nf.errno();
                 disconnect();
                 throw new HttpClientException("could not start TLS session [fd=").put(fd)
                         .put(", error=").put(e.getFlyweightMessage())
                         .put(", errno=").put(errno).put(']');
+            } catch (Throwable t) {
+                // ioWait() throws a timeout-flagged HttpClientException when the
+                // handshake budget is exhausted; any other error can also surface
+                // mid-handshake. Disconnect so the fd and native buffers do not
+                // leak, then propagate.
+                disconnect();
+                throw t;
             }
         }
 
-        setupIoWait();
         if (LOG.isDebugEnabled()) {
             LOG.debug("Connected to [host={}, port={}]", host, port);
         }
diff --git a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java
index 1706401e..92b4f6a7 100644
--- a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java
+++ b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpQueryClient.java
@@ -165,6 +165,9 @@ public class QwpQueryClient implements QuietCloseable {
     private final Random failoverRandom = new Random();
     private long authTimeoutMs = DEFAULT_AUTH_TIMEOUT_MS;
     private String authorizationHeader;
+    // Upper bound (ms) on each TCP connect attempt. 0 (default) falls back to
+    // the OS connect timeout.
+    private int connectTimeoutMs = 0;
     private int bufferPoolSize = DEFAULT_IO_BUFFER_POOL_SIZE;
     private String clientId;
     // Client-configured zone (failover.md §1.1), opaque case-insensitive
@@ -387,6 +390,7 @@ public static QwpQueryClient fromConfig(CharSequence configurationString) {
         Long failoverMaxDurationMs = view.has("failover_max_duration_ms")
                 ? view.getLong("failover_max_duration_ms", 0) : null;
         Long authTimeoutMs = view.has("auth_timeout_ms") ? view.getLong("auth_timeout_ms", 0) : null;
+        Integer connectTimeout = view.has("connect_timeout") ? (int) Math.min(view.getLong("connect_timeout", 0), Integer.MAX_VALUE) : null; // clamp, never wrap
         Long initialCredit = view.has("initial_credit") ? view.getLong("initial_credit", 0) : null;
         int poolSize = view.getInt("buffer_pool_size", DEFAULT_IO_BUFFER_POOL_SIZE);
         String compression = view.getEnum("compression");
@@ -442,6 +446,9 @@ public static QwpQueryClient fromConfig(CharSequence configurationString) {
             if (authTimeoutMs != null) {
                 client.withAuthTimeout(authTimeoutMs);
             }
+            if (connectTimeout != null) {
+                client.withConnectTimeout(connectTimeout);
+            }
             if (initialCredit != null) {
                 client.withInitialCredit(initialCredit);
             }
@@ -497,6 +504,7 @@ public static void validateConfig(ConfigView view, boolean tls) {
         view.getLong("failover_max_duration_ms", -1);
         view.getLong("initial_credit", -1);
         view.getLong("auth_timeout_ms", -1);
+        view.getLong("connect_timeout", -1);
         String username = view.getStr("username");
         String password = view.getStr("password");
         String token = view.getStr("token");
@@ -867,6 +875,7 @@ public java.util.Map<String, Object> configSnapshotForTest() {
         m.put("client_id", clientId);
         m.put("zone", clientZone);
         m.put("auth_timeout_ms", authTimeoutMs);
+        m.put("connect_timeout", connectTimeoutMs);
         m.put("authorization_header", authorizationHeader);
         m.put("tls_verify", tlsValidationMode);
         m.put("tls_roots", trustStorePath);
@@ -994,6 +1003,22 @@ public QwpQueryClient withAuthTimeout(long authTimeoutMs) {
         return this;
     }
 
+    /**
+     * Upper bound, in milliseconds, on establishing the TCP connection to an
+     * endpoint. Unlike {@link #withAuthTimeout(long)} this DOES bound the TCP
+     * connect itself, so a routing blackhole that never returns SYN-ACK is
+     * aborted within this budget instead of riding the OS connect timeout.
+     * Must be &gt; 0: {@code 0} is only the never-set default (OS timeout).
+     */
+    public QwpQueryClient withConnectTimeout(int connectTimeoutMs) {
+        checkPreConnect("withConnectTimeout");
+        if (connectTimeoutMs <= 0) {
+            throw new IllegalArgumentException("connectTimeoutMs must be > 0");
+        }
+        this.connectTimeoutMs = connectTimeoutMs;
+        return this;
+    }
+
     /**
      * Configures HTTP Basic authentication for the WebSocket upgrade request.
      * The server verifies the credentials against the same user store the
@@ -1369,6 +1394,7 @@ private void connectToEndpoint(Endpoint ep) {
         webSocketClient.setQwpClientId(clientId != null ? clientId : defaultClientId());
         webSocketClient.setQwpAcceptEncoding(buildAcceptEncodingHeader());
         webSocketClient.setQwpMaxBatchRows(maxBatchRows);
+        webSocketClient.setConnectTimeout(connectTimeoutMs);
         runUpgradeWithTimeout(ep);
         negotiatedQwpVersion = webSocketClient.getServerQwpVersion();
         negotiatedZstdLevel = webSocketClient.getServerNegotiatedZstdLevel();
@@ -1745,12 +1771,21 @@ private void reconnectViaTracker() {
     }
 
     private void runUpgradeWithTimeout(Endpoint ep) {
+        // Connect first, OUTSIDE the upgrade try. A connect-phase failure --
+        // including a connect_timeout overage flagged via flagAsTimeout() -- must
+        // keep its own message ("connect timed out ...") and must NOT be relabeled
+        // as an auth_timeout overage below. doConnect() tears down its own socket
+        // on failure; the failover walker treats the propagated HttpClientException
+        // as a transport error and moves on to the next endpoint.
+        webSocketClient.connect(ep.host, ep.port);
+
         int timeoutMs = (int) Math.min(authTimeoutMs, Integer.MAX_VALUE);
         try {
-            webSocketClient.connect(ep.host, ep.port);
             webSocketClient.upgrade(DEFAULT_ENDPOINT_PATH, timeoutMs, authorizationHeader);
         } catch (HttpClientException ex) {
             if (ex.isTimeout()) {
+                // Reachable only for an upgrade/auth-phase timeout now, so the
+                // auth_timeout attribution is accurate.
                 HttpClientException timeout = new HttpClientException("WebSocket upgrade to ")
                         .put(ep.host).put(':').put(ep.port)
                         .put(" exceeded auth_timeout=").put(authTimeoutMs).put("ms");
diff --git a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java
index 9b9cc45d..aa1c7188 100644
--- a/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java
+++ b/core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java
@@ -154,6 +154,9 @@ public class QwpWebSocketSender implements Sender {
     private final ClientTlsConfiguration tlsConfig;
     private MicrobatchBuffer activeBuffer;
     private long authTimeoutMs = DEFAULT_AUTH_TIMEOUT_MS;
+    // Upper bound (ms) on each TCP connect attempt. 0 (default) falls back to
+    // the OS connect timeout. Applied to every WebSocketClient before connect.
+    private int connectTimeoutMs = 0;
     // Double-buffering for async I/O
     private MicrobatchBuffer buffer0;
     // Cached column references to avoid repeated hashmap lookups
@@ -577,7 +580,7 @@ public static QwpWebSocketSender connect(
                 reconnectInitialBackoffMillis, reconnectMaxBackoffMillis,
                 initialConnectMode, errorHandler, errorInboxCapacity,
                 durableAckKeepaliveIntervalMillis, authTimeoutMs,
-                null, SenderConnectionDispatcher.DEFAULT_CAPACITY);
+                0, null, SenderConnectionDispatcher.DEFAULT_CAPACITY);
     }
 
     /**
@@ -602,6 +605,7 @@ public static QwpWebSocketSender connect(
             int errorInboxCapacity,
             long durableAckKeepaliveIntervalMillis,
             long authTimeoutMs,
+            int connectTimeoutMs,
             SenderConnectionListener connectionListener,
             int connectionListenerInboxCapacity
     ) {
@@ -613,6 +617,7 @@ public static QwpWebSocketSender connect(
         try {
             sender.requestDurableAck = requestDurableAck;
             sender.authTimeoutMs = authTimeoutMs;
+            sender.connectTimeoutMs = connectTimeoutMs;
             sender.closeFlushTimeoutMillis = closeFlushTimeoutMillis;
             sender.reconnectMaxDurationMillis = reconnectMaxDurationMillis;
             sender.reconnectInitialBackoffMillis = reconnectInitialBackoffMillis;
@@ -2439,6 +2444,7 @@ private synchronized WebSocketClient buildAndConnect(ReconnectSupplier ctx) {
                 newClient.setQwpMaxVersion(QwpConstants.VERSION);
                 newClient.setQwpClientId(QwpConstants.CLIENT_ID);
                 newClient.setQwpRequestDurableAck(requestDurableAck);
+                newClient.setConnectTimeout(connectTimeoutMs);
                 newClient.connect(ep.host, ep.port);
                 int upgradeTimeoutMs = (int) Math.min(authTimeoutMs, Integer.MAX_VALUE);
                 newClient.upgrade(WRITE_PATH, upgradeTimeoutMs, authorizationHeader);
diff --git a/core/src/main/java/io/questdb/client/impl/ConfigSchema.java b/core/src/main/java/io/questdb/client/impl/ConfigSchema.java
index b36f3207..0508428e 100644
--- a/core/src/main/java/io/questdb/client/impl/ConfigSchema.java
+++ b/core/src/main/java/io/questdb/client/impl/ConfigSchema.java
@@ -56,6 +56,7 @@ public final class ConfigSchema {
         str("tls_roots", Side.COMMON);
         str("tls_roots_password", Side.COMMON);
         longRange("auth_timeout_ms", Side.COMMON, 0, OPEN_MAX, true, false); // > 0
+        longRange("connect_timeout", Side.COMMON, 0, OPEN_MAX, true, false); // > 0
 
         // INGRESS -- the WebSocket Sender applies. STRING in the registry; the
         // Sender parses suffix/mode values (off/on, 64k, durability) with its
@@ -108,9 +109,11 @@ public final class ConfigSchema {
         intRange("query_pool_min", Side.POOL, OPEN, OPEN_MAX, false, false);
         intRange("query_pool_max", Side.POOL, OPEN, OPEN_MAX, false, false);
         longRange("acquire_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false);
+        longRange("query_close_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false);
         longRange("idle_timeout_ms", Side.POOL, OPEN, OPEN_MAX, false, false);
         longRange("max_lifetime_ms", Side.POOL, OPEN, OPEN_MAX, false, false);
         longRange("housekeeper_interval_ms", Side.POOL, OPEN, OPEN_MAX, false, false);
+        boolOnOff("lazy_connect", Side.POOL); // facade flag: tolerant non-blocking startup (async ingest + lazy reads)
 
         // RESERVED -- accepted no-op (error-policy keys reserved by the spec).
         str("on_internal_error", Side.RESERVED);
diff --git a/core/src/main/java/io/questdb/client/impl/ConfigView.java b/core/src/main/java/io/questdb/client/impl/ConfigView.java
index 1160c2d6..74621eef 100644
--- a/core/src/main/java/io/questdb/client/impl/ConfigView.java
+++ b/core/src/main/java/io/questdb/client/impl/ConfigView.java
@@ -95,6 +95,25 @@ public static String relocatedHint(String key) {
         return RELOCATED_HINTS.get(key);
     }
 
+    /**
+     * Reads a boolean flag spelled {@code true}/{@code false}, or {@code on}/{@code off}
+     * to match the other connect-string switches. An absent key yields
+     * {@code dflt}; any other spelling raises {@link IllegalArgumentException}.
+     */
+    public boolean getBool(String key, boolean dflt) {
+        final String raw = getStr(key);
+        if (raw == null) {
+            return dflt;
+        }
+        // Classify the positive spellings once; a recognised negative spelling
+        // then simply returns the (false) classification.
+        final boolean positive = "true".equals(raw) || "on".equals(raw);
+        if (positive || "false".equals(raw) || "off".equals(raw)) {
+            return positive;
+        }
+        throw new IllegalArgumentException("invalid " + key + ": " + raw + " (expected true, false, on, off)");
+    }
+
     public boolean getBoolOnOff(String key, boolean dflt) {
         String v = getStr(key);
         if (v == null) {
diff --git a/core/src/main/java/io/questdb/client/impl/PooledSender.java b/core/src/main/java/io/questdb/client/impl/PooledSender.java
index 61d89296..e36a8384 100644
--- a/core/src/main/java/io/questdb/client/impl/PooledSender.java
+++ b/core/src/main/java/io/questdb/client/impl/PooledSender.java
@@ -37,123 +37,112 @@
 import java.time.temporal.ChronoUnit;
 
 /**
- * Decorator that lends a real {@link Sender} from {@link SenderPool}. The
- * decorator is pre-allocated once per pool slot and reused for every borrow.
+ * Thin per-borrow handle returned by {@link SenderPool#borrow()}. A fresh
+ * instance is created on every borrow, capturing the immutable lease
+ * {@code generation} stamped by {@code borrow()}; it forwards every
+ * {@link Sender} call to the reused {@link SenderSlot}'s delegate, validating
+ * that generation first via {@link SenderSlot#live(long)}.
  * <p>
- * Behavior difference from a raw Sender: {@link #close()} on a pooled Sender
- * flushes the buffer and returns the decorator to the pool. The underlying
- * Sender is only truly closed when {@link io.questdb.client.QuestDB#close()}
- * shuts down the pool.
+ * Behavior difference from a raw Sender: {@link #close()} flushes the buffer
+ * and returns the slot to the pool. The underlying Sender is only truly closed
+ * when {@link io.questdb.client.QuestDB#close()} shuts the pool down.
+ * <p>
+ * Because the slot is reused across borrows, this wrapper -- not the slot --
+ * carries the lease identity. A stale handle (held after {@link #close()}, with
+ * the slot since re-borrowed) fails its generation check: data calls throw and
+ * {@link #close()} is a no-op, so it can never flush into, release, or be
+ * enqueued twice for a slot a different borrower now owns. This mirrors the
+ * egress {@code QueryLease} guard.
  */
 public final class PooledSender implements Sender {
 
-    private final long createdAtMillis;
-    private final Sender delegate;
-    private final SenderPool pool;
-    // Index of the store-and-forward slot this wrapper owns within the pool,
-    // or -1 when SF is disabled. Stable for the wrapper's whole life; the
-    // pool returns it to the free set only when the wrapper is evicted from
-    // {@code all} (discardBroken / reapIdle). Used to derive a distinct
-    // {@code sender_id} per pooled sender so concurrent SF senders sharing
-    // one {@code sf_dir} never collide on the slot {@code flock}.
-    private final int slotIndex;
-    private volatile long idleSinceMillis;
-    private volatile boolean inUse;
-    private volatile boolean invalidated;
-
-    PooledSender(Sender delegate, SenderPool pool, int slotIndex) {
-        this.delegate = delegate;
-        this.pool = pool;
-        this.slotIndex = slotIndex;
-        this.createdAtMillis = System.currentTimeMillis();
-        this.idleSinceMillis = this.createdAtMillis;
+    private final long generation;
+    private final SenderSlot slot;
+
+    PooledSender(SenderSlot slot, long generation) {
+        this.slot = slot;
+        this.generation = generation;
     }
 
     @Override
     public void at(long timestamp, ChronoUnit unit) {
-        delegate.at(timestamp, unit);
+        slot.live(generation).at(timestamp, unit);
     }
 
     @Override
     public void at(Instant timestamp) {
-        delegate.at(timestamp);
+        slot.live(generation).at(timestamp);
     }
 
     @Override
     public void atNow() {
-        delegate.atNow();
+        slot.live(generation).atNow();
     }
 
     @Override
     public boolean awaitAckedFsn(long targetFsn, long timeoutMillis) {
-        return delegate.awaitAckedFsn(targetFsn, timeoutMillis);
+        return slot.live(generation).awaitAckedFsn(targetFsn, timeoutMillis);
     }
 
     @Override
     public Sender binaryColumn(CharSequence name, byte[] value) {
-        delegate.binaryColumn(name, value);
+        slot.live(generation).binaryColumn(name, value);
         return this;
     }
 
     @Override
     public Sender binaryColumn(CharSequence name, long ptr, long len) {
-        delegate.binaryColumn(name, ptr, len);
+        slot.live(generation).binaryColumn(name, ptr, len);
         return this;
     }
 
     @Override
     public Sender binaryColumn(CharSequence name, DirectByteSlice slice) {
-        delegate.binaryColumn(name, slice);
+        slot.live(generation).binaryColumn(name, slice);
         return this;
     }
 
     @Override
     public Sender boolColumn(CharSequence name, boolean value) {
-        delegate.boolColumn(name, value);
+        slot.live(generation).boolColumn(name, value);
         return this;
     }
 
     @Override
     public DirectByteSlice bufferView() {
-        return delegate.bufferView();
+        return slot.live(generation).bufferView();
     }
 
     @Override
     public Sender byteColumn(CharSequence name, byte value) {
-        delegate.byteColumn(name, value);
+        slot.live(generation).byteColumn(name, value);
         return this;
     }
 
     @Override
     public void cancelRow() {
-        delegate.cancelRow();
+        slot.live(generation).cancelRow();
     }
 
     @Override
     public Sender charColumn(CharSequence name, char value) {
-        delegate.charColumn(name, value);
+        slot.live(generation).charColumn(name, value);
         return this;
     }
 
     /**
-     * Flushes pending rows and returns this decorator to the pool. Does not
-     * actually close the underlying {@link Sender}; that only happens when
-     * the owning {@code QuestDB} is closed.
-     * <p>
-     * Idempotent: a second call after a return is a no-op.
+     * Flushes pending rows and returns this lease's slot to the pool. Does not
+     * actually close the underlying {@link Sender}; that only happens when the
+     * owning {@code QuestDB} is closed.
      * <p>
-     * Clears the current thread's pin (if any) before the slot becomes
-     * borrowable again. Without this step a thread that pinned this
-     * wrapper and then closed it via the public {@link Sender#close()}
-     * (the natural try-with-resources idiom) would still hold the pin
-     * in its {@link ThreadLocal}; a subsequent {@code QuestDB.sender()}
-     * call on that thread would return the cached wrapper even though
-     * another consumer has since borrowed the slot, and the two
-     * consumers would write to the same underlying delegate.
+     * Idempotent: a stale generation (the lease was already returned and the
+     * slot possibly re-borrowed) is a no-op, so a double close cannot flush
+     * into, or re-enqueue, a slot a different borrower now owns. The pool
+     * re-checks the generation under its lock.
      */
     @Override
     public void close() {
-        if (!inUse) {
+        if (generation != slot.generation()) {
             return;
         }
         // Track normal completion rather than catching a specific throwable
@@ -163,257 +152,222 @@ public void close() {
         // abnormal exit as unrecyclable, which is the fail-safe default.
         boolean flushed = false;
         try {
-            delegate.flush();
+            slot.delegate().flush();
             flushed = true;
         } finally {
-            inUse = false;
-            // Clear the pin BEFORE returning the slot. If we cleared
-            // after giveBack(), a concurrent borrower could grab the
-            // slot while this thread's pin still references it, and a
-            // re-pin on this thread would return the (now in-use)
-            // wrapper -- the same race this clear is meant to close.
-            pool.clearPinIfCurrent(this);
             if (flushed) {
-                pool.giveBack(this);
+                slot.pool().giveBack(this);
             } else {
-                // flush() did not complete normally. Sender does not clear
-                // its buffer on flush failure (see Sender Javadoc), and
-                // WebSocket transport latches the failure for good. Either
-                // way the wrapper is unsafe to recycle: the next borrower
-                // would inherit the failed rows or a dead connection. The
-                // original throwable propagates naturally once this finally
-                // returns -- no explicit rethrow needed.
-                pool.discardBroken(this);
+                // flush() did not complete normally. Sender does not clear its
+                // buffer on flush failure (see Sender Javadoc), and WebSocket
+                // transport latches the failure for good. Either way the slot
+                // is unsafe to recycle: the next borrower would inherit the
+                // failed rows or a dead connection. The original throwable
+                // propagates naturally once this finally returns -- no explicit
+                // rethrow needed.
+                slot.pool().discardBroken(this);
             }
         }
     }
 
     @Override
     public Sender decimalColumn(CharSequence name, Decimal256 value) {
-        delegate.decimalColumn(name, value);
+        slot.live(generation).decimalColumn(name, value);
         return this;
     }
 
     @Override
     public Sender decimalColumn(CharSequence name, Decimal128 value) {
-        delegate.decimalColumn(name, value);
+        slot.live(generation).decimalColumn(name, value);
         return this;
     }
 
     @Override
     public Sender decimalColumn(CharSequence name, Decimal64 value) {
-        delegate.decimalColumn(name, value);
+        slot.live(generation).decimalColumn(name, value);
         return this;
     }
 
     @Override
     public Sender decimalColumn(CharSequence name, CharSequence value) {
-        delegate.decimalColumn(name, value);
+        slot.live(generation).decimalColumn(name, value);
         return this;
     }
 
     @Override
     public Sender doubleArray(@NotNull CharSequence name, double[] values) {
-        delegate.doubleArray(name, values);
+        slot.live(generation).doubleArray(name, values);
         return this;
     }
 
     @Override
     public Sender doubleArray(@NotNull CharSequence name, double[][] values) {
-        delegate.doubleArray(name, values);
+        slot.live(generation).doubleArray(name, values);
         return this;
     }
 
     @Override
     public Sender doubleArray(@NotNull CharSequence name, double[][][] values) {
-        delegate.doubleArray(name, values);
+        slot.live(generation).doubleArray(name, values);
         return this;
     }
 
     @Override
     public Sender doubleArray(CharSequence name, DoubleArray array) {
-        delegate.doubleArray(name, array);
+        slot.live(generation).doubleArray(name, array);
         return this;
     }
 
     @Override
     public Sender doubleColumn(CharSequence name, double value) {
-        delegate.doubleColumn(name, value);
+        slot.live(generation).doubleColumn(name, value);
         return this;
     }
 
     @Override
     public boolean drain(long timeoutMillis) {
-        return delegate.drain(timeoutMillis);
+        return slot.live(generation).drain(timeoutMillis);
     }
 
     @Override
     public Sender floatColumn(CharSequence name, float value) {
-        delegate.floatColumn(name, value);
+        slot.live(generation).floatColumn(name, value);
         return this;
     }
 
     @Override
     public void flush() {
-        delegate.flush();
+        slot.live(generation).flush();
     }
 
     @Override
     public long flushAndGetSequence() {
-        return delegate.flushAndGetSequence();
+        return slot.live(generation).flushAndGetSequence();
     }
 
     @Override
     public Sender geoHashColumn(CharSequence name, long bits, int precisionBits) {
-        delegate.geoHashColumn(name, bits, precisionBits);
+        slot.live(generation).geoHashColumn(name, bits, precisionBits);
         return this;
     }
 
     @Override
     public Sender geoHashColumn(CharSequence name, CharSequence value) {
-        delegate.geoHashColumn(name, value);
+        slot.live(generation).geoHashColumn(name, value);
         return this;
     }
 
     @Override
     public long getAckedFsn() {
-        return delegate.getAckedFsn();
+        return slot.live(generation).getAckedFsn();
     }
 
     @Override
     public Sender intColumn(CharSequence name, int value) {
-        delegate.intColumn(name, value);
+        slot.live(generation).intColumn(name, value);
         return this;
     }
 
     @Override
     public Sender ipv4Column(CharSequence name, int address) {
-        delegate.ipv4Column(name, address);
+        slot.live(generation).ipv4Column(name, address);
         return this;
     }
 
     @Override
     public Sender ipv4Column(CharSequence name, CharSequence address) {
-        delegate.ipv4Column(name, address);
+        slot.live(generation).ipv4Column(name, address);
         return this;
     }
 
     @Override
     public Sender long256Column(CharSequence name, long l0, long l1, long l2, long l3) {
-        delegate.long256Column(name, l0, l1, l2, l3);
+        slot.live(generation).long256Column(name, l0, l1, l2, l3);
         return this;
     }
 
     @Override
     public Sender longArray(@NotNull CharSequence name, long[] values) {
-        delegate.longArray(name, values);
+        slot.live(generation).longArray(name, values);
         return this;
     }
 
     @Override
     public Sender longArray(@NotNull CharSequence name, long[][] values) {
-        delegate.longArray(name, values);
+        slot.live(generation).longArray(name, values);
         return this;
     }
 
     @Override
     public Sender longArray(@NotNull CharSequence name, long[][][] values) {
-        delegate.longArray(name, values);
+        slot.live(generation).longArray(name, values);
         return this;
     }
 
     @Override
     public Sender longArray(@NotNull CharSequence name, LongArray values) {
-        delegate.longArray(name, values);
+        slot.live(generation).longArray(name, values);
         return this;
     }
 
     @Override
     public Sender longColumn(CharSequence name, long value) {
-        delegate.longColumn(name, value);
+        slot.live(generation).longColumn(name, value);
         return this;
     }
 
     @Override
     public void reset() {
-        delegate.reset();
+        slot.live(generation).reset();
     }
 
     @Override
     public Sender shortColumn(CharSequence name, short value) {
-        delegate.shortColumn(name, value);
+        slot.live(generation).shortColumn(name, value);
         return this;
     }
 
     @Override
     public Sender stringColumn(CharSequence name, CharSequence value) {
-        delegate.stringColumn(name, value);
+        slot.live(generation).stringColumn(name, value);
         return this;
     }
 
     @Override
     public Sender symbol(CharSequence name, CharSequence value) {
-        delegate.symbol(name, value);
+        slot.live(generation).symbol(name, value);
         return this;
     }
 
     @Override
     public Sender table(CharSequence table) {
-        delegate.table(table);
+        slot.live(generation).table(table);
         return this;
     }
 
     @Override
     public Sender timestampColumn(CharSequence name, long value, ChronoUnit unit) {
-        delegate.timestampColumn(name, value, unit);
+        slot.live(generation).timestampColumn(name, value, unit);
         return this;
     }
 
     @Override
     public Sender timestampColumn(CharSequence name, Instant value) {
-        delegate.timestampColumn(name, value);
+        slot.live(generation).timestampColumn(name, value);
         return this;
     }
 
     @Override
     public Sender uuidColumn(CharSequence name, long lo, long hi) {
-        delegate.uuidColumn(name, lo, hi);
+        slot.live(generation).uuidColumn(name, lo, hi);
         return this;
     }
 
-    long createdAtMillis() {
-        return createdAtMillis;
-    }
-
-    int slotIndex() {
-        return slotIndex;
-    }
-
-    Sender delegate() {
-        return delegate;
-    }
-
-    long idleSinceMillis() {
-        return idleSinceMillis;
-    }
-
-    boolean isInUse() {
-        return inUse;
-    }
-
-    boolean isInvalidated() {
-        return invalidated;
-    }
-
-    void markIdleAt(long nowMillis) {
-        idleSinceMillis = nowMillis;
-    }
-
-    void markInUse() {
-        inUse = true;
+    long generation() {
+        return generation;
     }
 
-    void markInvalidated() {
-        invalidated = true;
+    SenderSlot slot() {
+        return slot;
     }
 }
diff --git a/core/src/main/java/io/questdb/client/impl/QueryClientPool.java b/core/src/main/java/io/questdb/client/impl/QueryClientPool.java
index a6365dfa..cbbc150a 100644
--- a/core/src/main/java/io/questdb/client/impl/QueryClientPool.java
+++ b/core/src/main/java/io/questdb/client/impl/QueryClientPool.java
@@ -26,6 +26,7 @@
 
 import io.questdb.client.QueryException;
 import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import org.jetbrains.annotations.TestOnly;
 
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -49,6 +50,12 @@
  */
 public final class QueryClientPool implements AutoCloseable {
 
+    // Default upper bound, in milliseconds, on how long Query.close() waits for
+    // an in-flight query to drain (after issuing a cancel) before discarding the
+    // worker. Mirrors the ingest side's close_flush_timeout_millis default so a
+    // close() can never block the caller unbounded. Tunable per pool via
+    // closeQueryTimeoutMillis(long).
+    static final long DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS = 5_000;
     private final long acquireTimeoutMillis;
     private final ArrayList<QueryWorker> all;
     private final ArrayDeque<QueryWorker> available;
@@ -75,6 +82,10 @@ public final class QueryClientPool implements AutoCloseable {
     private final AtomicInteger nextSlotIndex = new AtomicInteger();
     private final Condition workerReleased;
     private volatile boolean closed;
+    // Upper bound on the Query.close() drain wait; see
+    // DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS. Volatile because QuestDBImpl sets it
+    // once at build time on a different thread than the borrowers that read it.
+    private volatile long closeQueryTimeoutMillis = DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS;
     private int inFlightCreations;
 
     public QueryClientPool(
@@ -89,11 +100,12 @@ public QueryClientPool(
                 idleTimeoutMillis, maxLifetimeMillis, null);
     }
 
-    // Package-private constructor exposing the connectHook test seam: production
-    // passes null (-> the real QwpQueryClient.connect()). White-box tests in
-    // io.questdb.client.test.impl reach this by reflection to inject a hook that
-    // throws a non-RuntimeException Throwable from the native connect path.
-    QueryClientPool(
+    // Constructor exposing the connectHook seam. Production (QuestDBImpl) passes
+    // null -> the real QwpQueryClient.connect(); white-box tests pass a hook that
+    // throws a non-RuntimeException Throwable from the native connect path. This
+    // is the construction path QuestDBImpl uses, so it is a real (public) ctor,
+    // not test-only.
+    public QueryClientPool(
             String configurationString,
             int minSize,
             int maxSize,
@@ -106,13 +118,12 @@ public QueryClientPool(
                 idleTimeoutMillis, maxLifetimeMillis, connectHook, null);
     }
 
-    // Package-private constructor exposing both the connectHook and startHook
-    // test seams: production passes null for each (-> the real
-    // QwpQueryClient.connect() and QueryWorker.start()). White-box tests in
-    // io.questdb.client.test.impl reach this by reflection to inject a hook that
-    // throws a Throwable from either the native connect path (connectHook) or
-    // the worker thread-start path (startHook).
-    QueryClientPool(
+    // Constructor exposing both the connectHook and startHook seams. Production
+    // reaches it via the overload above (both null -> the real
+    // QwpQueryClient.connect() and QueryWorker.start()); white-box tests pass a
+    // hook that throws a Throwable from either the native connect path
+    // (connectHook) or the worker thread-start path (startHook).
+    public QueryClientPool(
             String configurationString,
             int minSize,
             int maxSize,
@@ -197,7 +208,12 @@ public QueryWorker acquire() {
                     throw new QueryException((byte) 0, "QuestDB handle is closed");
                 }
                 if (!available.isEmpty()) {
-                    return available.pollFirst();
+                    QueryWorker w = available.pollFirst();
+                    // Stamp a fresh lease id under the lock so the QueryLease
+                    // about to be handed out can be distinguished from any
+                    // prior, now-stale borrow of the same worker.
+                    w.bumpGeneration();
+                    return w;
                 }
                 if (all.size() + inFlightCreations < maxSize) {
                     inFlightCreations++;
@@ -248,6 +264,8 @@ public QueryWorker acquire() {
                         throw new QueryException((byte) 0, "QuestDB handle is closed");
                     }
                     all.add(created);
+                    // Stamp the first lease id for this freshly built worker.
+                    created.bumpGeneration();
                     return created;
                 }
                 if (remainingNanos <= 0) {
@@ -297,6 +315,87 @@ public void close() {
         }
     }
 
+    /**
+     * Cancels the in-flight query on {@code w} only while its lease generation
+     * still equals {@code gen}, holding the pool lock across both the check and
+     * the wire cancel. acquire(), release() and discard() bump the generation
+     * under this same lock, so while it is held the generation cannot change: a
+     * cancel whose lease has already gone stale (the worker was released and
+     * re-borrowed) is dropped instead of aborting the new borrower's query. The
+     * cancel itself is non-blocking -- a volatile flag plus an AtomicLong set --
+     * so the lock is held only briefly.
+     */
+    void cancelIfCurrent(QueryWorker w, long gen) {
+        lock.lock();
+        try {
+            if (closed) {
+                return;
+            }
+            if (w.generation() != gen) {
+                return;
+            }
+            w.cancelInFlight();
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    long closeQueryTimeoutMillis() {
+        return closeQueryTimeoutMillis;
+    }
+
+    void closeQueryTimeoutMillis(long millis) {
+        this.closeQueryTimeoutMillis = millis;
+    }
+
+    /**
+     * Evicts a worker whose in-flight query {@link QueryImpl#close(long)} could
+     * not drain within {@link #closeQueryTimeoutMillis} (the cancel was not
+     * honored in time, or the caller was interrupted). The worker's
+     * connection is left in an unknown protocol state -- a late {@code RESULT_*}
+     * frame for the abandoned query could corrupt the next borrower's stream --
+     * so it must NOT return to the pool. Removes it from {@code all} (freeing
+     * capacity for a fresh worker) and tears it down outside the lock via
+     * {@link QueryWorker#shutdown()}, which interrupts the dispatch thread so a
+     * stuck {@code execute()} returns promptly.
+     * <p>
+     * Bails when the pool is already closed: {@link #close()} owns the teardown
+     * of every worker via its snapshot loop, so mutating {@code all} here would
+     * race that iteration on a non-thread-safe {@code ArrayList}. Also bails on a
+     * stale generation -- the worker was already released/discarded and possibly
+     * re-borrowed, so discarding it would evict a worker a different borrower now
+     * owns. Mirrors {@link SenderPool#discardBroken} on the ingest side.
+     */
+    void discard(QueryWorker w, long gen) {
+        lock.lock();
+        try {
+            if (closed) {
+                return;
+            }
+            if (w.generation() != gen) {
+                return;
+            }
+            // Invalidate the lease so a duplicate close()/release with the same
+            // generation is dropped and the in-flight handle can no longer drive
+            // this worker.
+            w.bumpGeneration();
+            all.remove(w);
+            // Capacity freed -- a waiter in acquire() may now create a fresh
+            // worker in this slot's place.
+            workerReleased.signal();
+        } finally {
+            lock.unlock();
+        }
+        // Tear down outside the lock so a slow join doesn't keep the pool
+        // latched. shutdown() is best-effort and idempotent.
+        try {
+            w.shutdown();
+        } catch (Throwable ignored) {
+            // Best-effort: a teardown Error (e.g. an -ea AssertionError) must
+            // not propagate out of Query.close().
+        }
+    }
+
     void reapIdle() {
         if (closed) {
             return;
@@ -340,14 +439,30 @@ void reapIdle() {
         }
     }
 
-    void release(QueryWorker w) {
-        long now = System.currentTimeMillis();
-        w.markIdleAt(now);
+    void release(QueryWorker w, long gen) {
         lock.lock();
         try {
             if (closed) {
                 return;
             }
+            if (w.generation() != gen) {
+                // Stale release: this lease was already returned and the worker
+                // has since been re-borrowed (or this is a duplicate close of an
+                // already-released lease). Dropping it is what makes
+                // Query.close() idempotent even under a concurrent re-borrow --
+                // without this guard a double close would enqueue the worker
+                // twice and hand it to two borrowers at once, corrupting the
+                // whole pool. The flag a stale close() reads is no longer its
+                // own lease's, so a non-validated release path could not catch
+                // this; the generation captured at borrow time can.
+                return;
+            }
+            // Invalidate the just-returned lease so a duplicate release with the
+            // same generation is also dropped and the in-flight handle can no
+            // longer drive this worker.
+            w.bumpGeneration();
+            w.markIdleAt(System.currentTimeMillis());
+            assert !available.contains(w) : "worker already present in available deque on release";
             available.addLast(w);
             workerReleased.signal();
         } finally {
@@ -355,11 +470,12 @@ void release(QueryWorker w) {
         }
     }
 
-    // Package-private white-box accessor for tests: reports the current
-    // in-flight creation count under the pool lock. A non-zero value after a
-    // failed acquire() means the slot reservation was never released -- the
-    // capacity-shrink bug this guards against.
-    int inFlightCreations() {
+    // White-box accessor for tests: reports the current in-flight creation count
+    // under the pool lock. A non-zero value after a failed acquire() means the
+    // slot reservation was never released -- the capacity-shrink bug this guards
+    // against.
+    @TestOnly
+    public int inFlightCreations() {
         lock.lock();
         try {
             return inFlightCreations;
diff --git a/core/src/main/java/io/questdb/client/impl/QueryImpl.java b/core/src/main/java/io/questdb/client/impl/QueryImpl.java
index fc80d263..baf483ea 100644
--- a/core/src/main/java/io/questdb/client/impl/QueryImpl.java
+++ b/core/src/main/java/io/questdb/client/impl/QueryImpl.java
@@ -24,8 +24,6 @@
 
 package io.questdb.client.impl;
 
-import io.questdb.client.Completion;
-import io.questdb.client.Query;
 import io.questdb.client.QueryException;
 import io.questdb.client.cutlass.qwp.client.QwpBindSetter;
 import io.questdb.client.cutlass.qwp.client.QwpBindValues;
@@ -40,39 +38,54 @@
 import java.util.concurrent.locks.ReentrantLock;
 
 /**
- * Per-thread implementation of {@link Query}. Holds the configured query
- * state (SQL, optional binds, handler), an inner {@link Completion}, and a
- * wrapping {@link QwpColumnBatchHandler} that forwards callbacks to the user
- * handler and signals the Completion on terminal events.
+ * Reusable per-{@link QueryWorker} query state: the configured SQL, optional
+ * binds, handler, terminal-event signalling, and a wrapping
+ * {@link QwpColumnBatchHandler} that forwards callbacks to the user handler and
+ * signals completion on terminal events. One instance is pre-allocated per
+ * worker in the constructor and reused across every borrow.
  * <p>
- * Lifecycle: {@link QuestDBImpl#query()} returns a per-thread instance, reset
- * to empty if it was in a terminal state. {@link #submit()} acquires a
- * worker, dispatches, and returns the cached {@link Completion}.
+ * Because the instance is shared across borrows, it must never be handed to a
+ * caller directly -- a stale reference would leak into a later borrow's
+ * lifecycle. Callers instead receive a thin, per-borrow {@link QueryLease} that
+ * carries the lease {@code generation} stamped at borrow time and passes it
+ * into every operation here. Each operation validates that generation against
+ * {@link QueryWorker#generation()}:
+ * <ul>
+ *   <li>builder/await operations on a stale generation throw
+ *       {@code IllegalStateException} ("query handle is not borrowed"),</li>
+ *   <li>{@link #close(long)} and {@link #cancel(long)} on a stale generation are
+ *       no-ops -- this is what makes {@code Query.close()} idempotent and
+ *       prevents a stale handle from releasing, or cancelling the in-flight
+ *       query of, a worker a different borrower now owns.</li>
+ * </ul>
+ * <p>
+ * Lifecycle: {@link QueryWorker#lease()} resets this state and wraps it in a
+ * fresh {@link QueryLease} when {@link QuestDBImpl#borrowQuery()} acquires the
+ * worker. {@link #submit(long)} dispatches on the held worker (single-flight);
+ * {@link #close(long)} returns the worker to the pool.
  */
-final class QueryImpl implements Query {
+final class QueryImpl {
 
-    private final InnerCompletion completion = new InnerCompletion();
     private final Condition doneCondition;
     private final ReentrantLock doneLock = new ReentrantLock();
-    private final QueryClientPool pool;
     private final StringSink sqlBuffer = new StringSink();
+    private final QueryWorker worker;
+    private final QwpBindSetter wireBinds = this::applyBinds;
     private final WrappingHandler wrappingHandler = new WrappingHandler();
-    private volatile QueryWorker currentWorker;
     private volatile boolean done = true;
     private volatile String resultMessage;
     private volatile byte resultStatus;
     private volatile Throwable unexpectedError;
     private QwpBindSetter userBinds;
-    private final QwpBindSetter wireBinds = this::applyBinds;
     private QwpColumnBatchHandler userHandler;
 
-    QueryImpl(QueryClientPool pool) {
-        this.pool = pool;
+    QueryImpl(QueryWorker worker) {
+        this.worker = worker;
         this.doneCondition = doneLock.newCondition();
     }
 
-    @Override
-    public void abandon() {
+    void abandon(long gen) {
+        checkLive(gen);
         if (!done) {
             throw new IllegalStateException("a previous submit() is still in flight; await the Completion first");
         }
@@ -81,27 +94,113 @@ public void abandon() {
         sqlBuffer.clear();
     }
 
-    @Override
-    public Query binds(QwpBindSetter binds) {
+    void await(long gen) throws InterruptedException {
+        rejectHandlerReentry("await");
+        checkLive(gen);
+        doneLock.lock();
+        try {
+            while (!done) {
+                doneCondition.await();
+            }
+        } finally {
+            doneLock.unlock();
+        }
+        throwIfFailed();
+    }
+
+    boolean await(long gen, long timeout, TimeUnit unit) throws InterruptedException {
+        rejectHandlerReentry("await");
+        checkLive(gen);
+        long remaining = unit.toNanos(timeout);
+        doneLock.lock();
+        try {
+            while (!done) {
+                if (remaining <= 0) {
+                    return false;
+                }
+                remaining = doneCondition.awaitNanos(remaining);
+            }
+        } finally {
+            doneLock.unlock();
+        }
+        throwIfFailed();
+        return true;
+    }
+
+    void cancel(long gen) {
+        // Fast-path drop of an obviously-stale or already-finished cancel,
+        // without taking the pool lock. This is only a hint -- the
+        // authoritative re-check runs under the pool lock inside
+        // worker.cancelInFlight(gen).
+        if (gen != worker.generation() || done) {
+            return;
+        }
+        // Re-check the lease generation and issue the wire cancel atomically
+        // under the pool lock (the same lock acquire()/release() bump the
+        // generation under). An unlocked check followed by an unlocked cancel
+        // is a TOCTOU: a cross-thread watchdog can pass the check, get
+        // preempted while this lease is released and the worker re-borrowed by
+        // another caller, then resume and abort that caller's in-flight query.
+        worker.cancelInFlight(gen);
+    }
+
+    void close(long gen) {
+        rejectHandlerReentry("close");
+        // A stale generation means this lease was already released and the
+        // worker may now be owned by another borrower. Dropping the call is
+        // what keeps close() idempotent without releasing someone else's
+        // worker or cancelling their in-flight query. release() re-checks the
+        // generation under the pool lock, so the worker can never be enqueued
+        // twice even if two threads race a close on the same live lease.
+        if (gen != worker.generation()) {
+            return;
+        }
+        // If a submit is still in flight (the caller did not await, or its
+        // await timed out), cancel it and wait for the terminal event so the
+        // leased worker is idle before it returns to the pool -- otherwise the
+        // next borrower would inherit a running execute().
+        //
+        // The wait is bounded (closeQueryTimeoutMillis) and interruptible, so a
+        // caller that bounded its own await() is never pinned to the full
+        // remaining query duration here. If the query does NOT drain in time (a
+        // server slow to honor the cancel, or the caller interrupting), the
+        // worker is still running execute() on a connection whose protocol state
+        // is now uncertain -- a late RESULT_* for the abandoned query could
+        // corrupt the next borrower's stream -- so it is discarded rather than
+        // returned. The pool grows a fresh worker on the next borrow.
+        if (!done) {
+            worker.cancelInFlight(gen);
+            if (!awaitDone(worker.closeQueryTimeoutMillis())) {
+                worker.discardFromPool(gen);
+                return;
+            }
+        }
+        worker.releaseToPool(gen);
+    }
+
+    boolean isDone(long gen) {
+        checkLive(gen);
+        return done;
+    }
+
+    void setBinds(long gen, QwpBindSetter binds) {
+        checkLive(gen);
         this.userBinds = binds;
-        return this;
     }
 
-    @Override
-    public Query handler(QwpColumnBatchHandler handler) {
+    void setHandler(long gen, QwpColumnBatchHandler handler) {
+        checkLive(gen);
         this.userHandler = handler;
-        return this;
     }
 
-    @Override
-    public Query sql(CharSequence sql) {
+    void setSql(long gen, CharSequence sql) {
+        checkLive(gen);
         sqlBuffer.clear();
         sqlBuffer.put(sql);
-        return this;
     }
 
-    @Override
-    public Completion submit() {
+    void submit(long gen) {
+        checkLive(gen);
         if (sqlBuffer.length() == 0) {
             throw new IllegalStateException("sql is required");
         }
@@ -111,7 +210,6 @@ public Completion submit() {
         if (!done) {
             throw new IllegalStateException("a previous submit() is still in flight; await the Completion first");
         }
-        QueryWorker w = pool.acquire();
         // Reset terminal state under the lock so a stale signal from a prior
         // run can't be observed by the upcoming await().
         doneLock.lock();
@@ -120,12 +218,10 @@ public Completion submit() {
             resultStatus = 0;
             resultMessage = null;
             unexpectedError = null;
-            currentWorker = w;
         } finally {
             doneLock.unlock();
         }
-        w.dispatch(this);
-        return completion;
+        worker.dispatch(this);
     }
 
     private void applyBinds(QwpBindValues binds) {
@@ -135,6 +231,56 @@ private void applyBinds(QwpBindValues binds) {
         }
     }
 
+    /**
+     * Waits up to {@code timeoutMillis} for the in-flight query's terminal
+     * event. Returns {@code true} once {@code done} is set, {@code false} on
+     * timeout or interrupt. Unlike an uninterruptible drain, an interrupt aborts
+     * the wait and re-raises the thread's interrupt flag, so {@code close()}
+     * stays responsive to a caller that wants to give up.
+     */
+    private boolean awaitDone(long timeoutMillis) {
+        long remaining = TimeUnit.MILLISECONDS.toNanos(timeoutMillis);
+        doneLock.lock();
+        try {
+            while (!done) {
+                if (remaining <= 0) {
+                    return false;
+                }
+                try {
+                    remaining = doneCondition.awaitNanos(remaining);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt();
+                    return false;
+                }
+            }
+            return true;
+        } finally {
+            doneLock.unlock();
+        }
+    }
+
+    private void checkLive(long gen) {
+        if (gen != worker.generation()) {
+            throw new IllegalStateException("query handle is not borrowed (closed or never leased)");
+        }
+    }
+
+    private void rejectHandlerReentry(String op) {
+        // Result handlers (onBatch/onEnd/onError) run inline on the worker's
+        // dispatch thread. A blocking lease op called from there would wait for
+        // a terminal event that only this same thread can deliver -- a
+        // permanent, uninterruptible self-deadlock plus a leaked worker. Fail
+        // loudly at the call site instead. cancel() is the non-blocking stop.
+        if (worker.isCurrentThreadWorker()) {
+            throw new IllegalStateException(
+                    op + "() must not be called from a result handler. Handlers "
+                            + "(onBatch/onEnd/onError) run on the worker thread, so " + op
+                            + "() would block forever waiting for a terminal event that only "
+                            + "this same thread can deliver. To stop a query from inside a "
+                            + "handler, call cancel() (non-blocking).");
+        }
+    }
+
     private void signalDone(byte status, String message, Throwable unexpected) {
         doneLock.lock();
         try {
@@ -145,27 +291,38 @@ private void signalDone(byte status, String message, Throwable unexpected) {
             this.resultMessage = message;
             this.unexpectedError = unexpected;
             this.done = true;
-            this.currentWorker = null;
             doneCondition.signalAll();
         } finally {
             doneLock.unlock();
         }
     }
 
+    private void throwIfFailed() {
+        // Read the field once; the null check and the throw use the same value.
+        Throwable cause = unexpectedError;
+        if (cause != null) {
+            throw new QueryException(resultStatus, resultMessage, cause);
+        } else if (resultStatus != 0) {
+            throw new QueryException(resultStatus, resultMessage);
+        }
+    }
+
     /**
-     * Drops any prior builder state (SQL, binds, handler) if no submit is
-     * currently in flight. {@link QuestDBImpl#query()} invokes this before
-     * returning the per-thread instance so callers see the "reset to empty"
-     * contract documented on {@link io.questdb.client.Query} regardless of
-     * whether the previous use ended at a terminal handler callback or at
-     * {@link #abandon()}.
+     * Resets builder and terminal state to empty. Called by
+     * {@link QueryWorker#lease()} when {@link QuestDBImpl#borrowQuery()} hands a
+     * freshly stamped {@link QueryLease} out, so each borrow starts from the
+     * documented "reset to empty" contract on {@link io.questdb.client.Query}.
+     * The leased worker is idle at this point (just acquired from the pool), so
+     * the reset is unconditional.
      */
-    void resetIfDone() {
-        if (done) {
-            userBinds = null;
-            userHandler = null;
-            sqlBuffer.clear();
-        }
+    void resetForBorrow() {
+        userBinds = null;
+        userHandler = null;
+        sqlBuffer.clear();
+        resultStatus = 0;
+        resultMessage = null;
+        unexpectedError = null;
+        done = true;
     }
 
     void runOn(QwpQueryClient client) {
@@ -185,63 +342,6 @@ void signalUnexpected(Throwable t) {
         signalDone((byte) 0, t.getMessage() != null ? t.getMessage() : t.getClass().getSimpleName(), t);
     }
 
-    private final class InnerCompletion implements Completion {
-
-        @Override
-        public void await() throws InterruptedException {
-            doneLock.lock();
-            try {
-                while (!done) {
-                    doneCondition.await();
-                }
-            } finally {
-                doneLock.unlock();
-            }
-            throwIfFailed();
-        }
-
-        @Override
-        public boolean await(long timeout, TimeUnit unit) throws InterruptedException {
-            long remaining = unit.toNanos(timeout);
-            doneLock.lock();
-            try {
-                while (!done) {
-                    if (remaining <= 0) {
-                        return false;
-                    }
-                    remaining = doneCondition.awaitNanos(remaining);
-                }
-            } finally {
-                doneLock.unlock();
-            }
-            throwIfFailed();
-            return true;
-        }
-
-        @Override
-        public void cancel() {
-            QueryWorker w = currentWorker;
-            if (w != null && !done) {
-                w.cancelInFlight();
-            }
-        }
-
-        @Override
-        public boolean isDone() {
-            return done;
-        }
-
-        private void throwIfFailed() {
-            Throwable unexpected = unexpectedError;
-            if (unexpected != null) {
-                throw new QueryException(resultStatus, resultMessage, unexpected);
-            }
-            if (resultStatus != 0) {
-                throw new QueryException(resultStatus, resultMessage);
-            }
-        }
-    }
-
     private final class WrappingHandler implements QwpColumnBatchHandler {
 
         @Override
diff --git a/core/src/main/java/io/questdb/client/impl/QueryLease.java b/core/src/main/java/io/questdb/client/impl/QueryLease.java
new file mode 100644
index 00000000..6083b802
--- /dev/null
+++ b/core/src/main/java/io/questdb/client/impl/QueryLease.java
@@ -0,0 +1,110 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.impl;
+
+import io.questdb.client.Completion;
+import io.questdb.client.Query;
+import io.questdb.client.cutlass.qwp.client.QwpBindSetter;
+import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Per-borrow handle returned by {@link QuestDBImpl#borrowQuery()}. Every borrow
+ * creates a new instance pairing the worker's reused {@link QueryImpl} with the
+ * lease {@code generation} stamped by {@link QueryClientPool#acquire()}. That
+ * generation is final and is passed along with every delegated {@link Query}
+ * and {@link Completion} call, which is what lets {@link QueryImpl} tell a
+ * stale handle apart from the current borrow on the same worker.
+ * <p>
+ * The class implements {@link Completion} as well as {@link Query} so that
+ * {@link #submit()} can return {@code this} instead of a new object; the only
+ * allocation is this small handle, once per borrow (the JIT may scalar-replace
+ * it in the common try-with-resources case).
+ */
+final class QueryLease implements Query, Completion {
+
+    private final long generation;
+    private final QueryImpl impl;
+
+    QueryLease(QueryImpl impl, long generation) {
+        this.impl = impl;
+        this.generation = generation;
+    }
+
+    @Override
+    public void abandon() {
+        impl.abandon(generation);
+    }
+
+    @Override
+    public void await() throws InterruptedException {
+        impl.await(generation);
+    }
+
+    @Override
+    public boolean await(long timeout, TimeUnit unit) throws InterruptedException {
+        return impl.await(generation, timeout, unit);
+    }
+
+    @Override
+    public Query binds(QwpBindSetter binds) {
+        impl.setBinds(generation, binds);
+        return this;
+    }
+
+    @Override
+    public void cancel() {
+        impl.cancel(generation);
+    }
+
+    @Override
+    public void close() {
+        impl.close(generation);
+    }
+
+    @Override
+    public Query handler(QwpColumnBatchHandler handler) {
+        impl.setHandler(generation, handler);
+        return this;
+    }
+
+    @Override
+    public boolean isDone() {
+        return impl.isDone(generation);
+    }
+
+    @Override
+    public Query sql(CharSequence sql) {
+        impl.setSql(generation, sql);
+        return this;
+    }
+
+    @Override
+    public Completion submit() {
+        impl.submit(generation);
+        return this;
+    }
+}
diff --git a/core/src/main/java/io/questdb/client/impl/QueryWorker.java b/core/src/main/java/io/questdb/client/impl/QueryWorker.java
index f4f641c8..b3c6a32c 100644
--- a/core/src/main/java/io/questdb/client/impl/QueryWorker.java
+++ b/core/src/main/java/io/questdb/client/impl/QueryWorker.java
@@ -24,6 +24,7 @@
 
 package io.questdb.client.impl;
 
+import io.questdb.client.Query;
 import io.questdb.client.QueryException;
 import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
 
@@ -39,7 +40,11 @@
  * The pooled query client's own I/O thread continues to drive the wire; the
  * worker thread exists only to keep {@code execute()} off the application's
  * submitting thread. Handler callbacks ({@code onBatch}, {@code onEnd},
- * {@code onError}) still run on the client's I/O thread.
+ * {@code onError}) run on this worker's own dispatch thread, which consumes the
+ * I/O thread's event queue inline -- not on the I/O thread itself. A handler
+ * must therefore never call the lease's blocking {@code close()}/{@code await()}
+ * (it would self-deadlock waiting for a terminal event only this thread can
+ * deliver); use the non-blocking {@code cancel()} to stop from inside a handler.
  */
 public final class QueryWorker {
 
@@ -47,16 +52,27 @@ public final class QueryWorker {
     private final QwpQueryClient client;
     private final long createdAtMillis;
     private final QueryClientPool pool;
+    private final QueryImpl query;
     private final Condition signalCondition;
     private final ReentrantLock signalLock = new ReentrantLock();
     private final Thread thread;
     private volatile QueryImpl current;
+    // Monotonic lease id. Mutated only under the QueryClientPool lock
+    // (bumped once in acquire() when the worker is handed out and once in
+    // release() when it is returned), so successive borrows of the same
+    // worker get distinct ids. A QueryLease captures the value live during
+    // its borrow; once the worker is released or re-borrowed the captured id
+    // no longer matches, which is how a stale handle's close()/cancel()/
+    // submit() are detected and dropped. Volatile so a stale handle on another
+    // thread observes the latest value without taking the pool lock.
+    private volatile long generation;
     private volatile long idleSinceMillis;
     private volatile boolean shuttingDown;
 
     public QueryWorker(QwpQueryClient client, QueryClientPool pool, int slotIndex) {
         this.client = client;
         this.pool = pool;
+        this.query = new QueryImpl(this);
         this.signalCondition = signalLock.newCondition();
         this.thread = new Thread(this::runLoop, "questdb-query-worker-" + slotIndex);
         this.thread.setDaemon(true);
@@ -68,17 +84,48 @@ long createdAtMillis() {
         return createdAtMillis;
     }
 
+    /**
+     * Advances the lease generation. Called by {@link QueryClientPool} under
+     * the pool lock when this worker is handed out (acquire) and when it is
+     * returned (release).
+     */
+    void bumpGeneration() {
+        generation++;
+    }
+
+    /**
+     * Current lease generation. See {@link #generation} for the visibility and
+     * mutation contract.
+     */
+    long generation() {
+        return generation;
+    }
+
     long idleSinceMillis() {
         return idleSinceMillis;
     }
 
+    /**
+     * True when the calling thread is this worker's own dispatch thread -- i.e.
+     * a reentrant call from inside a result handler, which runs inline on this
+     * thread. Blocking lease operations ({@link QueryImpl#close}/
+     * {@link QueryImpl#await}) use this to fail loudly instead of
+     * self-deadlocking.
+     */
+    boolean isCurrentThreadWorker() {
+        return Thread.currentThread() == thread;
+    }
+
     void markIdleAt(long nowMillis) {
         idleSinceMillis = nowMillis;
     }
 
     /**
-     * Cancels the in-flight query on this worker's client. Safe to call from
-     * any thread; harmless if the worker is idle.
+     * Issues an unconditional wire cancel against whatever query this worker's
+     * client is currently running. Callers must already own the worker for the
+     * current lease -- in practice this runs under the pool lock via
+     * {@link QueryClientPool#cancelIfCurrent}, which validates the lease
+     * generation first. Lease code must use {@link #cancelInFlight(long)}.
      */
     void cancelInFlight() {
         try {
@@ -88,6 +135,18 @@ void cancelInFlight() {
         }
     }
 
+    /**
+     * Cancels the in-flight query only if this worker's lease generation still
+     * equals {@code gen}. Delegates to the pool so the generation re-check and
+     * the wire cancel happen together under the pool lock that
+     * {@link QueryClientPool#acquire} and {@link QueryClientPool#release} bump
+     * the generation under. That atomicity stops a stale cross-thread cancel
+     * from aborting a later borrower's query on the same worker.
+     */
+    void cancelInFlight(long gen) {
+        pool.cancelIfCurrent(this, gen);
+    }
+
     /**
      * Returns the {@link QwpQueryClient} this worker drives. Exposed for
      * introspection and tests; callers must not invoke {@code execute()} on
@@ -97,6 +156,44 @@ public QwpQueryClient client() {
         return client;
     }
 
+    /**
+     * Resets the worker's reused {@link QueryImpl} and returns a fresh
+     * {@link QueryLease} stamped with the current lease {@link #generation}.
+     * Called by {@link QuestDBImpl#borrowQuery()} right after
+     * {@link QueryClientPool#acquire()} hands this worker out (which bumped the
+     * generation under the pool lock). The lease is a small per-borrow handle;
+     * the heavy state stays on the reused {@link QueryImpl}, and the per-submit
+     * path remains allocation-free.
+     */
+    Query lease() {
+        query.resetForBorrow();
+        return new QueryLease(query, generation);
+    }
+
+    long closeQueryTimeoutMillis() {
+        return pool.closeQueryTimeoutMillis();
+    }
+
+    /**
+     * Discards this worker from the pool instead of returning it. Called by
+     * {@link QueryImpl#close(long)} when the in-flight query could not be
+     * drained within the close budget, leaving the connection in an unknown
+     * protocol state. The captured lease {@code gen} lets the pool reject a
+     * stale discard whose worker has already been re-borrowed.
+     */
+    void discardFromPool(long gen) {
+        pool.discard(this, gen);
+    }
+
+    /**
+     * Returns this worker to the pool. Called by {@link QueryImpl#close(long)}
+     * when the borrowed lease is released; the captured lease {@code gen} lets
+     * the pool reject a stale release whose worker has already been re-borrowed.
+     */
+    void releaseToPool(long gen) {
+        pool.release(this, gen);
+    }
+
     void shutdown() {
         shuttingDown = true;
         signalLock.lock();
@@ -106,10 +203,19 @@ void shutdown() {
             signalLock.unlock();
         }
         try {
-            // If a query is in flight on this worker, ask the client to abort so
-            // execute() returns promptly and the thread can exit before join
-            // times out. cancel() is documented as thread-safe and is a no-op
-            // when idle.
+            // If a query is in flight on this worker, force execute() to return
+            // promptly so the dispatch thread exits before the join below times
+            // out. Two nudges, strongest first:
+            //   1. Interrupt the dispatch thread. takeEvent() (QwpSpscQueue.take)
+            //      is interrupt-aware, and executeOnce() turns the resulting
+            //      InterruptedException into a terminal event -> signalDone. This
+            //      releases a caller parked in Query.close() even when the I/O
+            //      thread is wedged and client.close()'s synthetic terminal
+            //      (closePool()) never runs -- the race that would otherwise
+            //      strand the caller forever.
+            //   2. Ask the client to cancel on the wire so the server stops work.
+            //      Best-effort and a no-op when idle.
+            thread.interrupt();
             try {
                 client.cancel();
             } catch (Throwable ignored) {
@@ -140,8 +246,10 @@ void start() {
     }
 
     /**
-     * Hands a configured {@link QueryImpl} to this worker. The caller must
-     * have just acquired this worker via QueryClientPool#acquire(long).
+     * Hands a configured {@link QueryImpl} to this worker for execution. The
+     * worker is held by an open {@link io.questdb.client.Query} lease (see
+     * {@link #lease()}), so a lease may dispatch repeatedly (single-flight)
+     * until it is closed.
      */
     void dispatch(QueryImpl q) {
         signalLock.lock();
@@ -181,6 +289,17 @@ private void runLoop() {
                     return;
                 }
                 q = current;
+                // Clear the hand-off slot under signalLock, at the moment of
+                // consumption -- NOT after runOn() returns. A lease is
+                // single-flight but reused: the user thread loops submit() ->
+                // await() on the same handle. The terminal callback inside
+                // runOn() wakes the user thread, which can call submit() ->
+                // dispatch() (current = q; signal) before this worker thread
+                // returns from runOn(). Clearing current after runOn() would
+                // race that dispatch, clobber the freshly-set job, drop its
+                // already-consumed signal, and park the worker forever while
+                // the user thread waits on a Completion that never fires.
+                current = null;
             } finally {
                 signalLock.unlock();
             }
@@ -188,9 +307,6 @@ private void runLoop() {
                 q.runOn(client);
             } catch (Throwable t) {
                 q.signalUnexpected(t);
-            } finally {
-                current = null;
-                pool.release(this);
             }
         }
     }
diff --git a/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java b/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java
index 5bba8d46..4e72237d 100644
--- a/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java
+++ b/core/src/main/java/io/questdb/client/impl/QuestDBImpl.java
@@ -24,27 +24,30 @@
 
 package io.questdb.client.impl;
 
-import io.questdb.client.Completion;
 import io.questdb.client.QuestDB;
 import io.questdb.client.Query;
 import io.questdb.client.Sender;
-import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler;
+import io.questdb.client.SenderConnectionListener;
+import io.questdb.client.SenderErrorHandler;
 import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import org.jetbrains.annotations.TestOnly;
 
 import java.util.function.Consumer;
 import java.util.function.IntFunction;
 
 /**
- * Implementation of {@link QuestDB}. Owns the elastic {@link SenderPool}
- * and {@link QueryClientPool}, a {@link PoolHousekeeper} that reaps idle
- * slots, and a {@link ThreadLocal} of {@link QueryImpl} instances so that
- * {@link #query()} is allocation-free after the first call on each thread.
+ * Implementation of {@link QuestDB}. Owns the elastic {@link SenderPool} and
+ * {@link QueryClientPool} and a {@link PoolHousekeeper} that reaps idle slots.
+ * {@link #borrowQuery()} leases a pooled {@link QueryWorker} and hands back a
+ * thin {@link QueryLease} over its reused {@link QueryImpl}; the heavy per-query
+ * state is pre-allocated on the worker and the per-submit path is
+ * allocation-free, so only the small lease handle is created per borrow (and is
+ * routinely scalar-replaced by the JIT in the try-with-resources case).
  */
 public final class QuestDBImpl implements QuestDB {
 
     private final PoolHousekeeper housekeeper;
     private final QueryClientPool queryPool;
-    private final ThreadLocal<QueryImpl> queryThreadLocal;
     private final SenderPool senderPool;
     private volatile boolean closed;
 
@@ -58,20 +61,24 @@ public QuestDBImpl(
             long acquireTimeoutMillis,
             long idleTimeoutMillis,
             long maxLifetimeMillis,
-            long housekeeperIntervalMillis
+            long housekeeperIntervalMillis,
+            long queryCloseTimeoutMillis,
+            SenderErrorHandler errorHandler,
+            SenderConnectionListener connectionListener
     ) {
         this(ingestConfig, queryConfig, senderMin, senderMax, queryMin, queryMax,
                 acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis,
-                housekeeperIntervalMillis, null, null);
+                housekeeperIntervalMillis, queryCloseTimeoutMillis, null, null, errorHandler, connectionListener);
     }
 
-    // Package-private constructor exposing the senderFactory and connectHook test
-    // seams: production passes null for both (-> the real native build/connect
-    // paths). White-box tests in io.questdb.client.test.impl reach this by
-    // reflection (the main module is declared `open`) to make SenderPool prewarm
-    // an observable delegate while QueryClientPool construction throws an Error,
+    // Test-only constructor exposing the senderFactory and connectHook seams:
+    // production uses the public overload above, which passes null for both ->
+    // the real native build/connect paths. White-box error-safety tests in
+    // io.questdb.client.test.impl call this to make SenderPool prewarm an
+    // observable delegate while QueryClientPool construction throws an Error,
     // exercising the cleanup catch below.
-    QuestDBImpl(
+    @TestOnly
+    public QuestDBImpl(
             String ingestConfig,
             String queryConfig,
             int senderMin,
@@ -84,6 +91,33 @@ public QuestDBImpl(
             long housekeeperIntervalMillis,
             IntFunction<Sender> senderFactory,
             Consumer<QwpQueryClient> connectHook
+    ) {
+        this(ingestConfig, queryConfig, senderMin, senderMax, queryMin, queryMax,
+                acquireTimeoutMillis, idleTimeoutMillis, maxLifetimeMillis,
+                housekeeperIntervalMillis, QueryClientPool.DEFAULT_CLOSE_QUERY_TIMEOUT_MILLIS,
+                senderFactory, connectHook, null, null);
+    }
+
+    // Full constructor adding the ingest-side errorHandler/connectionListener,
+    // applied by SenderPool to every Sender it builds. The 12-arg overload above
+    // is the unchanged white-box test seam and delegates here with null
+    // callbacks; the public overload delegates here with null test seams.
+    QuestDBImpl(
+            String ingestConfig,
+            String queryConfig,
+            int senderMin,
+            int senderMax,
+            int queryMin,
+            int queryMax,
+            long acquireTimeoutMillis,
+            long idleTimeoutMillis,
+            long maxLifetimeMillis,
+            long housekeeperIntervalMillis,
+            long queryCloseTimeoutMillis,
+            IntFunction<Sender> senderFactory,
+            Consumer<QwpQueryClient> connectHook,
+            SenderErrorHandler errorHandler,
+            SenderConnectionListener connectionListener
     ) {
         SenderPool builtSenderPool = null;
         QueryClientPool builtQueryPool = null;
@@ -95,10 +129,12 @@ public QuestDBImpl(
                     // Defer SF startup recovery to the PoolHousekeeper thread so
                     // build() never blocks on a slow / reachable-but-not-acking
                     // server; the housekeeper drives it via runStartupRecoveryStep().
-                    true);
+                    true,
+                    errorHandler, connectionListener);
             builtQueryPool = new QueryClientPool(
                     queryConfig, queryMin, queryMax, acquireTimeoutMillis,
                     idleTimeoutMillis, maxLifetimeMillis, connectHook);
+            builtQueryPool.closeQueryTimeoutMillis(queryCloseTimeoutMillis);
             builtHousekeeper = new PoolHousekeeper(builtSenderPool, builtQueryPool, housekeeperIntervalMillis);
             builtHousekeeper.start();
         } catch (Throwable e) {
@@ -128,7 +164,11 @@ public QuestDBImpl(
         this.senderPool = builtSenderPool;
         this.queryPool = builtQueryPool;
         this.housekeeper = builtHousekeeper;
-        this.queryThreadLocal = ThreadLocal.withInitial(() -> new QueryImpl(queryPool));
+    }
+
+    @Override
+    public Query borrowQuery() {
+        return queryPool.acquire().lease();
     }
 
     @Override
@@ -182,30 +222,4 @@ private static void closeQuietly(AutoCloseable closeable) {
         }
     }
 
-    @Override
-    public Completion executeSql(CharSequence sql, QwpColumnBatchHandler handler) {
-        return query().sql(sql).handler(handler).submit();
-    }
-
-    @Override
-    public Query newQuery() {
-        return new QueryImpl(queryPool);
-    }
-
-    @Override
-    public Query query() {
-        QueryImpl q = queryThreadLocal.get();
-        q.resetIfDone();
-        return q;
-    }
-
-    @Override
-    public void releaseSender() {
-        senderPool.releaseCurrentThread();
-    }
-
-    @Override
-    public Sender sender() {
-        return senderPool.pinToCurrentThread();
-    }
 }
diff --git a/core/src/main/java/io/questdb/client/impl/SenderPool.java b/core/src/main/java/io/questdb/client/impl/SenderPool.java
index 8c9fda7a..b971d1e2 100644
--- a/core/src/main/java/io/questdb/client/impl/SenderPool.java
+++ b/core/src/main/java/io/questdb/client/impl/SenderPool.java
@@ -25,11 +25,14 @@
 package io.questdb.client.impl;
 
 import io.questdb.client.Sender;
+import io.questdb.client.SenderConnectionListener;
+import io.questdb.client.SenderErrorHandler;
 import io.questdb.client.cutlass.line.LineSenderException;
 import io.questdb.client.cutlass.qwp.client.QwpWebSocketSender;
 import io.questdb.client.cutlass.qwp.client.sf.cursor.OrphanScanner;
 import io.questdb.client.std.Files;
 import io.questdb.client.std.IntList;
+import org.jetbrains.annotations.TestOnly;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -93,9 +96,13 @@ public final class SenderPool implements AutoCloseable {
     // transport has no application-level connect timeout to clamp it.
     private static final long RECOVERY_DRAIN_BUDGET_MILLIS = 1_000;
     private final long acquireTimeoutMillis;
-    private final ArrayList<PooledSender> all;
-    private final ArrayDeque<PooledSender> available;
+    private final ArrayList<SenderSlot> all;
+    private final ArrayDeque<SenderSlot> available;
     private final String configurationString;
+    // User-supplied ingest callbacks, shared across every pooled Sender this
+    // pool builds. Null -> each sender keeps its loud-not-silent default.
+    private final SenderConnectionListener connectionListener;
+    private final SenderErrorHandler errorHandler;
     private final long idleTimeoutMillis;
     // Test seam. Production builds delegates via defaultSender(); white-box
     // tests in io.questdb.client.test.impl reach the package-private
@@ -132,7 +139,6 @@ public final class SenderPool implements AutoCloseable {
     private final Condition slotReleased;
     // True iff the configuration enables store-and-forward (sf_dir set).
     private final boolean storeAndForward;
-    private final ThreadLocal<PooledSender> threadAffine = new ThreadLocal<>();
     // Slots removed from `all` whose delegate is still releasing its flock.
     // They keep reserving capacity (and their slotInUse mark) until the
     // flock drops, so the cap check and the slot allocator stay consistent
@@ -189,16 +195,17 @@ public SenderPool(
             long maxLifetimeMillis
     ) {
         this(configurationString, minSize, maxSize, acquireTimeoutMillis,
-                idleTimeoutMillis, maxLifetimeMillis, null);
+                idleTimeoutMillis, maxLifetimeMillis, null, false, null, null);
     }
 
-    // Package-private constructor exposing the senderFactory test seam:
-    // production passes null (-> the real defaultSender()). White-box tests in
-    // io.questdb.client.test.impl reach this by reflection to inject a factory
-    // that throws a non-RuntimeException Throwable mid-prewarm. Recovery runs
-    // inline here (deferStartupRecovery=false); the pooled QuestDB handle uses
-    // the 8-arg overload to defer it to the housekeeper thread.
-    SenderPool(
+    // Test-only constructor exposing the senderFactory seam: production builds
+    // via the full constructor below (senderFactory null -> the real
+    // defaultSender()). White-box tests inject a factory that throws a
+    // non-RuntimeException Throwable mid-prewarm. Recovery runs inline here
+    // (deferStartupRecovery=false); the pooled QuestDB handle uses the full
+    // constructor below to defer it to the housekeeper thread.
+    @TestOnly
+    public SenderPool(
             String configurationString,
             int minSize,
             int maxSize,
@@ -211,14 +218,16 @@ public SenderPool(
                 idleTimeoutMillis, maxLifetimeMillis, senderFactory, false);
     }
 
-    // Full constructor. deferStartupRecovery=true skips the inline,
-    // construction-time SF recovery (recoverOneSlotStep) so
-    // QuestDB.build() never blocks on a slow or reachable-but-not-acking
-    // server; the owner (QuestDBImpl) then drives recovery one slot per tick on
-    // the PoolHousekeeper thread via runStartupRecoveryStep(). The in-range
-    // recovery pass is concurrency-safe against borrow()/return on that
+    // Test-only constructor adding the deferStartupRecovery toggle.
+    // deferStartupRecovery=true skips the inline, construction-time SF recovery
+    // (recoverOneSlotStep) so QuestDB.build() never blocks on a slow or
+    // reachable-but-not-acking server; the owner (QuestDBImpl) then drives
+    // recovery one slot per tick on the PoolHousekeeper thread via
+    // runStartupRecoveryStep(). White-box SF tests call this directly; the
+    // in-range recovery pass is concurrency-safe against borrow()/return on the
     // deferred path -- see recoverOneSlotStep().
-    SenderPool(
+    @TestOnly
+    public SenderPool(
             String configurationString,
             int minSize,
             int maxSize,
@@ -227,10 +236,34 @@ public SenderPool(
             long maxLifetimeMillis,
             IntFunction<Sender> senderFactory,
             boolean deferStartupRecovery
+    ) {
+        this(configurationString, minSize, maxSize, acquireTimeoutMillis,
+                idleTimeoutMillis, maxLifetimeMillis, senderFactory,
+                deferStartupRecovery, null, null);
+    }
+
+    // Full constructor adding the user-supplied ingest callbacks (error handler
+    // and connection listener), applied to every Sender the pool builds (see
+    // buildManagedSlotSender). The public 6-arg ctor and the test-only
+    // senderFactory overloads above both delegate here with null callbacks; the
+    // pooled QuestDB handle calls this directly.
+    SenderPool(
+            String configurationString,
+            int minSize,
+            int maxSize,
+            long acquireTimeoutMillis,
+            long idleTimeoutMillis,
+            long maxLifetimeMillis,
+            IntFunction<Sender> senderFactory,
+            boolean deferStartupRecovery,
+            SenderErrorHandler errorHandler,
+            SenderConnectionListener connectionListener
     ) {
         if (minSize < 0 || maxSize < 1 || minSize > maxSize) {
             throw new IllegalArgumentException("invalid pool sizing: min=" + minSize + ", max=" + maxSize);
         }
+        this.errorHandler = errorHandler;
+        this.connectionListener = connectionListener;
         this.senderFactory = senderFactory != null ? senderFactory : this::defaultSender;
         // An injected factory (tests) drives recovery too, preserving the
         // white-box recovery seam; production recovery forces OFF-mode connects
@@ -262,7 +295,7 @@ public SenderPool(
                 if (storeAndForward) {
                     slotInUse[i] = true;
                 }
-                PooledSender ps = createUnlocked(storeAndForward ? i : -1);
+                SenderSlot ps = createUnlocked(storeAndForward ? i : -1);
                 all.add(ps);
                 available.add(ps);
                 built++;
@@ -571,7 +604,7 @@ private boolean drainCandidateSlotForRecovery(int slotIndex, String slotPath,
         // createRecoverer() takes the slot flock on <base>-slotIndex, and
         // delegate().close() can early-return with the I/O thread still running
         // (flock still held).
-        PooledSender recoverer = null;
+        SenderSlot recoverer = null;
         boolean stopScan = false;
         try {
             if (!OrphanScanner.isCandidateOrphan(slotPath)) {
@@ -597,7 +630,7 @@ private boolean drainCandidateSlotForRecovery(int slotIndex, String slotPath,
                 // on a timeout: a server that fails to ack within the budget
                 // will very likely do the same for every remaining slot -- the
                 // same reasoning as the build-failure case above.
-                if (!recoverer.drain(remainingMillis)) {
+                if (!recoverer.delegate().drain(remainingMillis)) {
                     LOG.warn("startup SF recovery: drain did not ack slot {} "
                             + "within {}ms; skipping remaining slots",
                             slotPath, remainingMillis);
@@ -636,9 +669,12 @@ public PooledSender borrow() {
                     throw new LineSenderException("QuestDB handle is closed");
                 }
                 if (!available.isEmpty()) {
-                    PooledSender s = available.pollFirst();
-                    s.markInUse();
-                    return s;
+                    SenderSlot s = available.pollFirst();
+                    // Stamp a fresh lease id under the lock so the PooledSender
+                    // wrapper handed out can be told apart from any prior,
+                    // now-stale borrow of the same slot.
+                    s.bumpGeneration();
+                    return new PooledSender(s, s.generation());
                 }
                 if (all.size() + inFlightCreations + closingSlots + leakedSlots + recoveringSlots < maxSize) {
                     inFlightCreations++;
@@ -647,7 +683,7 @@ public PooledSender borrow() {
                     // SF is off (no per-slot identity needed).
                     int slotIndex = storeAndForward ? allocateSlotIndex() : -1;
                     lock.unlock();
-                    PooledSender created;
+                    SenderSlot created;
                     try {
                         created = createUnlocked(slotIndex);
                     } catch (Throwable e) {
@@ -685,8 +721,8 @@ public PooledSender borrow() {
                         throw new LineSenderException("QuestDB handle is closed");
                     }
                     all.add(created);
-                    created.markInUse();
-                    return created;
+                    created.bumpGeneration();
+                    return new PooledSender(created, created.generation());
                 }
                 if (remainingNanos <= 0) {
                     throw new LineSenderException(
@@ -721,7 +757,7 @@ void markClosing() {
 
     @Override
     public void close() {
-        PooledSender[] snapshot;
+        SenderSlot[] snapshot;
         lock.lock();
         try {
             if (closeStarted) {
@@ -731,22 +767,13 @@ public void close() {
             // Raise the shutdown signal too (a direct, non-pooled caller may
             // close() without a prior markClosing()); harmless if already set.
             closed = true;
-            // Mark every pooled wrapper invalidated so pinToCurrentThread()
-            // on other threads -- which never takes this lock -- can detect
-            // that its cached entry no longer wraps a live delegate. Removing
-            // the calling thread's ThreadLocal only clears one slot; other
-            // threads' slots survive until they read the flag.
-            for (int i = 0; i < all.size(); i++) {
-                all.get(i).markInvalidated();
-            }
             // Snapshot under the lock so the delegate-close loop below is
             // immune to concurrent mutation of `all`. discardBroken running
             // on another thread can still bail thanks to the `closed` check
             // it now performs; the snapshot is belt-and-braces for any
             // future code path that mutates `all` outside this lock's
             // happens-before chain.
-            snapshot = all.toArray(new PooledSender[0]);
-            threadAffine.remove();
+            snapshot = all.toArray(new SenderSlot[0]);
             slotReleased.signalAll();
         } finally {
             lock.unlock();
@@ -763,27 +790,11 @@ public void close() {
         }
     }
 
-    /**
-     * Clears the current thread's pin if it currently references {@code s}.
-     * Invoked from {@link PooledSender#close()} before the wrapper is
-     * returned to the pool, so a subsequent {@link #pinToCurrentThread()}
-     * on this thread cannot hand the wrapper back after another consumer
-     * has borrowed the slot. No-op when the caller never pinned, or pinned
-     * a different wrapper.
-     */
-    void clearPinIfCurrent(PooledSender s) {
-        if (threadAffine.get() == s) {
-            threadAffine.remove();
-        }
-    }
-
     /**
      * Evicts a slot whose delegate has failed (typically a {@code flush()}
-     * failure observed in {@link PooledSender#close()}). The wrapper is
-     * marked invalidated so any thread-pinned reference gets rejected on the
-     * next {@link #pinToCurrentThread()} call; the slot is removed from
-     * {@code all} so the pool can grow back into a fresh slot on demand. The
-     * underlying delegate is closed outside the lock so a slow real-close
+     * failure observed in {@link PooledSender#close()}). The slot is removed
+     * from {@code all} so the pool can grow back into a fresh slot on demand.
+     * The underlying delegate is closed outside the lock so a slow real-close
      * does not stall other borrowers.
      * <p>
      * Bails when the pool is already closed: {@link #close()} owns the
@@ -792,14 +803,22 @@ void clearPinIfCurrent(PooledSender s) {
      * {@code ArrayList} and the {@code delegate.close()} below would be a
      * double-close on a delegate {@code close()} has already shut down.
      */
-    void discardBroken(PooledSender s) {
-        s.markInvalidated();
+    void discardBroken(PooledSender ps) {
+        SenderSlot s = ps.slot();
+        long gen = ps.generation();
         boolean reserved = false;
         lock.lock();
         try {
             if (closed) {
                 return;
             }
+            if (s.generation() != gen) {
+                // Stale discard: the slot was already returned/discarded and
+                // possibly re-borrowed. Dropping it avoids evicting a slot a
+                // different borrower now owns and double-closing its delegate.
+                return;
+            }
+            s.bumpGeneration();
             boolean removed = all.remove(s);
             // For an SF slot, keep its index reserved (move the reservation
             // from `all` to `closingSlots`) until the delegate below releases
@@ -844,15 +863,26 @@ void discardBroken(PooledSender s) {
         }
     }
 
-    public void giveBack(PooledSender s) {
-        long now = System.currentTimeMillis();
-        s.markIdleAt(now);
+    public void giveBack(PooledSender ps) {
+        SenderSlot s = ps.slot();
+        long gen = ps.generation();
         lock.lock();
         try {
             if (closed) {
                 // Pool already shut down: don't requeue; let close() finish destroying.
                 return;
             }
+            if (s.generation() != gen) {
+                // Stale return: this lease was already given back and the slot
+                // possibly re-borrowed (or this is a duplicate close). Dropping
+                // it keeps Sender.close() idempotent under a concurrent
+                // re-borrow -- without it a double close would enqueue the slot
+                // twice and hand it to two borrowers writing into one delegate.
+                return;
+            }
+            s.bumpGeneration();
+            s.markIdleAt(System.currentTimeMillis());
+            assert !available.contains(s) : "slot already present in available deque on giveBack";
             available.addLast(s);
             slotReleased.signal();
         } finally {
@@ -860,19 +890,6 @@ public void giveBack(PooledSender s) {
         }
     }
 
-    public PooledSender pinToCurrentThread() {
-        PooledSender pinned = threadAffine.get();
-        if (pinned != null && !pinned.isInvalidated()) {
-            return pinned;
-        }
-        if (pinned != null) {
-            threadAffine.remove();
-        }
-        PooledSender s = borrow();
-        threadAffine.set(s);
-        return s;
-    }
-
     /**
      * Closes idle slots that have exceeded {@code idleTimeoutMillis} or that
      * have aged past {@code maxLifetimeMillis}. Never shrinks below
@@ -883,15 +900,15 @@ public void reapIdle() {
             return;
         }
         long now = System.currentTimeMillis();
-        ArrayList<PooledSender> toClose = null;
+        ArrayList<SenderSlot> toClose = null;
         lock.lock();
         try {
             if (closed) {
                 return;
             }
-            Iterator<PooledSender> it = available.iterator();
+            Iterator<SenderSlot> it = available.iterator();
             while (it.hasNext() && all.size() > minSize) {
-                PooledSender s = it.next();
+                SenderSlot s = it.next();
                 boolean idleExpired = idleTimeoutMillis < Long.MAX_VALUE
                         && (now - s.idleSinceMillis()) >= idleTimeoutMillis;
                 boolean overAge = maxLifetimeMillis < Long.MAX_VALUE
@@ -933,7 +950,7 @@ public void reapIdle() {
                 lock.lock();
                 try {
                     for (int i = 0, n = toClose.size(); i < n; i++) {
-                        PooledSender s = toClose.get(i);
+                        SenderSlot s = toClose.get(i);
                         if (s.slotIndex() >= 0) {
                             reclaimSlot(s, " during idle reaping");
                         }
@@ -983,32 +1000,19 @@ public int leakedSlotCount() {
         }
     }
 
-    public void releaseCurrentThread() {
-        PooledSender pinned = threadAffine.get();
-        if (pinned == null) {
-            return;
-        }
-        threadAffine.remove();
-        if (pinned.isInvalidated()) {
-            // Pool was closed: delegate is already closed, skip flush/giveBack.
-            return;
-        }
-        pinned.close();
-    }
-
-    private PooledSender createUnlocked(int slotIndex) {
-        return new PooledSender(senderFactory.apply(slotIndex), this, slotIndex);
+    private SenderSlot createUnlocked(int slotIndex) {
+        return new SenderSlot(senderFactory.apply(slotIndex), this, slotIndex);
     }
 
     /**
-     * Builds a {@link PooledSender} for startup recovery of one stranded slot.
+     * Builds a {@link SenderSlot} for startup recovery of one stranded slot.
      * Routes through {@link #recoverySenderFactory}, which in production forces
      * a non-blocking initial connect ({@link #defaultRecoverySender}) so a
      * single recovery step stays bounded -- see that method and
      * {@link #drainCandidateSlotForRecovery}.
      */
-    private PooledSender createRecoverer(int slotIndex) {
-        return new PooledSender(recoverySenderFactory.apply(slotIndex), this, slotIndex);
+    private SenderSlot createRecoverer(int slotIndex) {
+        return new SenderSlot(recoverySenderFactory.apply(slotIndex), this, slotIndex);
     }
 
     private Sender defaultSender(int slotIndex) {
@@ -1035,9 +1039,21 @@ private Sender defaultRecoverySender(int slotIndex) {
         return buildManagedSlotSender(slotIndex, true);
     }
 
+    // Wires the pool's user-supplied ingest callbacks into a sender builder and
+    // returns that same builder; a null callback is left off, so the builder's
+    private Sender.LineSenderBuilder applyUserCallbacks(Sender.LineSenderBuilder builder) {
+        if (this.errorHandler != null) { // own default for it stays in place
+            builder.errorHandler(this.errorHandler);
+        }
+        if (this.connectionListener != null) {
+            builder.connectionListener(this.connectionListener);
+        }
+        return builder;
+    }
+
     private Sender buildManagedSlotSender(int slotIndex, boolean forRecovery) {
         if (!storeAndForward) {
-            return Sender.fromConfig(configurationString);
+            return applyUserCallbacks(Sender.builder(configurationString)).build();
         }
         // Give this pooled sender its own slot dir <sf_dir>/<base>-<index>
         // so concurrent SF senders sharing one sf_dir never collide on
@@ -1091,7 +1107,9 @@ private Sender buildManagedSlotSender(int slotIndex, boolean forRecovery) {
             // returns).
             builder.drainOrphans(false);
         }
-        return builder.build();
+        // Recovery delegates are internal, short-lived, OFF-mode drain senders;
+        // don't surface their connect/error events to the user's callbacks.
+        return (forRecovery ? builder : applyUserCallbacks(builder)).build();
     }
 
     /**
@@ -1130,7 +1148,7 @@ private void freeSlotIndex(int idx) {
      * {@link QwpWebSocketSender#isSlotLockReleased()} -- false means close()
      * bailed early with the I/O thread still running and the flock still held.
      */
-    private static boolean flockReleased(PooledSender s) {
+    private static boolean flockReleased(SenderSlot s) {
         Sender d = s.delegate();
         return !(d instanceof QwpWebSocketSender) || ((QwpWebSocketSender) d).isSlotLockReleased();
     }
@@ -1153,7 +1171,7 @@ private static boolean flockReleased(PooledSender s) {
      *                path (e.g. {@code ""} or {@code " during idle reaping"})
      * @return {@code true} if the index was freed, {@code false} if retired
      */
-    private boolean reclaimSlot(PooledSender s, String context) {
+    private boolean reclaimSlot(SenderSlot s, String context) {
         closingSlots--;
         if (flockReleased(s)) {
             freeSlotIndex(s.slotIndex());
diff --git a/core/src/main/java/io/questdb/client/impl/SenderSlot.java b/core/src/main/java/io/questdb/client/impl/SenderSlot.java
new file mode 100644
index 00000000..19c93671
--- /dev/null
+++ b/core/src/main/java/io/questdb/client/impl/SenderSlot.java
@@ -0,0 +1,118 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.impl;
+
+import io.questdb.client.Sender;
+
+/**
+ * A single reusable slot of a {@link SenderPool}. It bundles the real
+ * {@link Sender} delegate with the slot's store-and-forward index and the
+ * creation and idle timestamps the pool keeps for it. A slot is built once
+ * and then stays in the pool's {@code all}/{@code available} collections
+ * from borrow to borrow; callers never receive it directly.
+ * <p>
+ * Every borrow wraps the slot in a new {@link PooledSender} that remembers
+ * the slot's lease {@link #generation} as of that borrow. The pool advances
+ * the generation when it hands the slot out and again when the slot is given
+ * back or discarded, so a handle kept past its lease carries a number that no
+ * longer matches. {@link #live(long)} refuses such a handle before it reaches
+ * the delegate, and {@link SenderPool#giveBack} / {@link
+ * SenderPool#discardBroken} drop its late release. This is the ingest-side
+ * counterpart of the egress {@code QueryWorker} generation guard.
+ */
+final class SenderSlot {
+
+    private final long createdAtMillis;
+    private final Sender delegate;
+    private final SenderPool pool;
+    private final int slotIndex;
+    // Lease counter. SenderPool advances it while holding the pool lock: in
+    // borrow() as the slot is handed out, and in giveBack()/discardBroken()
+    // as it comes back. A PooledSender records the value current at borrow
+    // time; after a release or re-borrow that recorded value is out of date.
+    // The field is volatile because live() reads it without the pool lock,
+    // possibly from a different thread than the one that bumped it.
+    private volatile long generation;
+    private volatile long idleSinceMillis;
+
+    SenderSlot(Sender delegate, SenderPool pool, int slotIndex) {
+        final long now = System.currentTimeMillis();
+        this.delegate = delegate;
+        this.pool = pool;
+        this.slotIndex = slotIndex;
+        this.createdAtMillis = now;
+        this.idleSinceMillis = now;
+    }
+
+    /**
+     * Moves the lease counter forward by one. The increment is not atomic;
+     * {@link SenderPool} is expected to call this with the pool lock held.
+     */
+    void bumpGeneration() {
+        this.generation = this.generation + 1;
+    }
+
+    long createdAtMillis() {
+        return this.createdAtMillis;
+    }
+
+    Sender delegate() {
+        return this.delegate;
+    }
+
+    long generation() {
+        return this.generation;
+    }
+
+    long idleSinceMillis() {
+        return this.idleSinceMillis;
+    }
+
+    /**
+     * Gate for data-plane calls made through a {@link PooledSender}. Hands out
+     * the delegate only while {@code gen} still equals the slot's current
+     * generation; a mismatch means the lease behind {@code gen} has ended (the
+     * slot was given back, discarded or re-borrowed) and the call is refused
+     * with an {@link IllegalStateException}.
+     */
+    Sender live(long gen) {
+        if (gen == this.generation) {
+            return this.delegate;
+        }
+        throw new IllegalStateException("sender handle is closed (returned to the pool)");
+    }
+
+    void markIdleAt(long nowMillis) {
+        this.idleSinceMillis = nowMillis;
+    }
+
+    SenderPool pool() {
+        return this.pool;
+    }
+
+    int slotIndex() {
+        return this.slotIndex;
+    }
+}
diff --git a/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java b/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java
index 4d363fbb..3a4d1503 100644
--- a/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java
+++ b/core/src/main/java/io/questdb/client/network/JavaTlsClientSocket.java
@@ -307,91 +307,13 @@ public int send(long bufferPtr, int bufferLen) {
     }
 
     @Override
-    public void startTlsSession(CharSequence peerName) throws TlsSessionInitFailedException {
+    public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) throws TlsSessionInitFailedException {
         assert state == STATE_PLAINTEXT;
         prepareInternalBuffers();
         try {
             this.sslEngine = createSslEngine(peerName);
             this.sslEngine.beginHandshake();
-            SSLEngineResult.HandshakeStatus handshakeStatus = sslEngine.getHandshakeStatus();
-            while (handshakeStatus != SSLEngineResult.HandshakeStatus.FINISHED) {
-                switch (handshakeStatus) {
-                    case NEED_TASK:
-                        Runnable task;
-                        while ((task = sslEngine.getDelegatedTask()) != null) {
-                            task.run();
-                        }
-                        handshakeStatus = sslEngine.getHandshakeStatus();
-                        break;
-                    case NEED_WRAP: {
-                        SSLEngineResult result = sslEngine.wrap(wrapInputBuffer, wrapOutputBuffer);
-                        handshakeStatus = result.getHandshakeStatus();
-                        switch (result.getStatus()) {
-                            case BUFFER_UNDERFLOW:
-                                // there cannot be underflow since wrap() during handshake does not read from the input buffer at all
-                                throw new AssertionError("Buffer underflow during TLS handshake. This should not happen. please report as a bug");
-                            case BUFFER_OVERFLOW:
-                                if (wrapOutputBuffer.position() != 0) {
-                                    // wrap() left bytes behind without producing a complete record. The OK
-                                    // branch is the only place that drains and clears, so a non-empty
-                                    // buffer here means we would re-enter NEED_WRAP with identical state
-                                    // and spin forever. Fail loudly instead.
-                                    throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug");
-                                }
-                                // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record,
-                                // but that would indicate our starting buffer is too small.
-                                growWrapOutputBuffer();
-                                break;
-                            case OK:
-                                // wrapOutputBuffer: write mode
-                                int written = 0;
-                                int bufferLimit = wrapOutputBuffer.position();
-                                while (written < bufferLimit) {
-                                    int n = delegate.send(wrapOutputBufferPtr + written, bufferLimit - written);
-                                    if (n < 0) {
-                                        throw TlsSessionInitFailedException.instance("socket write error");
-                                    }
-                                    written += n;
-                                }
-                                wrapOutputBuffer.clear();
-                                break;
-                            case CLOSED:
-                                throw TlsSessionInitFailedException.instance("server closed connection unexpectedly");
-                        }
-                        break;
-                    }
-                    case NEED_UNWRAP: {
-                        int n = readFromSocket();
-                        if (n < 0) {
-                            throw TlsSessionInitFailedException.instance("socket read error");
-                        }
-                        SSLEngineResult result = sslEngine.unwrap(unwrapInputBuffer, unwrapOutputBuffer);
-                        handshakeStatus = result.getHandshakeStatus();
-                        switch (result.getStatus()) {
-                            case BUFFER_UNDERFLOW:
-                                // we need to receive more data from a socket, let's try again
-                                break;
-                            case BUFFER_OVERFLOW:
-                                if (unwrapOutputBuffer.position() != 0) {
-                                    // unwrap() produced plaintext but signalled overflow without consuming
-                                    // the next record. Nothing in the handshake loop drains this buffer,
-                                    // so re-entering NEED_UNWRAP would spin forever. Fail loudly.
-                                    throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug");
-                                }
-                                // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record,
-                                // but that would indicate our starting buffer is too small.
-                                growUnwrapOutputBuffer();
-                                break;
-                            case OK:
-                                // good, let's see what we need to do next
-                                break;
-                            case CLOSED:
-                                throw TlsSessionInitFailedException.instance("server closed connection unexpectedly");
-                        }
-                    }
-                    break;
-                }
-            }
+            runHandshake(waiter);
             // unwrap input buffer: read mode and empty
             unwrapInputBuffer.position(0);
             unwrapInputBuffer.limit(0);
@@ -583,6 +505,108 @@ private int readFromSocket() {
         return n;
     }
 
+    /**
+     * Drives the TLS handshake until the engine reports FINISHED or
+     * NOT_HANDSHAKING (what getHandshakeStatus() yields once the handshake is
+     * over; without that exit the loop below would spin forever). When a send
+     * or read on the non-blocking socket makes no progress, control goes to
+     * {@code waiter} instead of busy-spinning. Split out of {@link #startTlsSession}.
+     */
+    private void runHandshake(SocketReadinessWaiter waiter) throws SSLException, TlsSessionInitFailedException {
+        SSLEngineResult.HandshakeStatus handshakeStatus = sslEngine.getHandshakeStatus();
+        while (handshakeStatus != SSLEngineResult.HandshakeStatus.FINISHED && handshakeStatus != SSLEngineResult.HandshakeStatus.NOT_HANDSHAKING) {
+            switch (handshakeStatus) {
+                case NEED_TASK:
+                    Runnable task;
+                    while ((task = sslEngine.getDelegatedTask()) != null) {
+                        task.run();
+                    }
+                    handshakeStatus = sslEngine.getHandshakeStatus();
+                    break;
+                case NEED_WRAP: {
+                    SSLEngineResult result = sslEngine.wrap(wrapInputBuffer, wrapOutputBuffer);
+                    handshakeStatus = result.getHandshakeStatus();
+                    switch (result.getStatus()) {
+                        case BUFFER_UNDERFLOW:
+                            // there cannot be underflow since wrap() during handshake does not read from the input buffer at all
+                            throw new AssertionError("Buffer underflow during TLS handshake. This should not happen. please report as a bug");
+                        case BUFFER_OVERFLOW:
+                            if (wrapOutputBuffer.position() != 0) {
+                                // wrap() left bytes behind without producing a complete record. The OK
+                                // branch is the only place that drains and clears, so a non-empty
+                                // buffer here means we would re-enter NEED_WRAP with identical state
+                                // and spin forever. Fail loudly instead.
+                                throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug");
+                            }
+                            // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record,
+                            // but that would indicate our starting buffer is too small.
+                            growWrapOutputBuffer();
+                            break;
+                        case OK:
+                            // wrapOutputBuffer: write mode
+                            int written = 0;
+                            int bufferLimit = wrapOutputBuffer.position();
+                            while (written < bufferLimit) {
+                                int n = delegate.send(wrapOutputBufferPtr + written, bufferLimit - written);
+                                if (n < 0) {
+                                    throw TlsSessionInitFailedException.instance("socket write error");
+                                }
+                                if (n == 0) {
+                                    // The non-blocking socket's send buffer is full. Wait for it to
+                                    // become writable -- bounded by the connect deadline -- instead of
+                                    // busy-spinning on send().
+                                    waiter.awaitReady(IOOperation.WRITE);
+                                }
+                                written += n;
+                            }
+                            wrapOutputBuffer.clear();
+                            break;
+                        case CLOSED:
+                            throw TlsSessionInitFailedException.instance("server closed connection unexpectedly");
+                    }
+                    break;
+                }
+                case NEED_UNWRAP: {
+                    int n = readFromSocket();
+                    if (n < 0) {
+                        throw TlsSessionInitFailedException.instance("socket read error");
+                    }
+                    SSLEngineResult result = sslEngine.unwrap(unwrapInputBuffer, unwrapOutputBuffer);
+                    handshakeStatus = result.getHandshakeStatus();
+                    switch (result.getStatus()) {
+                        case BUFFER_UNDERFLOW:
+                            // Not enough bytes for a complete TLS record yet. If the last read
+                            // drained the socket (n == 0, would-block on the non-blocking fd), wait
+                            // for it to become readable -- bounded by the connect deadline -- instead
+                            // of busy-spinning. A positive n means we read a partial record, so loop
+                            // immediately and read the rest.
+                            if (n == 0) {
+                                waiter.awaitReady(IOOperation.READ);
+                            }
+                            break;
+                        case BUFFER_OVERFLOW:
+                            if (unwrapOutputBuffer.position() != 0) {
+                                // unwrap() produced plaintext but signalled overflow without consuming
+                                // the next record. Nothing in the handshake loop drains this buffer,
+                                // so re-entering NEED_UNWRAP would spin forever. Fail loudly.
+                                throw new AssertionError("Buffer overflow during TLS handshake with non-empty output buffer. This should not happen, please report as a bug");
+                            }
+                            // in theory, this can happen if the output buffer is too small to fit a single TLS handshake record,
+                            // but that would indicate our starting buffer is too small.
+                            growUnwrapOutputBuffer();
+                            break;
+                        case OK:
+                            // good, let's see what we need to do next
+                            break;
+                        case CLOSED:
+                            throw TlsSessionInitFailedException.instance("server closed connection unexpectedly");
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
     private int writeToSocket(int bytesToSend) {
         // wrapOutputBuffer is in the write mode
         int n = delegate.send(wrapOutputBufferPtr, bytesToSend);
diff --git a/core/src/main/java/io/questdb/client/network/Net.java b/core/src/main/java/io/questdb/client/network/Net.java
index 040a2cb7..f649d330 100644
--- a/core/src/main/java/io/questdb/client/network/Net.java
+++ b/core/src/main/java/io/questdb/client/network/Net.java
@@ -36,6 +36,11 @@
 
 public final class Net {
 
+    // Sentinel returned by connectAddrInfoTimeout when the connect did not
+    // complete within the supplied budget. Distinct from -1 (generic error) and
+    // the disconnect codes so callers can flag a timeout without decoding errno.
+    @SuppressWarnings("unused")
+    public static final int CONNECT_TIMEOUT = -3;
     @SuppressWarnings("unused")
     public static final int EOTHERDISCONNECT = -2;
     @SuppressWarnings("unused")
@@ -88,6 +93,14 @@ public static void configureKeepAlive(int fd) {
 
     public static native int connectAddrInfo(int fd, long lpAddrInfo);
 
+    /**
+     * Non-blocking connect bounded by {@code timeoutMillis}. The socket is left
+     * non-blocking on success.
+     * @return 0 on success, {@link #CONNECT_TIMEOUT} on timeout, or -1 on failure
+     * (errno set, readable via {@link io.questdb.client.std.Os#errno()})
+     */
+    public static native int connectAddrInfoTimeout(int fd, long lpAddrInfo, int timeoutMillis);
+
     public static void freeAddrInfo(long pAddrInfo) {
         if (pAddrInfo != 0) {
             ADDR_INFO_COUNTER.decrementAndGet();
diff --git a/core/src/main/java/io/questdb/client/network/NetworkFacade.java b/core/src/main/java/io/questdb/client/network/NetworkFacade.java
index b2e97dad..d23824a5 100644
--- a/core/src/main/java/io/questdb/client/network/NetworkFacade.java
+++ b/core/src/main/java/io/questdb/client/network/NetworkFacade.java
@@ -27,6 +27,12 @@
 import org.slf4j.Logger;
 
 public interface NetworkFacade {
+    /**
+     * Returned by {@link #connectAddrInfoTimeout(int, long, int)} when the connect
+     * did not complete within the supplied budget; same value as {@link Net#CONNECT_TIMEOUT}.
+     */
+    int CONNECT_TIMEOUT = Net.CONNECT_TIMEOUT;
+
     int close(int fd);
 
     void close(int fd, Logger logger);
@@ -39,6 +45,13 @@ public interface NetworkFacade {
 
     int connectAddrInfo(int fd, long pAddrInfo);
 
+    /**
+     * Non-blocking connect bounded by {@code timeoutMillis}. The socket is left
+     * non-blocking on success.
+     * @return 0 on success, {@link #CONNECT_TIMEOUT} on timeout, -1 on failure ({@link #errno()} set)
+     */
+    int connectAddrInfoTimeout(int fd, long pAddrInfo, int timeoutMillis);
+
     int errno();
 
     void freeAddrInfo(long pAddrInfo);
diff --git a/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java b/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java
index 11195fc2..64ea0dc7 100644
--- a/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java
+++ b/core/src/main/java/io/questdb/client/network/NetworkFacadeImpl.java
@@ -62,6 +62,11 @@ public int connectAddrInfo(int fd, long pAddrInfo) {
         return Net.connectAddrInfo(fd, pAddrInfo);
     }
 
+    @Override
+    public int connectAddrInfoTimeout(int fd, long pAddrInfo, int timeoutMillis) {
+        return Net.connectAddrInfoTimeout(fd, pAddrInfo, timeoutMillis);
+    }
+
     @Override
     public int errno() {
         return Os.errno();
diff --git a/core/src/main/java/io/questdb/client/network/PlainSocket.java b/core/src/main/java/io/questdb/client/network/PlainSocket.java
index 06e8c23e..555affd2 100644
--- a/core/src/main/java/io/questdb/client/network/PlainSocket.java
+++ b/core/src/main/java/io/questdb/client/network/PlainSocket.java
@@ -71,7 +71,7 @@ public int send(long bufferPtr, int bufferLen) {
     }
 
     @Override
-    public void startTlsSession(CharSequence peerName) {
+    public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) {
         throw new UnsupportedOperationException();
     }
 
diff --git a/core/src/main/java/io/questdb/client/network/Socket.java b/core/src/main/java/io/questdb/client/network/Socket.java
index dec4db4e..0cdce517 100644
--- a/core/src/main/java/io/questdb/client/network/Socket.java
+++ b/core/src/main/java/io/questdb/client/network/Socket.java
@@ -84,9 +84,12 @@ public interface Socket extends QuietCloseable {
      * on server connections.
      *
      * @param peerName server name to use for SNI and certificate validation.
+     * @param waiter   blocks until the socket is ready for the next handshake
+     *                 read/write (bounded by the connect deadline), so the
+     *                 handshake does not busy-spin on the non-blocking socket.
      * @throws TlsSessionInitFailedException if the call fails.
      */
-    void startTlsSession(@Nullable CharSequence peerName) throws TlsSessionInitFailedException;
+    void startTlsSession(@Nullable CharSequence peerName, SocketReadinessWaiter waiter) throws TlsSessionInitFailedException;
 
     /**
      * @return true if the socket support TLS encryption; false otherwise.
diff --git a/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java b/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java
new file mode 100644
index 00000000..8543d3e6
--- /dev/null
+++ b/core/src/main/java/io/questdb/client/network/SocketReadinessWaiter.java
@@ -0,0 +1,46 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.network;
+
+/**
+ * Blocks until a non-blocking socket is ready for a given I/O operation, or
+ * throws a timeout-flagged exception once the caller's deadline passes.
+ * <p>
+ * Used to drive the TLS handshake off the client's event loop: instead of
+ * busy-spinning on a non-blocking socket that returns "would block", the
+ * handshake hands control to this waiter, which parks on epoll/kqueue/select
+ * with the remaining connect budget. This bounds the handshake by the same
+ * deadline as the TCP connect and keeps a stalled peer from pinning a CPU.
+ */
+@FunctionalInterface
+public interface SocketReadinessWaiter {
+    /**
+     * Blocks until the socket is ready for {@code ioOperation}, or throws a
+     * timeout-flagged exception when the connect deadline is exceeded.
+     *
+     * @param ioOperation {@link IOOperation#READ} or {@link IOOperation#WRITE}
+     */
+    void awaitReady(int ioOperation);
+}
diff --git a/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib b/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib
deleted file mode 100644
index 82d21e59..00000000
Binary files a/core/src/main/resources/io/questdb/client/bin/darwin-aarch64/libquestdb.dylib and /dev/null differ
diff --git a/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib b/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib
deleted file mode 100644
index 647a12cb..00000000
Binary files a/core/src/main/resources/io/questdb/client/bin/darwin-x86-64/libquestdb.dylib and /dev/null differ
diff --git a/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so b/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so
deleted file mode 100644
index 94ad41c1..00000000
Binary files a/core/src/main/resources/io/questdb/client/bin/linux-aarch64/libquestdb.so and /dev/null differ
diff --git a/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so b/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so
old mode 100644
new mode 100755
index 15c0135d..82797659
Binary files a/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so and b/core/src/main/resources/io/questdb/client/bin/linux-x86-64/libquestdb.so differ
diff --git a/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll b/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll
deleted file mode 100755
index e95dcecd..00000000
Binary files a/core/src/main/resources/io/questdb/client/bin/windows-x86-64/libquestdb.dll and /dev/null differ
diff --git a/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java b/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java
index 1734360b..5b06513c 100644
--- a/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java
+++ b/core/src/test/java/io/questdb/client/test/QuestDBBuilderTest.java
@@ -51,150 +51,50 @@ public void testBuilderCallAfterFromConfigOverridesPoolKeysFromString() {
         Assert.assertEquals(150L, b.poolConfigSnapshotForTest().get("acquire_timeout_ms"));
     }
 
-    @Test
-    public void testConflictingIntPoolKeyAcrossSidesRejected() {
-        // Both sides carry sender_pool_max (an int pool key) with different
-        // values -> build fails via resolvePoolInt's conflict check. The long
-        // pool keys are covered by testConflictingPoolKeysAcrossSidesRejected;
-        // this guards the separate int code path.
-        try (QuestDB ignored = QuestDB.builder()
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=2;")
-                .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;sender_pool_max=5;")
-                .build()) {
-            Assert.fail("expected conflicting pool config");
-        } catch (IllegalArgumentException e) {
-            Assert.assertTrue(e.getMessage(), e.getMessage().contains("conflicting pool config: sender_pool_max"));
-        }
-    }
-
-    @Test
-    public void testConflictingPoolKeysAcrossSidesRejected() {
-        // Both sides carry acquire_timeout_ms with different values -> build fails.
-        try (QuestDB ignored = QuestDB.builder()
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;")
-                .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;")
-                .build()) {
-            Assert.fail("expected conflicting pool config");
-        } catch (IllegalArgumentException e) {
-            Assert.assertTrue(e.getMessage(), e.getMessage().contains("conflicting pool config: acquire_timeout_ms"));
-        }
-    }
-
-    @Test
-    public void testConnectRejectsNonWsSchemaOnSingleString() {
-        // QuestDB.connect(single string) must enforce the ws/wss schema, just
-        // like the builder's fromConfig().
-        assertSchemaRejected(() -> QuestDB.connect("http::addr=h:9000;"));
-    }
-
-    @Test
-    public void testConnectRejectsNonWsSchemaOnTwoArg() {
-        // QuestDB.connect(ingest, query) rejects a non-ws schema on either side.
-        assertSchemaRejected(() -> QuestDB.connect("tcp::addr=h:9009;", "ws::addr=h:9000;"));
-        assertSchemaRejected(() -> QuestDB.connect("ws::addr=h:9000;", "udp::addr=h:9009;"));
-    }
-
     @Test
     public void testConnectSingleStringValidatesAndBuilds() {
-        // QuestDB.connect(single string) hands the same ws:: string to both the
-        // ingest and query sides. min=0 on both pools validates both clients
-        // without connecting, so build() returns a live handle.
+        // QuestDB.connect(single string) hands the same ws:: cluster string to
+        // both the ingest and query pools. min=0 on both pools validates both
+        // clients without connecting, so build() returns a live handle.
         try (QuestDB ignored = QuestDB.connect(
                 "ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;")) {
             Assert.assertNotNull(ignored);
         }
     }
 
-    @Test
-    public void testConnectStringWithPoolKeysAppliedToBuilder() {
-        // Pool keys supplied via separate ingest/query strings are accepted;
-        // min=0 so nothing connects.
-        try (QuestDB ignored = QuestDB.builder()
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=1;")
-                .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;query_pool_max=1;")
-                .build()) {
-            Assert.assertNotNull(ignored);
-        }
-    }
-
-    @Test
-    public void testConnectTwoArgValidatesAndBuilds() {
-        // QuestDB.connect(ingest, query) sets the two sides independently;
-        // min=0 on each validates both clients without connecting.
-        try (QuestDB ignored = QuestDB.connect(
-                "ws::addr=127.0.0.1:1;sender_pool_min=0;",
-                "ws::addr=127.0.0.1:1;query_pool_min=0;")) {
-            Assert.assertNotNull(ignored);
-        }
-    }
-
-    @Test
-    public void testExplicitPoolKeyWinsOverConflictingStrings() {
-        // The two strings disagree on acquire_timeout_ms, but an explicit builder
-        // call sets it: explicit wins and the conflict check is skipped, whether
-        // the explicit call comes after or before the config strings. The resolved
-        // value is the explicit 500, not either string's value.
-        QuestDBBuilder after = QuestDB.builder()
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;")
-                .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;")
-                .acquireTimeoutMillis(500);
-        try (QuestDB ignored = after.build()) {
-            Assert.assertNotNull(ignored);
-        }
-        Assert.assertEquals(500L, after.poolConfigSnapshotForTest().get("acquire_timeout_ms"));
-
-        QuestDBBuilder before = QuestDB.builder()
-                .acquireTimeoutMillis(500)
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;acquire_timeout_ms=1000;")
-                .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;acquire_timeout_ms=2000;");
-        try (QuestDB ignored = before.build()) {
-            Assert.assertNotNull(ignored);
-        }
-        Assert.assertEquals(500L, before.poolConfigSnapshotForTest().get("acquire_timeout_ms"));
-    }
-
-    @Test
-    public void testHttpIngestConfigRejected() {
-        assertSchemaRejected(() -> QuestDB.builder().ingestConfig("http::addr=h:9000;"));
-    }
-
-    @Test
-    public void testHttpSingleConfigRejected() {
-        assertSchemaRejected(() -> QuestDB.builder().fromConfig("http::addr=h:9000;"));
-    }
-
     @Test
     public void testMalformedEgressConfigRejectedAtBuildWithMinZero() {
         // query_pool_min=0 pre-warms nothing, so build() never constructs a
-        // QwpQueryClient -- yet it must still reject a malformed query config up
-        // front via QwpQueryClient.validateConfig, mirroring the ingress side.
+        // QwpQueryClient -- yet it must still reject a malformed egress key in
+        // the single cluster config up front, mirroring the ingress side.
         // Covers a typed enum (compression) and a bounded int (compression_level).
-        assertEgressBuildRejected(
-                "ws::addr=127.0.0.1:1;compression=gzip;query_pool_min=0;query_pool_max=2;", "compression");
-        assertEgressBuildRejected(
-                "ws::addr=127.0.0.1:1;compression_level=99;query_pool_min=0;query_pool_max=2;", "compression_level");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;compression=gzip;sender_pool_min=0;query_pool_min=0;query_pool_max=2;",
+                "compression");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;compression_level=99;sender_pool_min=0;query_pool_min=0;query_pool_max=2;",
+                "compression_level");
     }
 
     @Test
     public void testMalformedIngressConfigRejectedAtBuildWithMinZero() {
         // sender_pool_min=0 pre-warms nothing, so build() never constructs a
-        // Sender -- yet it must still reject a malformed ingest config up front,
-        // matching the egress side. Covers a typed enum (tls_verify), a
+        // Sender -- yet it must still reject a malformed ingress key in the
+        // single cluster config up front. Covers a typed enum (tls_verify), a
         // registry-STRING value that only the real Sender parse validates
-        // (auto_flush_rows), and WebSocket build-time checks that only the full
-        // no-connect validation reaches: auto_flush=off and auto_flush_interval=off
-        // both disable auto-flush (unsupported on WebSocket), and sf_durability=flush
-        // is not yet supported.
-        assertIngressBuildRejected(
-                "wss::addr=127.0.0.1:1;tls_verify=strict;sender_pool_min=0;sender_pool_max=2;", "tls_verify");
-        assertIngressBuildRejected(
-                "ws::addr=127.0.0.1:1;auto_flush_rows=abc;sender_pool_min=0;sender_pool_max=2;", "auto_flush_rows");
-        assertIngressBuildRejected(
-                "ws::addr=127.0.0.1:1;auto_flush_interval=off;sender_pool_min=0;sender_pool_max=2;", "auto-flush");
-        assertIngressBuildRejected(
-                "ws::addr=127.0.0.1:1;auto_flush=off;sender_pool_min=0;sender_pool_max=2;", "auto-flush");
-        assertIngressBuildRejected(
-                "ws::addr=127.0.0.1:1;sf_durability=flush;sender_pool_min=0;sender_pool_max=2;", "not yet supported");
+        // (auto_flush_rows), and WebSocket build-time checks: auto_flush=off and
+        // auto_flush_interval=off both disable auto-flush (unsupported on
+        // WebSocket), and sf_durability=flush is not yet supported.
+        assertBuildRejected(
+                "wss::addr=127.0.0.1:1;tls_verify=strict;sender_pool_min=0;query_pool_min=0;", "tls_verify");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;auto_flush_rows=abc;sender_pool_min=0;query_pool_min=0;", "auto_flush_rows");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;auto_flush_interval=off;sender_pool_min=0;query_pool_min=0;", "auto-flush");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;auto_flush=off;sender_pool_min=0;query_pool_min=0;", "auto-flush");
+        assertBuildRejected(
+                "ws::addr=127.0.0.1:1;sf_durability=flush;sender_pool_min=0;query_pool_min=0;", "not yet supported");
     }
 
     @Test
@@ -212,22 +112,12 @@ public void testMalformedPoolValueRejectedAtBuild() {
     }
 
     @Test
-    public void testMissingIngestConfigThrows() {
-        try {
-            QuestDB.builder().queryConfig("ws::addr=h:9000;").build().close();
-            Assert.fail();
-        } catch (IllegalStateException e) {
-            Assert.assertTrue(e.getMessage().contains("ingest"));
-        }
-    }
-
-    @Test
-    public void testMissingQueryConfigThrows() {
+    public void testMissingConfigThrows() {
         try {
-            QuestDB.builder().ingestConfig("ws::addr=h:9000;").build().close();
+            QuestDB.builder().build().close();
             Assert.fail();
         } catch (IllegalStateException e) {
-            Assert.assertTrue(e.getMessage().contains("query"));
+            Assert.assertTrue(e.getMessage(), e.getMessage().contains("configuration"));
         }
     }
 
@@ -254,26 +144,37 @@ public void testNegativePoolSizesRejected() {
         }
     }
 
+    @Test
+    public void testNonWsSchemaRejected() {
+        // The single cluster config (and QuestDB.connect) must use ws/wss.
+        assertSchemaRejected(() -> QuestDB.builder().fromConfig("http::addr=h:9000;"));
+        assertSchemaRejected(() -> QuestDB.builder().fromConfig("tcp::addr=h:9009;"));
+        assertSchemaRejected(() -> QuestDB.builder().fromConfig("udp::addr=h:9009;"));
+        assertSchemaRejected(() -> QuestDB.connect("http::addr=h:9000;").close());
+    }
+
     @Test
     public void testQueryPoolBuildFailureUnwindsSenderPool() throws Exception {
-        // Sender pool builds against a healthy ws ingest endpoint; the query
-        // pool fails on a dead address. The handle must close the already-built
-        // sender pool (its connected senders) rather than leak them.
-        try (TestWebSocketServer ingest = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
+        // One server, one cluster config: the server accepts ingest write-path
+        // upgrades but rejects egress read-path upgrades, so the sender pool
+        // connects while the query pool's connect fails. The failed build() must
+        // close the already-built sender pool (its connected senders) rather than
+        // leak them.
+        try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
         })) {
-            ingest.start();
-            Assert.assertTrue(ingest.awaitStart(5, TimeUnit.SECONDS));
-            int port = ingest.getPort();
+            server.setRejectReadUpgrade(true);
+            server.start();
+            Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS));
+            int port = server.getPort();
             try {
                 QuestDB.builder()
-                        .ingestConfig("ws::addr=localhost:" + port + ";")
-                        .queryConfig("ws::addr=127.0.0.1:1;auth_timeout_ms=200;")
+                        .fromConfig("ws::addr=localhost:" + port + ";auth_timeout_ms=200;")
                         .senderPoolSize(2)
                         .queryPoolSize(2)
                         .acquireTimeoutMillis(500)
                         .build()
                         .close();
-                Assert.fail("expected build to fail when query pool cannot connect");
+                Assert.fail("expected build to fail when the query pool cannot connect");
             } catch (RuntimeException expected) {
                 // The exact exception comes from QwpQueryClient.connect(). The
                 // build failing only proves the query pool gave up; the
@@ -284,75 +185,51 @@ public void testQueryPoolBuildFailureUnwindsSenderPool() throws Exception {
             // saw two ingest handshakes (proving the senders connected and the
             // assertion below is not vacuous)...
             awaitTrue("sender pool should have connected two ingest senders",
-                    () -> ingest.handshakeCount() >= 2);
+                    () -> server.handshakeCount() >= 2);
             // ...and the failed build() must have closed every one of them, so
             // no sender connection is left live on the server. The server
             // observes the client-side socket close asynchronously, so poll.
             awaitTrue("failed build() must close the already-built sender pool, leaving no live connection",
-                    () -> ingest.liveConnectionCount() == 0);
-        }
-    }
-
-    @Test
-    public void testSamePoolKeyValueAcrossSidesOk() {
-        // The same key at the same value on both sides builds cleanly.
-        try (QuestDB ignored = QuestDB.builder()
-                .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;acquire_timeout_ms=1500;")
-                .queryConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;query_pool_min=0;acquire_timeout_ms=1500;")
-                .build()) {
-            Assert.assertNotNull(ignored);
+                    () -> server.liveConnectionCount() == 0);
         }
     }
 
     @Test
     public void testSharedVocabularyConnectsBothPoolsLive() throws Exception {
-        // The headline use case: one connect-string vocabulary carrying BOTH
+        // The headline use case: one cluster connect-string carrying BOTH
         // ingress-only keys (auto_flush_rows, sender_id) and egress-only keys
-        // (compression, max_batch_rows, target, failover) drives both LIVE
-        // clients through the facade -- each side applies the keys it owns and
-        // silently ignores the rest. Other tests cover this validate-only
-        // (min=0) or on a single side; this one pre-warms min=1 so both pools
-        // actually connect.
-        //
-        // The mock serves ingest (ACK) and query (SERVER_INFO) semantics on
-        // separate sockets, so ingest and query connect to separate servers. A
-        // single ws:: address serving both is exercised end-to-end against a
-        // real server in the parent repo.
-        try (TestWebSocketServer ingest = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
-        });
-             TestWebSocketServer query = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
-             })) {
-            ingest.start();
-            query.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO
-            query.start();
-            Assert.assertTrue(ingest.awaitStart(5, TimeUnit.SECONDS));
-            Assert.assertTrue(query.awaitStart(5, TimeUnit.SECONDS));
-
-            // Identical vocabulary on both sides, differing only in addr -- the
-            // same mixed key set a single-string connect() would hand to both
-            // clients. The pool keys carry the same value on both sides, so the
-            // builder's cross-string conflict check passes.
-            String shared = "auto_flush_rows=100;sender_id=probe-1;"                          // ingress-only
-                    + "compression=auto;max_batch_rows=512;target=any;failover=off;"          // egress-only
-                    + "auth_timeout_ms=2000;"                                                 // COMMON
+        // (compression, max_batch_rows, target, failover) drives both LIVE pools
+        // -- each side applies the keys it owns and silently ignores the rest.
+        // One mock server serves both: an ACK stream on the ingest write path and
+        // a SERVER_INFO frame on the egress read path (the read path is gated so
+        // the ingest connection's ACK stream is never disturbed).
+        try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
+        })) {
+            server.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO
+            server.start();
+            Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS));
+
+            // A single cluster config carrying the mixed key set. The pools
+            // pre-warm min=1, so the shared vocabulary connects a live sender AND
+            // a live query client, not merely validates.
+            String cfg = "ws::addr=localhost:" + server.getPort() + ";"
+                    + "auto_flush_rows=100;sender_id=probe-1;"                          // ingress-only
+                    + "compression=auto;max_batch_rows=512;target=any;failover=off;"    // egress-only
+                    + "auth_timeout_ms=2000;"                                           // common
                     + "sender_pool_min=1;sender_pool_max=2;query_pool_min=1;query_pool_max=2;"; // pool
-            try (QuestDB db = QuestDB.builder()
-                    .ingestConfig("ws::addr=localhost:" + ingest.getPort() + ";" + shared)
-                    .queryConfig("ws::addr=localhost:" + query.getPort() + ";" + shared)
-                    .build()) {
-                // build() returned, so both pools pre-warmed their min=1 slot:
-                // the shared vocabulary connected a live sender AND a live query
-                // client, not merely validated.
+            try (QuestDB db = QuestDB.builder().fromConfig(cfg).build()) {
                 Assert.assertNotNull(db.borrowSender());
-                Assert.assertNotNull(db.query());
+                try (io.questdb.client.Query q = db.borrowQuery()) {
+                    Assert.assertNotNull(q);
+                }
             }
         }
     }
 
     @Test
     public void testSharedWsConfigWithPoolKeys() {
-        // A shared ws:: string carries pool keys; min=0 so build does only
-        // parse-only validation (no connect).
+        // A cluster ws:: string carries pool keys for both pools; min=0, so
+        // build() performs parse-only validation (no connect).
         try (QuestDB ignored = QuestDB.builder()
                 .fromConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=3;"
                         + "query_pool_min=0;query_pool_max=2;acquire_timeout_ms=1234;")
@@ -361,41 +238,13 @@ public void testSharedWsConfigWithPoolKeys() {
         }
     }
 
-    @Test
-    public void testTcpIngestConfigRejected() {
-        assertSchemaRejected(() -> QuestDB.builder().ingestConfig("tcp::addr=h:9009;"));
-    }
-
-    @Test
-    public void testUdpIngestConfigRejected() {
-        assertSchemaRejected(() -> QuestDB.builder().queryConfig("udp::addr=h:9009;"));
-    }
-
-    private static void assertEgressBuildRejected(String query, String expectedFragment) {
-        try {
-            QuestDB.builder()
-                    .ingestConfig("ws::addr=127.0.0.1:1;sender_pool_min=0;sender_pool_max=2;")
-                    .queryConfig(query)
-                    .build()
-                    .close();
-            Assert.fail("expected build() to reject the malformed query config: " + query);
-        } catch (RuntimeException e) {
-            Assert.assertNotNull(e.getMessage());
-            Assert.assertTrue(e.getMessage(), e.getMessage().contains(expectedFragment));
-        }
-    }
-
-    private static void assertIngressBuildRejected(String ingest, String expectedFragment) {
+    private static void assertBuildRejected(String config, String expectedFragment) {
         try {
-            QuestDB.builder()
-                    .ingestConfig(ingest)
-                    .queryConfig("ws::addr=127.0.0.1:1;query_pool_min=0;query_pool_max=2;")
-                    .build()
-                    .close();
-            Assert.fail("expected build() to reject the malformed ingest config: " + ingest);
+            QuestDB.builder().fromConfig(config).build().close();
+            Assert.fail("expected build() to reject the malformed config: " + config);
         } catch (RuntimeException e) {
-            // Ingress value errors surface as LineSenderException; both it and the
-            // egress IllegalArgumentException are RuntimeException.
+            // Ingress value errors surface as LineSenderException; egress errors
+            // as IllegalArgumentException -- both are RuntimeException.
             Assert.assertNotNull(e.getMessage());
             Assert.assertTrue(e.getMessage(), e.getMessage().contains(expectedFragment));
         }
diff --git a/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java b/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java
new file mode 100644
index 00000000..93ecb9be
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/QuestDBFacadeCallbacksTest.java
@@ -0,0 +1,122 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test;
+
+import io.questdb.client.QuestDB;
+import io.questdb.client.SenderConnectionEvent;
+import io.questdb.client.SenderConnectionListener;
+import io.questdb.client.SenderError;
+import io.questdb.client.SenderErrorHandler;
+import io.questdb.client.test.cutlass.qwp.client.TestPorts;
+import org.jetbrains.annotations.NotNull;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Proves the ingest-side async callbacks exposed on the {@link QuestDB} facade
+ * ({@link io.questdb.client.QuestDBBuilder#errorHandler}/{@code connectionListener})
+ * actually reach the pooled {@link io.questdb.client.Sender}s -- not merely the
+ * lower-level {@code Sender.builder()}.
+ * <p>
+ * Each test eagerly prewarms one ingest sender ({@code sender_pool_min=1})
+ * pointed at a dead port in {@code initial_connect_retry=async} mode with a
+ * tight reconnect budget: the pool's I/O thread exhausts the budget in the
+ * background and surfaces the failure through whichever facade-wired callback is
+ * under test. No server is required.
+ */
+public class QuestDBFacadeCallbacksTest {
+
+    // Only the latch wait is time-boxed; cap the test so a stuck build()/close() cannot hang the run.
+    @Test(timeout = 30_000)
+    public void testFacadeConnectionListenerReceivesEvents() throws Exception {
+        int port = TestPorts.findUnusedPort();
+        CountDownLatch sawEvent = new CountDownLatch(1);
+        SenderConnectionListener listener = new SenderConnectionListener() {
+            @Override
+            public void onEvent(@NotNull SenderConnectionEvent event) {
+                sawEvent.countDown();
+            }
+        };
+        try (QuestDB ignored = QuestDB.builder().fromConfig(config(port))
+                .connectionListener(listener)
+                .build()) {
+            Assert.assertTrue(
+                    "facade-wired connectionListener must observe at least one connection event",
+                    sawEvent.await(5, TimeUnit.SECONDS));
+        }
+    }
+
+    // Only the latch wait is time-boxed; cap the test so a stuck build()/close() cannot hang the run.
+    @Test(timeout = 30_000)
+    public void testFacadeErrorHandlerReceivesAsyncIngestError() throws Exception {
+        int port = TestPorts.findUnusedPort();
+        ErrorInbox inbox = new ErrorInbox();
+        try (QuestDB ignored = QuestDB.builder().fromConfig(config(port))
+                .errorHandler(inbox)
+                .build()) {
+            Assert.assertTrue(
+                    "facade-wired errorHandler must receive the async budget-exhaustion SenderError",
+                    inbox.await(5, TimeUnit.SECONDS));
+            Assert.assertNotNull("a SenderError must be delivered", inbox.get());
+        }
+    }
+
+    // One cluster config drives both pools. Eagerly prewarm one sender
+    // (sender_pool_min=1) so build() exercises the production
+    // buildManagedSlotSender path that applies the facade callbacks; async + a
+    // tight budget -> the I/O thread fails fast against the dead port.
+    // query_pool_min=0 -> the query pool never connects, so the test is isolated
+    // to the ingest callbacks.
+    private static String config(int port) {
+        return "ws::addr=localhost:" + port + ";sender_pool_min=1;sender_pool_max=1"
+                + ";query_pool_min=0;query_pool_max=1"
+                + ";initial_connect_retry=async;reconnect_max_duration_millis=400"
+                + ";reconnect_initial_backoff_millis=10;reconnect_max_backoff_millis=50"
+                + ";close_flush_timeout_millis=0;";
+    }
+
+    private static final class ErrorInbox implements SenderErrorHandler {
+        private final CountDownLatch latch = new CountDownLatch(1);
+        private final AtomicReference<SenderError> first = new AtomicReference<>();
+
+        boolean await(long timeout, TimeUnit unit) throws InterruptedException {
+            return latch.await(timeout, unit);
+        }
+
+        SenderError get() {
+            return first.get();
+        }
+
+        @Override
+        public void onError(@NotNull SenderError error) {
+            first.compareAndSet(null, error); // keep only the first error delivered
+            latch.countDown();
+        }
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java b/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java
new file mode 100644
index 00000000..47dd5fa8
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/QuestDBLazyConnectTest.java
@@ -0,0 +1,150 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test;
+
+import io.questdb.client.QuestDB;
+import io.questdb.client.QuestDBBuilder;
+import io.questdb.client.Sender;
+import io.questdb.client.test.cutlass.qwp.client.TestPorts;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * {@code lazy_connect=true} makes a {@link QuestDB} facade tolerate the server
+ * being down at startup <em>without</em> disabling reads: the ingest side
+ * connects asynchronously (writes buffer until the wire is up) and the read pool
+ * connects lazily on first use. Reads stay enabled and connect once the server
+ * is up (the recovery lifecycle is covered end-to-end by
+ * {@link QuestDBServerRecoveryTest}).
+ * <p>
+ * Because both sides must start non-blocking, a knob that forces a blocking /
+ * fail-fast startup ({@code initial_connect_retry} other than {@code async}, or
+ * an explicit {@code query_pool_min > 0}) is a configuration conflict and is
+ * rejected up front with a clear remedy.
+ */
+public class QuestDBLazyConnectTest {
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectStartsAndWritesWhileServerDown() {
+        int port = TestPorts.findUnusedPort();
+        // No server at `port`, sender_pool_min defaults to 1, and the only
+        // resilience knob is lazy_connect=true. (a) build() must return promptly
+        // -- the read pool defaults to min=0 and the ingest side goes async, so
+        // neither side fail-fasts -- and (b) a write must buffer without throwing.
+        try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port
+                + ";lazy_connect=true;reconnect_max_duration_millis=200"
+                + ";reconnect_initial_backoff_millis=10;reconnect_max_backoff_millis=50"
+                + ";close_flush_timeout_millis=0;")) {
+            Sender sender = db.borrowSender();
+            Assert.assertNotNull("a sender must be available with no server present", sender);
+            sender.table("t").longColumn("v", 1L).atNow();
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectKeepsReadsEnabledWhileServerDown() {
+        int port = TestPorts.findUnusedPort();
+        // Reads are ENABLED, just deferred: under lazy_connect the read pool
+        // defaults to min=0, so build() does not eagerly connect or fail-fast
+        // while the server is down. The read client connects lazily on the
+        // first borrowQuery() once the server is up (covered end-to-end by
+        // QuestDBServerRecoveryTest). This is the whole point of lazy_connect
+        // over the old write-only mode, which disabled reads outright.
+        try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port
+                + ";lazy_connect=true;close_flush_timeout_millis=0;")) {
+            Assert.assertNotNull("the handle must build read-enabled while the server is down", db);
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectAcceptsOnAndAllowsExplicitAsync() {
+        int port = TestPorts.findUnusedPort();
+        // lazy_connect accepts on/off as well as true/false, and an explicit
+        // initial_connect_retry=async is consistent with it (no conflict).
+        try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port
+                + ";lazy_connect=on;initial_connect_retry=async;query_pool_min=0"
+                + ";close_flush_timeout_millis=0;")) {
+            Assert.assertNotNull(db);
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectConflictsWithBlockingInitialConnectRetry() {
+        // off/false (OFF) and on/true/sync (SYNC) all block or fail-fast at
+        // startup, so each conflicts with lazy_connect and must be rejected with
+        // a clear remedy.
+        assertLazyConflict("initial_connect_retry=off", "initial_connect_retry", "async");
+        assertLazyConflict("initial_connect_retry=sync", "initial_connect_retry", "async");
+        assertLazyConflict("initial_connect_retry=on", "initial_connect_retry", "async");
+    }
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectConflictsWithExplicitQueryPoolMinInConfig() {
+        // An explicit query_pool_min > 0 makes the read pool eagerly fail-fast at
+        // startup, contradicting lazy_connect.
+        assertLazyConflict("query_pool_min=1", "query_pool_min", "0");
+        assertLazyConflict("query_pool_min=2", "query_pool_min", "0");
+        // query_pool_min=0 is exactly what lazy_connect wants -- no conflict.
+        int port = TestPorts.findUnusedPort();
+        try (QuestDB db = QuestDB.connect("ws::addr=localhost:" + port
+                + ";lazy_connect=true;query_pool_min=0;close_flush_timeout_millis=0;")) {
+            Assert.assertNotNull(db);
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testLazyConnectConflictsWithExplicitQueryPoolMinFromBuilder() {
+        // The conflict also fires when query_pool_min > 0 comes from an explicit
+        // builder call (queryPoolMin / queryPoolSize), not just the connect string.
+        int port = TestPorts.findUnusedPort();
+        assertLazyConflict(QuestDB.builder()
+                .fromConfig("ws::addr=localhost:" + port + ";lazy_connect=true;close_flush_timeout_millis=0;")
+                .queryPoolMin(1), "query_pool_min", "0");
+        assertLazyConflict(QuestDB.builder()
+                .fromConfig("ws::addr=localhost:" + port + ";lazy_connect=true;close_flush_timeout_millis=0;")
+                .queryPoolSize(2), "query_pool_min", "0");
+    }
+
+    private static void assertLazyConflict(String extraKeys, String... expectedFragments) {
+        int port = TestPorts.findUnusedPort();
+        assertLazyConflict(QuestDB.builder().fromConfig("ws::addr=localhost:" + port
+                + ";lazy_connect=true;" + extraKeys + ";close_flush_timeout_millis=0;"), expectedFragments);
+    }
+
+    private static void assertLazyConflict(QuestDBBuilder builder, String... expectedFragments) {
+        try {
+            builder.build().close();
+            Assert.fail("expected lazy_connect configuration conflict");
+        } catch (IllegalArgumentException e) {
+            String msg = e.getMessage();
+            Assert.assertNotNull(msg);
+            Assert.assertTrue(msg, msg.contains("lazy_connect"));
+            for (String fragment : expectedFragments) {
+                Assert.assertTrue("'" + msg + "' should mention '" + fragment + "'",
+                        msg.contains(fragment));
+            }
+        }
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java b/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java
new file mode 100644
index 00000000..c68be090
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/QuestDBServerRecoveryTest.java
@@ -0,0 +1,114 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test;
+
+import io.questdb.client.QuestDB;
+import io.questdb.client.Sender;
+import io.questdb.client.test.cutlass.qwp.websocket.TestWebSocketServer;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.concurrent.TimeUnit;
+import java.util.function.BooleanSupplier;
+
+/**
+ * End-to-end resilience: the facade starts with the server down, the producer
+ * keeps writing (buffered), and once the server comes up the write side
+ * reconnects and the read side -- previously deferred so it could not fail-fast
+ * the build -- can connect.
+ * <p>
+ * The mock cannot answer a real SELECT (result frames are exercised against a
+ * real server in the parent repo), so the read step asserts the query client
+ * <em>connects</em> once the server is up, not the row contents.
+ */
+public class QuestDBServerRecoveryTest {
+
+    @Test(timeout = 60_000)
+    public void testFacadeStartsWhileServerDownThenWritesAndReaderConnectsOnRecovery() throws Exception {
+        // One mock server (the whole "cluster"), bound so the port is known but
+        // NOT accepting yet: the address is reachable but no WebSocket upgrade
+        // completes, so the server is effectively "down". It serves ingest ACK
+        // on the write path and a SERVER_INFO frame on the read path -- the read
+        // path is gated so the ingest connection's ACK stream is never disturbed.
+        try (TestWebSocketServer server = new TestWebSocketServer(new TestWebSocketServer.WebSocketServerHandler() {
+        })) {
+            server.setSendServerInfo(true); // the egress client's connect() waits for SERVER_INFO
+            // One cluster config drives both pools. lazy_connect=true expands to
+            // exactly this resilience: the ingest side goes async (the producer
+            // never blocks; writes buffer until the wire is up) and the read pool
+            // defaults to min=0 (the otherwise fail-fast reader never sinks the
+            // build while the server is down, and connects lazily on first query).
+            String cfg = "ws::addr=localhost:" + server.getPort()
+                    + ";lazy_connect=true"
+                    + ";sender_pool_min=1;sender_pool_max=1;query_pool_max=1"
+                    + ";auth_timeout_ms=2000;reconnect_initial_backoff_millis=20"
+                    + ";reconnect_max_backoff_millis=100;reconnect_max_duration_millis=600000"
+                    + ";close_flush_timeout_millis=1000;";
+
+            // (1) server down + (2) client starts:
+            try (QuestDB db = QuestDB.builder().fromConfig(cfg).build()) {
+                Assert.assertEquals("no handshake while the server is down", 0, server.handshakeCount());
+
+                // lazy_connect keeps reads ENABLED, just deferred: the read pool
+                // defaults to min=0, so nothing connects while the server is
+                // down. The read client connects lazily on the first
+                // borrowQuery() once the server is up (step 5).
+
+                // (3) client writes -> buffers in the cursor SF engine; the call
+                // must not throw even though the server is down.
+                Sender sender = db.borrowSender();
+                sender.table("t").longColumn("v", 1L).atNow();
+
+                // (4) server starts:
+                server.start();
+                Assert.assertTrue(server.awaitStart(5, TimeUnit.SECONDS));
+
+                // The write side reconnects on its own once the server is up.
+                awaitTrue("ingest must connect after the server comes up",
+                        () -> server.handshakeCount() >= 1);
+
+                // (5) client can now read: the deferred reader connects on the
+                // first borrowQuery() (the mock does not serve rows, so we
+                // assert the connection, not the result).
+                int handshakesBeforeQuery = server.handshakeCount();
+                db.borrowQuery().close();
+                awaitTrue("query client must connect after the server comes up",
+                        () -> server.handshakeCount() >= handshakesBeforeQuery + 1);
+            }
+        }
+    }
+
+    // Polls condition every 20 ms for up to 15 s; fails with message if it never holds.
+    private static void awaitTrue(String message, BooleanSupplier condition) throws InterruptedException {
+        long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(15);
+        while (deadline - System.nanoTime() > 0) { // compare by difference: overflow-safe per nanoTime contract
+            if (condition.getAsBoolean()) {
+                return;
+            }
+            Thread.sleep(20);
+        }
+        Assert.assertTrue(message, condition.getAsBoolean());
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java b/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java
index cf121d8c..7dc2810b 100644
--- a/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java
+++ b/core/src/test/java/io/questdb/client/test/cutlass/http/client/WebSocketClientTest.java
@@ -31,6 +31,7 @@
 import io.questdb.client.cutlass.http.client.WebSocketSendBuffer;
 import io.questdb.client.network.PlainSocketFactory;
 import io.questdb.client.network.Socket;
+import io.questdb.client.network.SocketReadinessWaiter;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -263,7 +264,7 @@ public int send(long bufferPtr, int bufferLen) {
         }
 
         @Override
-        public void startTlsSession(CharSequence peerName) {
+        public void startTlsSession(CharSequence peerName, SocketReadinessWaiter waiter) {
             throw new UnsupportedOperationException();
         }
 
diff --git a/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java b/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java
new file mode 100644
index 00000000..e0435b72
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/cutlass/qwp/client/QwpQueryClientConnectTimeoutTest.java
@@ -0,0 +1,88 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test.cutlass.qwp.client;
+
+import io.questdb.client.cutlass.http.client.HttpClientException;
+import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+
+public class QwpQueryClientConnectTimeoutTest {
+
+    /**
+     * A connect-phase timeout must be reported as a connect_timeout failure, not
+     * relabeled as an "exceeded auth_timeout" overage.
+     * <p>
+     * {@code QwpQueryClient.runUpgradeWithTimeout} used to wrap the {@code connect()}
+     * and {@code upgrade()} calls in one try block, so the timeout-flagged exception
+     * thrown by the connect_timeout path was caught by the {@code isTimeout()}
+     * branch intended for upgrade() and rewritten with the (much larger, and wrong)
+     * auth_timeout value -- e.g. a connect that bailed after 500 ms reported
+     * "exceeded auth_timeout=15000ms". The ingest side never had this because it
+     * routes through {@code QwpUpgradeFailures.classify}, which leaves the
+     * connect-timeout exception unmodified.
+     */
+    @Test(timeout = 30_000)
+    public void testConnectTimeoutNotReportedAsAuthTimeout() {
+        // 192.0.2.0/24 is TEST-NET-1 (RFC 5737): on a normal network the SYN is
+        // silently dropped, so the TCP connect stalls and our application-level
+        // connect_timeout (500 ms) fires -- long before auth_timeout_ms (15000 ms).
+        // The WebSocket upgrade phase is never reached.
+        try (QwpQueryClient client = QwpQueryClient.fromConfig(
+                "ws::addr=192.0.2.1:9009;connect_timeout=500;auth_timeout_ms=15000;failover=off;target=any;")) {
+            long startNanos = System.nanoTime(); // monotonic: unaffected by wall-clock adjustments
+            try {
+                client.connect();
+                Assert.fail("expected connect to fail");
+            } catch (HttpClientException ex) {
+                long elapsed = (System.nanoTime() - startNanos) / 1_000_000L; // elapsed millis
+                String msg = ex.getMessage();
+
+                // The connect_timeout path is only exercised when the runner routes
+                // TEST-NET-1 into a black hole (dropped SYN). Skip -- rather than
+                // flake -- on the other two outcomes:
+                //  - no route: a fast ENETUNREACH surfaces as "could not connect".
+                //  - (rare) the host accepts the connect: the upgrade then runs the
+                //    full auth_timeout, so elapsed ~ auth_timeout (>5 s).
+                // Neither gate keys on the connect-vs-auth label, so neither can mask
+                // the regression: a black-holed connect always bails at ~500 ms with
+                // a message that is "connect timed out" (fixed) or "...auth_timeout..."
+                // (the bug) -- both reach the assertions below.
+                Assume.assumeFalse("no route to TEST-NET-1 black hole on this runner: " + msg,
+                        msg.contains("could not connect"));
+                Assume.assumeTrue("TEST-NET-1 is not a black hole on this runner (elapsed=" + elapsed + "ms): " + msg,
+                        elapsed < 5_000);
+
+                // It bailed at connect_timeout=500 ms, nowhere near auth_timeout=15000 ms.
+                // Regression: name the connect phase, never auth_timeout.
+                Assert.assertFalse("connect-phase timeout misreported as auth_timeout: " + msg,
+                        msg.contains("auth_timeout"));
+                Assert.assertTrue("expected a connect-timeout diagnostic, got: " + msg,
+                        msg.contains("connect timed out"));
+            }
+        }
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java b/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java
index 806d3750..92e4a648 100644
--- a/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java
+++ b/core/src/test/java/io/questdb/client/test/cutlass/qwp/websocket/TestWebSocketServer.java
@@ -83,6 +83,12 @@ public class TestWebSocketServer implements Closeable {
     // QwpQueryClient tests enable this; ingress sender tests leave it off so their
     // connections carry only ACK frames.
     private volatile boolean sendServerInfo;
+    // When true, the server fails the WebSocket upgrade on the egress read path
+    // (/read...) by dropping the connection before the 101, while still serving
+    // the ingest write path (/write...) normally. Lets one server + one cluster
+    // config drive a build where the sender pool connects but the query pool
+    // cannot. Set via setRejectReadUpgrade().
+    private volatile boolean rejectReadUpgrade;
     // When non-null the next handshake responds with HTTP 421 Misdirected
     // Request + X-QuestDB-Role: <rejectingRole>, mimicking a server whose
     // QwpServerInfoProvider reports REPLICA / PRIMARY_CATCHUP. Set after
@@ -208,6 +214,18 @@ public void setRejectWithRole(String role) {
         this.rejectingRole = role;
     }
 
+    /**
+     * When enabled, the server fails the WebSocket upgrade on the egress read
+     * path ({@code /read/...}) while still serving the ingest write path
+     * ({@code /write/...}) normally. This lets a single server, addressed by a
+     * single cluster config, accept ingest senders but reject query clients --
+     * e.g. to exercise build()'s unwind of an already-built sender pool when the
+     * query pool fails.
+     */
+    public void setRejectReadUpgrade(boolean rejectReadUpgrade) {
+        this.rejectReadUpgrade = rejectReadUpgrade;
+    }
+
     /**
      * Configure the server to reject the next handshake with an arbitrary
      * HTTP status code (e.g. 401, 403, 404, 426, 503). Pass {@code 0} to
@@ -221,9 +239,12 @@ public void setRejectWithStatus(int statusCode, String reasonPhrase) {
 
     /**
      * When enabled, the server sends a {@code SERVER_INFO} frame immediately
-     * after a successful 101 upgrade, the way a real egress endpoint does. The
-     * advertised role follows {@link #setAdvertisedRole}, defaulting to
-     * {@code STANDALONE}. Leave disabled for ingress (Sender) tests.
+     * after a successful 101 upgrade on the egress read path ({@code /read/...}),
+     * the way a real egress endpoint does. Ingest write-path ({@code /write/...})
+     * connections never receive it -- their ACK-only response stream would choke
+     * on an unexpected frame -- so one server can serve both an ingest and a
+     * query pool from a single cluster config. The advertised role follows
+     * {@link #setAdvertisedRole}, defaulting to {@code STANDALONE}.
      */
     public void setSendServerInfo(boolean sendServerInfo) {
         this.sendServerInfo = sendServerInfo;
@@ -251,6 +272,10 @@ private static byte[] buildServerInfoFrame(byte role) {
         return bb.array();
     }
 
+    private static boolean isReadPath(String path) {
+        return path != null && path.startsWith("/read");
+    }
+
     private static byte roleByte(String role) {
         if (role == null) {
             return 0; // ROLE_STANDALONE
@@ -313,6 +338,10 @@ public class ClientHandler implements Closeable {
         private boolean isClosed;
         private OutputStream out;
         private Thread readThread;
+        // Request path from the WebSocket upgrade GET line (e.g. /write/v4,
+        // /read/v1). Captured during the handshake so the post-upgrade logic can
+        // distinguish ingest from egress connections.
+        private String requestPath = "";
 
         ClientHandler(Socket socket) {
             this.socket = socket;
@@ -459,7 +488,15 @@ private boolean performHandshake() throws IOException {
             }
 
             String key = null;
-            for (String line : request.toString().split("\r\n")) {
+            String[] lines = request.toString().split("\r\n");
+            if (lines.length > 0) {
+                // GET <path> HTTP/1.1
+                String[] parts = lines[0].split(" ");
+                if (parts.length >= 2) {
+                    requestPath = parts[1];
+                }
+            }
+            for (String line : lines) {
                 if (line.toLowerCase().startsWith("sec-websocket-key:")) {
                     key = line.substring(18).trim();
                     break;
@@ -470,6 +507,13 @@ private boolean performHandshake() throws IOException {
                 return false;
             }
 
+            // Read-path reject: drop the egress upgrade before the 101 so the
+            // query pool's connect fails fast, while ingest write-path upgrades
+            // still complete on this same server.
+            if (rejectReadUpgrade && isReadPath(requestPath)) {
+                return false;
+            }
+
             // Arbitrary-status reject path: tests use setRejectWithStatus
             // to drive the failover loop's terminal-vs-transient
             // classification (failover.md §6).
@@ -566,7 +610,11 @@ void start() {
                     liveConnections.incrementAndGet();
 
                     try {
-                        if (sendServerInfo) {
+                        // SERVER_INFO is an egress-only frame: send it only on a
+                        // read-path (query) connection. An ingest write-path
+                        // connection parses every inbound frame as an ACK and
+                        // would fail on it.
+                        if (sendServerInfo && isReadPath(requestPath)) {
                             sendBinary(buildServerInfoFrame(roleByte(advertisedRole)));
                         }
 
diff --git a/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java b/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java
index bd3e944a..1aa681f4 100644
--- a/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java
+++ b/core/src/test/java/io/questdb/client/test/example/QuestDBExamples.java
@@ -44,11 +44,11 @@
 public class QuestDBExamples {
 
     public static void main(String[] args) throws Exception {
-        // 1. Connect with a single configuration string. Both sides run over
-        //    QWP/WebSocket, so one ws:: string configures ingest and egress.
-        try (QuestDB db = QuestDB.connect("ws::addr=localhost:9000;")) {
+        // 1. Connect with a single configuration string for the whole cluster.
+        //    Both sides run over QWP/WebSocket, so one ws:: string configures
+        //    ingest and egress; list every node in one addr server list.
+        try (QuestDB db = QuestDB.connect("ws::addr=node1:9000,node2:9000,node3:9000;")) {
             ingestWithBorrowedSender(db);
-            ingestWithThreadAffineSender(db);
             queryOneShot(db);
             queryWithBinds(db);
             cancelExample(db);
@@ -59,21 +59,24 @@ public static void main(String[] args) throws Exception {
         try (QuestDB db = QuestDB.connect(
                 "wss::addr=db.questdb.cloud:9000;token=YOUR_TOKEN_HERE;")) {
             // ... use db ...
-            db.executeSql("SELECT 1", new PrintingHandler()).await();
+            try (Query q = db.borrowQuery()) {
+                q.sql("SELECT 1").handler(new PrintingHandler()).submit().await();
+            }
         }
 
-        // 3. Custom pool sizing and timeouts via the builder. Use this when
-        //    ingest and egress use separate address lists, or when you need to
-        //    override defaults.
+        // 3. Custom pool sizing and timeouts via the builder. One cluster config
+        //    (a single addr server list) drives both pools; use the builder to
+        //    override pool/timeout defaults.
         try (QuestDB db = QuestDB.builder()
-                .ingestConfig("ws::addr=ingest.cluster:9000;")
-                .queryConfig("ws::addr=read-replica.cluster:9000;")
+                .fromConfig("ws::addr=node1.cluster:9000,node2.cluster:9000;")
                 .senderPoolSize(8)
                 .queryPoolSize(4)
                 .acquireTimeoutMillis(10_000)
                 .build()) {
             // ... use db ...
-            db.executeSql("SELECT 1", new PrintingHandler()).await();
+            try (Query q = db.borrowQuery()) {
+                q.sql("SELECT 1").handler(new PrintingHandler()).submit().await();
+            }
         }
     }
 
@@ -84,15 +87,17 @@ public static void main(String[] args) throws Exception {
      * returns normally; either way the Completion reaches a terminal state.
      */
     static void cancelExample(QuestDB db) {
-        Completion c = db.executeSql(
-                "SELECT * FROM big_table ORDER BY ts",
-                new PrintingHandler());
-        // ... some condition decides to abort ...
-        c.cancel();
-        try {
-            c.await();
-        } catch (Exception cancelled) {
-            // expected when cancel won the race
+        try (Query q = db.borrowQuery()) { // borrowed handle is closed when this block exits
+            Completion c = q.sql("SELECT * FROM big_table ORDER BY ts")
+                    .handler(new PrintingHandler())
+                    .submit();
+            // ... some condition decides to abort ...
+            c.cancel();
+            try {
+                c.await(); // wait for the terminal state while the handle is still borrowed
+            } catch (Exception cancelled) { // NOTE(review): broad catch also hides non-cancel failures -- confirm intended
+                // expected when cancel won the race
+            }
         }
     }
 
@@ -113,62 +118,42 @@ static void ingestWithBorrowedSender(QuestDB db) {
     }
 
     /**
-     * Thread-affine Sender: the first call on a thread leases one and pins it;
-     * subsequent calls on the same thread return the same instance with zero
-     * borrow overhead. Best for long-lived dedicated producer threads.
-     * <p>
-     * Call {@link QuestDB#releaseSender()} on threads borrowed from pools you
-     * don't own (Netty event loops, etc.) before they're recycled.
-     */
-    static void ingestWithThreadAffineSender(QuestDB db) {
-        Sender s = db.sender();
-        for (int i = 0; i < 1_000; i++) {
-            s.table("trades")
-                    .symbol("symbol", "BTC-USD")
-                    .doubleColumn("price", 42_500.50 + i)
-                    .longColumn("size", 100)
-                    .atNow();
-        }
-        s.flush();
-        // Not strictly required: db.close() reaps pinned Senders. Call it
-        // only when handing this thread back to a foreign pool.
-        // db.releaseSender();
-    }
-
-    /**
-     * One-shot query, no bind parameters. {@link QuestDB#executeSql} returns
-     * a {@link Completion} that you can {@code await()} synchronously, time
+     * One-shot query, no bind parameters. Borrow a {@link Query} handle,
+     * submit, await, and close it (try-with-resources). {@code submit()}
+     * returns a {@link Completion} you can {@code await()} synchronously, time
      * out on, or cancel.
      */
     static void queryOneShot(QuestDB db) throws InterruptedException {
-        Completion c = db.executeSql(
-                "SELECT price FROM trades WHERE symbol = 'BTC-USD' LIMIT 10",
-                new PrintingHandler());
-        c.await();
+        try (Query q = db.borrowQuery()) { // try-with-resources closes the borrowed handle
+            q.sql("SELECT price FROM trades WHERE symbol = 'BTC-USD' LIMIT 10")
+                    .handler(new PrintingHandler())
+                    .submit()
+                    .await(); // synchronous wait on the Completion returned by submit()
+        }
     }
 
     /**
-     * Query with bind parameters. Use {@link QuestDB#query()} to get the
-     * per-thread Query builder, then set SQL, binds (via QwpBindSetter), and
-     * handler.
+     * Query with bind parameters. Borrow a {@link Query} handle, then set SQL,
+     * binds (via QwpBindSetter), and handler.
      * <p>
      * The same SQL text reuses the server's compiled-factory cache -- bind
      * values supply the per-call inputs. Interpolating values into the SQL
      * string defeats that cache.
      */
     static void queryWithBinds(QuestDB db) throws InterruptedException {
-        Query q = db.query()
-                .sql("SELECT price FROM trades WHERE symbol = $1 LIMIT $2")
-                .binds(binds -> {
-                    binds.setVarchar(0, "BTC-USD");
-                    binds.setLong(1, 10L);
-                })
-                .handler(new PrintingHandler());
-        Completion c = q.submit();
-        // Optional timeout: returns false if the query is still in flight.
-        if (!c.await(5, TimeUnit.SECONDS)) {
-            c.cancel();
-            c.await();
+        try (Query q = db.borrowQuery()) { // try-with-resources closes the borrowed handle
+            q.sql("SELECT price FROM trades WHERE symbol = $1 LIMIT $2")
+                    .binds(binds -> { // presumably bind index 0 feeds $1 and index 1 feeds $2 -- confirm
+                        binds.setVarchar(0, "BTC-USD");
+                        binds.setLong(1, 10L);
+                    })
+                    .handler(new PrintingHandler());
+            Completion c = q.submit();
+            // Optional timeout: returns false if the query is still in flight.
+            if (!c.await(5, TimeUnit.SECONDS)) {
+                c.cancel(); // still in flight after 5 s: request cancellation
+                c.await(); // then wait without a timeout for the terminal state
+            }
         }
     }
 
diff --git a/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java
index 34ba4d1a..82b997c5 100644
--- a/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/PoolConfigHonoredTest.java
@@ -54,6 +54,7 @@ public void testEveryPoolKeyIsHonored() {
         expected.put("query_pool_min", 0);
         expected.put("query_pool_max", 5);
         expected.put("acquire_timeout_ms", 1234L);
+        expected.put("query_close_timeout_ms", 2468L);
         expected.put("idle_timeout_ms", 4321L);
         expected.put("max_lifetime_ms", 98765L);
         expected.put("housekeeper_interval_ms", 222L);
@@ -74,6 +75,12 @@ public void testEveryPoolKeyIsHonored() {
         // the assertions above, so a new pool key with no assertion trips this.
         for (ConfigSchema.KeySpec spec : ConfigSchema.all()) {
             if (spec.side() == Side.POOL) {
+                // lazy_connect is a facade flag (build()'s tolerant-startup
+                // branch, covered by QuestDBLazyConnectTest), not a numeric
+                // pool-sizing knob resolved into the snapshot.
+                if ("lazy_connect".equals(spec.name())) {
+                    continue;
+                }
                 Assert.assertTrue("registry pool key '" + spec.name() + "' has no honored assertion",
                         expected.containsKey(spec.name()));
             }
diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java
index 3994a1d2..fae24fb2 100644
--- a/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/QueryClientPoolErrorSafetyTest.java
@@ -33,8 +33,6 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
 
@@ -44,8 +42,8 @@
 // OutOfMemoryError); the old catches let that Error skip cleanup.
 //
 // QwpQueryClient is a concrete class with no fake seam, so these tests inject an
-// Error at the real connect step via the package-private connectHook constructor
-// (reached by reflection -- the main module is declared `open`). fromConfig()
+// Error at the real connect step via the public connectHook constructor
+// (invoked directly, no reflection). fromConfig()
 // still runs for real, committing the NATIVE_DEFAULT scratch the cleanup must
 // reclaim, so the memory assertions are meaningful.
 public class QueryClientPoolErrorSafetyTest {
@@ -232,30 +230,21 @@ private static Consumer<QueryWorker> alwaysThrowStart() {
         };
     }
 
-    private static int inFlightCreations(QueryClientPool pool) throws Exception {
-        Method m = QueryClientPool.class.getDeclaredMethod("inFlightCreations");
-        m.setAccessible(true);
-        return (int) m.invoke(pool);
+    private static int inFlightCreations(QueryClientPool pool) {
+        return pool.inFlightCreations(); // direct call; replaces the former reflective lookup
     }
 
     private static QueryClientPool newPool(
             String cfg, int min, int max, long acquireMs, Consumer<QwpQueryClient> connectHook
-    ) throws Exception {
-        Constructor<QueryClientPool> c = QueryClientPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class, Consumer.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, connectHook);
+    ) { // constructor is called directly, so no reflective checked exceptions remain
+        return new QueryClientPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, connectHook);
     }
 
     private static QueryClientPool newPool(
             String cfg, int min, int max, long acquireMs,
             Consumer<QwpQueryClient> connectHook, Consumer<QueryWorker> startHook
-    ) throws Exception {
-        Constructor<QueryClientPool> c = QueryClientPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class,
-                Consumer.class, Consumer.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE,
+    ) { // overload that additionally passes a startHook to the pool constructor
+        return new QueryClientPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE,
                 connectHook, startHook);
     }
 }
diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java
new file mode 100644
index 00000000..7831c02c
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/impl/QueryCloseDrainTest.java
@@ -0,0 +1,167 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test.impl;
+
+import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import io.questdb.client.impl.QueryClientPool;
+import io.questdb.client.impl.QueryWorker;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.function.Consumer;
+
+/**
+ * Regression tests for the bounded, interruptible {@code Query.close()} drain.
+ * When a submit is still in flight at close() time, the old drain blocked the
+ * caller unbounded and uninterruptibly on the terminal event (and could hang
+ * forever if a racing {@code QuestDB.close()} stranded it). The drain now waits
+ * at most {@code closeQueryTimeoutMillis}, an interrupt aborts it, and a worker
+ * that fails to drain in time is discarded -- its connection may still carry
+ * late frames for the abandoned query -- rather than returned to the pool.
+ * <p>
+ * White-box style: a no-op connect hook builds workers without a network, and
+ * the in-flight state is simulated by setting {@code QueryImpl.done=false}
+ * reflectively, so no server or real {@code execute()} is needed to exercise
+ * the close() drain logic.
+ */
+public class QueryCloseDrainTest {
+
+    private static final String CFG = "ws::addr=127.0.0.1:1;"; // presumably never dialed: NO_CONNECT stands in for the connect step
+    private static final Consumer<QwpQueryClient> NO_CONNECT = c -> { // no-op connect hook: workers are built without a network
+    };
+
+    @Test(timeout = 30_000)
+    public void testCloseDiscardsWorkerWhenDrainTimesOut() throws Exception {
+        try (QueryClientPool pool = new QueryClientPool(
+                CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) {
+            setCloseQueryTimeout(pool, 150L); // shrink the close() drain budget to 150 ms
+            QueryWorker w = pool.acquire();
+            long gen = generation(w);
+            setDone(w, false); // pretend a submit is in flight; nothing will ever signal done
+
+            long startNanos = System.nanoTime();
+            closeQuery(w, gen);
+            long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000;
+
+            Assert.assertTrue("close() must wait about the close budget, elapsed=" + elapsedMs,
+                    elapsedMs >= 120); // lower bound sits below the 150 ms budget to leave timing slack
+            Assert.assertTrue("close() must be bounded, not block unbounded, elapsed=" + elapsedMs,
+                    elapsedMs < 5_000);
+            Assert.assertFalse("a worker that did not drain must be discarded, not returned to the pool",
+                    allWorkers(pool).contains(w));
+            Assert.assertEquals("the discarded worker must leave the pool so it can grow a fresh one",
+                    0, allWorkers(pool).size());
+            Assert.assertFalse("the discarded worker's dispatch thread must have exited",
+                    dispatchThread(w).isAlive());
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testCloseIsInterruptible() throws Exception {
+        try (QueryClientPool pool = new QueryClientPool(
+                CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) {
+            // A long budget: the only way close() can return promptly is by
+            // honoring the caller's interrupt.
+            setCloseQueryTimeout(pool, 60_000L);
+            QueryWorker w = pool.acquire();
+            long gen = generation(w);
+            setDone(w, false); // simulate an in-flight submit that never completes
+
+            Thread.currentThread().interrupt(); // flag is already set when close() starts draining
+            long startNanos = System.nanoTime();
+            closeQuery(w, gen);
+            long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000;
+
+            Assert.assertTrue("close() must preserve the caller's interrupt flag", Thread.interrupted()); // interrupted() also clears the flag
+            Assert.assertTrue("interrupt must abort the drain promptly, elapsed=" + elapsedMs,
+                    elapsedMs < 5_000);
+            Assert.assertFalse("an interrupted close() must discard the worker",
+                    allWorkers(pool).contains(w));
+        }
+    }
+
+    @Test(timeout = 30_000)
+    public void testCloseReturnsWorkerWhenAlreadyDrained() throws Exception {
+        try (QueryClientPool pool = new QueryClientPool(
+                CFG, 0, 2, 1_000L, Long.MAX_VALUE, Long.MAX_VALUE, NO_CONNECT)) {
+            setCloseQueryTimeout(pool, 150L);
+            QueryWorker w = pool.acquire();
+            long gen = generation(w);
+            // done stays true (no in-flight submit): close() must take the fast
+            // path and return the worker to the pool for reuse, not discard it.
+            closeQuery(w, gen);
+            Assert.assertTrue("an already-drained worker must be returned to the pool, not discarded",
+                    allWorkers(pool).contains(w));
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    private static ArrayList<QueryWorker> allWorkers(QueryClientPool pool) throws Exception { // reflective read of the pool's "all" field
+        Field f = QueryClientPool.class.getDeclaredField("all");
+        f.setAccessible(true);
+        return (ArrayList<QueryWorker>) f.get(pool);
+    }
+
+    private static void closeQuery(QueryWorker w, long gen) throws Exception { // reflectively calls close(long) on the worker's query object
+        Object impl = queryImpl(w);
+        Method close = impl.getClass().getDeclaredMethod("close", long.class);
+        close.setAccessible(true);
+        close.invoke(impl, gen); // gen is the value generation(w) returned for this borrow
+    }
+
+    private static Thread dispatchThread(QueryWorker w) throws Exception { // reflective read of QueryWorker's "thread" field
+        Field f = QueryWorker.class.getDeclaredField("thread");
+        f.setAccessible(true);
+        return (Thread) f.get(w);
+    }
+
+    private static long generation(QueryWorker w) throws Exception { // reflective call to QueryWorker.generation()
+        Method m = QueryWorker.class.getDeclaredMethod("generation");
+        m.setAccessible(true);
+        return (long) m.invoke(w);
+    }
+
+    private static Object queryImpl(QueryWorker w) throws Exception { // reflective read of QueryWorker's "query" field
+        Field queryF = QueryWorker.class.getDeclaredField("query");
+        queryF.setAccessible(true);
+        return queryF.get(w);
+    }
+
+    private static void setCloseQueryTimeout(QueryClientPool pool, long millis) throws Exception { // overwrites the pool's closeQueryTimeoutMillis
+        Field f = QueryClientPool.class.getDeclaredField("closeQueryTimeoutMillis");
+        f.setAccessible(true);
+        f.setLong(pool, millis);
+    }
+
+    private static void setDone(QueryWorker w, boolean done) throws Exception { // writes the "done" flag on the worker's query object
+        Object impl = queryImpl(w);
+        Field doneF = impl.getClass().getDeclaredField("done");
+        doneF.setAccessible(true);
+        doneF.setBoolean(impl, done);
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java
index 1ff33b76..f9cd8bc0 100644
--- a/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/QueryImplResetTest.java
@@ -24,11 +24,11 @@
 
 package io.questdb.client.test.impl;
 
-import io.questdb.client.Query;
 import io.questdb.client.cutlass.qwp.client.QwpBindSetter;
 import io.questdb.client.cutlass.qwp.client.QwpColumnBatch;
 import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler;
 import io.questdb.client.cutlass.qwp.client.QwpServerInfo;
+import io.questdb.client.std.str.StringSink;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -39,58 +39,52 @@
 public class QueryImplResetTest {
 
     /**
-     * Regression test for the state-carryover bug between consecutive
-     * submits on the per-thread {@code QuestDB#query()} handle.
+     * The Javadoc on both {@code Query} and {@code QuestDB#borrowQuery()}
+     * promises the leased handle is handed out "reset to empty". The reset is
+     * {@code QueryImpl.resetForBorrow()}, invoked from {@code QueryWorker.lease()}
+     * when {@code borrowQuery()} hands the pre-allocated handle out. It must
+     * clear the builder state (SQL, binds, handler) so a follow-up
+     * {@code submit()} cannot silently reuse a prior borrow's handler/binds,
+     * and it must leave the handle idle (done).
      * <p>
-     * The Javadoc on both {@code Query} and {@code QuestDB#query()} promises
-     * that the returned instance is "reset to empty" / "in a reset state".
-     * Before the fix, {@code QuestDBImpl.query()} returned the bare
-     * thread-local without nulling {@code userHandler} / {@code userBinds},
-     * so the second call below would silently reuse {@code h1}:
-     * <pre>
-     *   db.query().sql("SELECT 1").handler(h1).submit().await();
-     *   db.query().sql("SELECT 2").submit();    // no .handler() -- reuses h1
-     * </pre>
-     * The {@code if (userHandler == null)} check in {@code submit()} could
-     * not catch the misuse because the field was still set from the prior
-     * submit.
-     * <p>
-     * The fix is {@code QueryImpl.resetIfDone()}, invoked from
-     * {@code QuestDBImpl.query()} before the per-thread handle is returned.
-     * This test reaches into {@code QueryImpl} via reflection (the class is
-     * package-private and lives in a different package from this test) and
-     * asserts the reset clears all three configured fields when the prior
-     * run is in a terminal state.
+     * The reset is unconditional: the leased worker was just acquired from the
+     * pool, so it is always idle (done) at borrow time. This test reaches into
+     * {@code QueryImpl} by reflection (the class is package-private and lives
+     * in a different package from this test). Builder state is seeded directly
+     * via reflection rather than through the {@code Query} API because the
+     * lease-generation guard on the setters would dereference the (null) worker.
      */
     @Test
-    public void testResetIfDoneClearsBuilderStateInTerminalState() throws Exception {
+    public void testResetForBorrowClearsBuilderState() throws Exception {
         Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
-        Class<?> poolClass = Class.forName("io.questdb.client.impl.QueryClientPool");
+        Class<?> workerClass = Class.forName("io.questdb.client.impl.QueryWorker");
 
-        Constructor<?> ctor = queryImplClass.getDeclaredConstructor(poolClass);
+        Constructor<?> ctor = queryImplClass.getDeclaredConstructor(workerClass);
         ctor.setAccessible(true);
-        // QueryImpl never dereferences the pool outside of submit(); a null
-        // pool is fine for this state-only test.
-        Query q = (Query) ctor.newInstance(new Object[]{null});
-
-        // Mirror the post-submit().await() state: builder fields set,
-        // done flag true (the constructor default).
-        QwpColumnBatchHandler h = new NoopHandler();
-        QwpBindSetter b = values -> {
-            // no-op
-        };
-        q.sql("SELECT 1").binds(b).handler(h);
-
-        Method reset = queryImplClass.getDeclaredMethod("resetIfDone");
-        reset.setAccessible(true);
-        reset.invoke(q);
+        // resetForBorrow() never dereferences the worker; a null worker is fine
+        // for this state-only test.
+        Object q = ctor.newInstance(new Object[]{null});
 
         Field handlerF = queryImplClass.getDeclaredField("userHandler");
         Field bindsF = queryImplClass.getDeclaredField("userBinds");
         Field sqlBufF = queryImplClass.getDeclaredField("sqlBuffer");
+        Field doneF = queryImplClass.getDeclaredField("done");
         handlerF.setAccessible(true);
         bindsF.setAccessible(true);
         sqlBufF.setAccessible(true);
+        doneF.setAccessible(true);
+
+        // Seed builder state as a prior borrow would have left it.
+        handlerF.set(q, new NoopHandler());
+        bindsF.set(q, (QwpBindSetter) values -> {
+            // no-op
+        });
+        ((StringSink) sqlBufF.get(q)).put("SELECT 1");
+        doneF.setBoolean(q, false);
+
+        Method reset = queryImplClass.getDeclaredMethod("resetForBorrow");
+        reset.setAccessible(true);
+        reset.invoke(q);
 
         Assert.assertNull("userHandler must be cleared so a follow-up submit() without .handler() fails fast",
                 handlerF.get(q));
@@ -99,53 +93,43 @@ public void testResetIfDoneClearsBuilderStateInTerminalState() throws Exception
         CharSequence sqlBuffer = (CharSequence) sqlBufF.get(q);
         Assert.assertEquals("sqlBuffer must be empty so a follow-up submit() without .sql() throws 'sql is required'",
                 0, sqlBuffer.length());
+        Assert.assertTrue("done must be true so the handle starts idle, not in flight",
+                doneF.getBoolean(q));
     }
 
     /**
-     * Symmetric guard: when a submit is in flight ({@code done == false}),
-     * {@code resetIfDone()} must NOT touch the configured fields. The
-     * dispatched worker thread is reading {@code sqlBuffer} in
-     * {@code runOn()} and {@code userHandler} via the wrapping handler;
-     * clearing them mid-flight would race.
+     * {@code QuestDB#borrowQuery()} returns a thin lease that is freshly
+     * allocated per borrow, but the heavy state it wraps -- the per-worker
+     * {@code QueryImpl} -- is pre-allocated once and reused across borrows. This
+     * pins that contract: two {@code lease()} calls on the same worker return
+     * distinct lease wrappers that delegate to the same pooled {@code QueryImpl}.
+     * Reaches both package-private classes by reflection.
      */
     @Test
-    public void testResetIfDoneIsNoOpWhileSubmitInFlight() throws Exception {
-        Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
+    public void testLeaseWrapsSamePooledQueryImpl() throws Exception {
+        Class<?> workerClass = Class.forName("io.questdb.client.impl.QueryWorker");
         Class<?> poolClass = Class.forName("io.questdb.client.impl.QueryClientPool");
+        Class<?> clientClass = Class.forName("io.questdb.client.cutlass.qwp.client.QwpQueryClient");
+        Class<?> leaseClass = Class.forName("io.questdb.client.impl.QueryLease");
 
-        Constructor<?> ctor = queryImplClass.getDeclaredConstructor(poolClass);
+        // lease() never dereferences the client or pool (it only resets the
+        // reused QueryImpl and stamps the current generation), so nulls are fine
+        // for this structure-only test -- mirrors the null-worker shortcut above.
+        Constructor<?> ctor = workerClass.getDeclaredConstructor(clientClass, poolClass, int.class);
         ctor.setAccessible(true);
-        Query q = (Query) ctor.newInstance(new Object[]{null});
+        Object worker = ctor.newInstance(null, null, 0);
 
-        QwpColumnBatchHandler h = new NoopHandler();
-        QwpBindSetter b = values -> {
-            // no-op
-        };
-        q.sql("SELECT 1").binds(b).handler(h);
-
-        // Flip the in-flight flag by setting done=false directly.
-        Field doneF = queryImplClass.getDeclaredField("done");
-        doneF.setAccessible(true);
-        doneF.setBoolean(q, false);
+        Method leaseM = workerClass.getDeclaredMethod("lease");
+        leaseM.setAccessible(true);
+        Object leaseA = leaseM.invoke(worker); // first borrow
+        Object leaseB = leaseM.invoke(worker); // second borrow of the same worker
 
-        Method reset = queryImplClass.getDeclaredMethod("resetIfDone");
-        reset.setAccessible(true);
-        reset.invoke(q);
+        Assert.assertNotSame("each borrow must hand back a fresh lease wrapper", leaseA, leaseB);
 
-        Field handlerF = queryImplClass.getDeclaredField("userHandler");
-        Field bindsF = queryImplClass.getDeclaredField("userBinds");
-        Field sqlBufF = queryImplClass.getDeclaredField("sqlBuffer");
-        handlerF.setAccessible(true);
-        bindsF.setAccessible(true);
-        sqlBufF.setAccessible(true);
-
-        Assert.assertSame("userHandler must survive resetIfDone() while a submit is in flight",
-                h, handlerF.get(q));
-        Assert.assertSame("userBinds must survive resetIfDone() while a submit is in flight",
-                b, bindsF.get(q));
-        CharSequence sqlBuffer = (CharSequence) sqlBufF.get(q);
-        Assert.assertEquals("sqlBuffer must survive resetIfDone() while a submit is in flight",
-                "SELECT 1", sqlBuffer.toString());
+        Field implF = leaseClass.getDeclaredField("impl"); // presumably the QueryImpl the lease delegates to
+        implF.setAccessible(true);
+        Assert.assertSame("both leases must wrap the same pooled QueryImpl (zero-allocation reuse of the heavy state)",
+                implF.get(leaseA), implF.get(leaseB));
     }
 
     private static final class NoopHandler implements QwpColumnBatchHandler {
diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java
new file mode 100644
index 00000000..f9e83fb7
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/impl/QueryLeaseGenerationTest.java
@@ -0,0 +1,273 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test.impl;
+
+import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import io.questdb.client.impl.QueryClientPool;
+import io.questdb.client.impl.QueryWorker;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.ArrayDeque;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * Regression tests for M1: a stale {@code Query} lease (held after close, or a
+ * cached {@code Completion}) must not disturb a later borrow of the same
+ * worker. The reused per-worker {@code QueryImpl} alone cannot distinguish a
+ * stale handle from a live one -- the fix stamps each borrow with a monotonic
+ * generation under the pool lock and validates it on close/cancel/release.
+ * <p>
+ * These exercise the package-private internals by reflection (the same
+ * white-box style as the other tests in this package). They construct workers
+ * with a non-connected {@code newPlainText} client and never start the worker
+ * thread, so no network or I/O thread is involved.
+ */
+public class QueryLeaseGenerationTest {
+
+    /**
+     * A stale {@code Completion.cancel()} (its lease long since released and the
+     * worker re-borrowed) must NOT reach the worker's client -- otherwise it
+     * would cancel whatever query the current borrower is running. We observe
+     * "reached the client" via the client's pending-cancel latch, which
+     * {@code QwpQueryClient.cancel()} sets first thing.
+     */
+    @Test
+    public void testStaleCancelDoesNotReachClient() throws Exception {
+        // QueryWorker is imported, so use its class literal; QueryImpl is resolved by name.
+        Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
+        Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Field queryF = QueryWorker.class.getDeclaredField("query");
+        queryF.setAccessible(true);
+        Field doneF = queryImplClass.getDeclaredField("done");
+        doneF.setAccessible(true);
+        Method cancel = queryImplClass.getDeclaredMethod("cancel", long.class);
+        cancel.setAccessible(true);
+
+        // cancel(gen) validates the generation under the pool lock, so the
+        // worker needs a real pool to lock on (the worker thread is never
+        // started, so no network or I/O thread is involved).
+        QueryClientPool pool = new QueryClientPool(
+                "ws::addr=localhost:9000;",
+                /*minSize*/ 0, /*maxSize*/ 2,
+                /*acquireTimeoutMillis*/ 1_000L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE);
+        try {
+            // Live lease: generation 1 (one acquire), query in flight -> cancel(1)
+            // must reach the client.
+            try (QwpQueryClient live = QwpQueryClient.newPlainText("localhost", 9000)) {
+                QueryWorker w = new QueryWorker(live, pool, 0);
+                bump.invoke(w); // generation -> 1 (acquire stamp)
+                Object impl = queryF.get(w);
+                doneF.setBoolean(impl, false); // pretend a submit is in flight
+                cancel.invoke(impl, 1L);
+                Assert.assertTrue("cancel() on the live lease must reach the client",
+                        live.isPendingCancelForTest());
+            }
+
+            // Stale lease: the worker was borrowed (gen 1), released and re-borrowed
+            // (gen now 3). A cancel from the old lease (gen 1) must be dropped, even
+            // though the current query is in flight.
+            try (QwpQueryClient reused = QwpQueryClient.newPlainText("localhost", 9000)) {
+                QueryWorker w = new QueryWorker(reused, pool, 0);
+                bump.invoke(w); // -> 1 (first acquire)
+                bump.invoke(w); // -> 2 (release)
+                bump.invoke(w); // -> 3 (second acquire by a new borrower)
+                Object impl = queryF.get(w);
+                doneF.setBoolean(impl, false); // the new borrower's query is in flight
+                cancel.invoke(impl, 1L); // stale lease cancels
+                Assert.assertFalse("a stale lease's cancel() must NOT reach the client and "
+                                + "cancel a different borrower's in-flight query",
+                        reused.isPendingCancelForTest());
+            }
+        } finally {
+            pool.close();
+        }
+    }
+
+    /**
+     * The TOCTOU the locked cancel closes: a cross-thread watchdog calls
+     * {@code cancel(gen)} while its lease is live, but the lease goes stale (the
+     * worker is released and re-borrowed) before the wire cancel fires. The
+     * cancel must re-validate the generation atomically with the cancel, under
+     * the pool lock, or it would abort the new borrower's query.
+     * <p>
+     * Driven deterministically: the test thread holds the pool lock, so the
+     * watchdog's cancel parks inside the pool's generation re-check. We then
+     * advance the generation (release + re-borrow) under the lock and release
+     * it. The parked cancel must observe the new generation and drop. An
+     * unlocked check-then-cancel would not park, would pass its check at the
+     * still-live generation, and would fire the wire cancel.
+     */
+    @Test
+    public void testConcurrentCancelDoesNotReachClientAfterReborrow() throws Exception {
+        Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Field queryF = QueryWorker.class.getDeclaredField("query");
+        queryF.setAccessible(true);
+        Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
+        Field doneF = queryImplClass.getDeclaredField("done");
+        doneF.setAccessible(true);
+        Method cancel = queryImplClass.getDeclaredMethod("cancel", long.class);
+        cancel.setAccessible(true);
+        Field poolLockF = QueryClientPool.class.getDeclaredField("lock");
+        poolLockF.setAccessible(true);
+
+        QueryClientPool pool = new QueryClientPool(
+                "ws::addr=localhost:9000;",
+                /*minSize*/ 0, /*maxSize*/ 2,
+                /*acquireTimeoutMillis*/ 1_000L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE);
+        QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000);
+        try {
+            final QueryWorker w = new QueryWorker(client, pool, 0);
+            bump.invoke(w); // generation -> 1; the watchdog's lease captured 1
+            final Object impl = queryF.get(w);
+            doneF.setBoolean(impl, false); // a query is in flight
+
+            ReentrantLock poolLock = (ReentrantLock) poolLockF.get(pool);
+            final CountDownLatch atCancel = new CountDownLatch(1);
+            final CountDownLatch cancelReturned = new CountDownLatch(1);
+            final AtomicReference<Throwable> err = new AtomicReference<>();
+
+            // Hold the pool lock so the watchdog's cancel cannot complete until we let go.
+            poolLock.lock();
+            try {
+                Thread watchdog = new Thread(() -> {
+                    atCancel.countDown();
+                    try {
+                        cancel.invoke(impl, 1L); // lease generation captured at borrow = 1
+                    } catch (Throwable t) {
+                        err.set(t);
+                    } finally {
+                        cancelReturned.countDown();
+                    }
+                }, "watchdog-cancel");
+                watchdog.start();
+                Assert.assertTrue("watchdog must start", atCancel.await(5, TimeUnit.SECONDS));
+
+                // A locked cancel() parks on the pool lock and cannot return while we
+                // hold it; an unlocked check-then-cancel would already have fired.
+                Assert.assertFalse("cancel() must re-check the generation under the pool "
+                                + "lock, so it cannot complete while the lock is held",
+                        cancelReturned.await(200, TimeUnit.MILLISECONDS));
+
+                // The lease goes stale under the parked cancel: release (-> 2), re-borrow (-> 3).
+                bump.invoke(w);
+                bump.invoke(w);
+            } finally { // released even if an assertion above fails, so the watchdog cannot stay parked
+                poolLock.unlock();
+            }
+
+            Assert.assertTrue("cancel() must return once the pool lock is free",
+                    cancelReturned.await(5, TimeUnit.SECONDS));
+            if (err.get() != null) {
+                throw new AssertionError("cancel() threw", err.get());
+            }
+            Assert.assertFalse("a cancel whose lease went stale while parked on the pool "
+                            + "lock must NOT reach the client and abort the new borrower's query",
+                    client.isPendingCancelForTest());
+        } finally {
+            client.close();
+            pool.close();
+        }
+    }
+
+    /**
+     * The pool-wide blast radius of M1: a stale (duplicate / post-reborrow)
+     * release must never enqueue a worker that a live borrower owns, otherwise
+     * the worker sits in {@code available} twice and is handed to two borrowers
+     * at once. The generation captured at borrow time, re-checked under the pool
+     * lock, makes this impossible.
+     */
+    @Test
+    @SuppressWarnings("unchecked")
+    public void testStaleReleaseDoesNotEnqueueWorkerTwice() throws Exception {
+        // QueryClientPool is imported, so use its class literal for the reflective lookups.
+        Method release = QueryClientPool.class.getDeclaredMethod("release", QueryWorker.class, long.class);
+        release.setAccessible(true);
+        Field availableF = QueryClientPool.class.getDeclaredField("available");
+        availableF.setAccessible(true);
+        Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Method generation = QueryWorker.class.getDeclaredMethod("generation");
+        generation.setAccessible(true);
+
+        QueryClientPool pool = new QueryClientPool(
+                "ws::addr=localhost:9000;",
+                /*minSize*/ 0, /*maxSize*/ 2,
+                /*acquireTimeoutMillis*/ 1_000L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE);
+        QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000);
+        try {
+            ArrayDeque<QueryWorker> available = (ArrayDeque<QueryWorker>) availableF.get(pool);
+            QueryWorker w = new QueryWorker(client, pool, 0);
+
+            // acquire #1 stamps generation 1; the lease (A) captures 1.
+            bump.invoke(w);
+            Assert.assertEquals(1L, generation.invoke(w));
+
+            // close A -> release(w, 1): matches, enqueues once.
+            release.invoke(pool, w, 1L);
+            Assert.assertEquals("valid release must enqueue the worker once", 1, available.size());
+
+            // close A again (duplicate, e.g. explicit close + try-with-resources)
+            // -> release(w, 1): generation already bumped to 2, so it is dropped.
+            release.invoke(pool, w, 1L);
+            Assert.assertEquals("duplicate release of the same lease must be dropped",
+                    1, available.size());
+
+            // acquire #2 hands the worker to a new borrower (B): pull it out and
+            // stamp generation 3.
+            available.pollFirst();
+            bump.invoke(w);
+            Assert.assertEquals(3L, generation.invoke(w));
+
+            // A stray close from the long-dead lease A -> release(w, 1): dropped,
+            // so B's worker is NOT re-enqueued while B still owns it.
+            release.invoke(pool, w, 1L);
+            Assert.assertEquals("a post-reborrow stale release must NOT enqueue the "
+                            + "worker while another borrower owns it",
+                    0, available.size());
+
+            // B's own close -> release(w, 3): matches, enqueues legitimately.
+            release.invoke(pool, w, 3L);
+            Assert.assertEquals("the current borrower's release must still work",
+                    1, available.size());
+        } finally {
+            client.close();
+            pool.close();
+        }
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java b/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java
index e9041448..5a3093a0 100644
--- a/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/QueryWorkerTest.java
@@ -26,16 +26,35 @@
 
 import io.questdb.client.Completion;
 import io.questdb.client.cutlass.qwp.client.QwpQueryClient;
+import io.questdb.client.impl.QueryClientPool;
 import io.questdb.client.impl.QueryWorker;
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 
+/**
+ * Unit tests for {@link QueryWorker}.
+ * <p>
+ * Coverage boundary: the lost-dispatch fix for the single-flight-reuse race
+ * (clearing {@code current} under {@code signalLock} at the moment of
+ * consumption rather than in a post-{@code runOn()} finally) has no
+ * deterministic unit reproduction here. Reproducing the clobber needs the
+ * worker to be mid-{@code runOn(client)} when the user thread re-dispatches on
+ * the same lease, which requires a live query client to drive
+ * {@code client.execute(...)} to its terminal callback. That regression is
+ * guarded end-to-end by {@code QuestDBFacadeE2ETest.testSustainedMixedConcurrency}
+ * in the parent questdb repo (more threads than pool slots, repeated
+ * submit/await per lease). {@link #testShutdownRacingDispatchMustNotStrandCaller()}
+ * below covers the adjacent but distinct shutdown-vs-dispatch branch only --
+ * reverting the lost-dispatch hunk would not fail it.
+ */
 public class QueryWorkerTest {
 
     /**
@@ -68,14 +87,14 @@ public void testClientGetterReturnsConstructorInstance() {
      * state directly: it parks the worker on its condition, then takes the
      * worker's own {@code signalLock} and atomically sets both
      * {@code current} and {@code shuttingDown} before signalling. After the
-     * worker thread exits, the test asserts the {@link Completion} has been
-     * signalled. Today the assertion fails because the run loop's early
-     * return strands the {@code QueryImpl}.
+     * worker thread exits, the test asserts the {@code QueryImpl} was signalled
+     * to done. Without the fix the assertion fails because the run loop's early
+     * return strands the {@code QueryImpl} with {@code done==false}, so any
+     * caller blocked in {@code Completion.await()} would hang forever.
      */
     @Test(timeout = 30_000)
     public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception {
         Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
-        Class<?> poolClass = Class.forName("io.questdb.client.impl.QueryClientPool");
 
         Field lockF = QueryWorker.class.getDeclaredField("signalLock");
         Field condF = QueryWorker.class.getDeclaredField("signalCondition");
@@ -87,9 +106,9 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception {
         }
 
         Field doneF = queryImplClass.getDeclaredField("done");
-        Field completionF = queryImplClass.getDeclaredField("completion");
+        Field unexpectedF = queryImplClass.getDeclaredField("unexpectedError");
         doneF.setAccessible(true);
-        completionF.setAccessible(true);
+        unexpectedF.setAccessible(true);
 
         // No QwpQueryClient is constructed here: runLoop exits at the
         // shuttingDown check before reaching the first reference to
@@ -123,11 +142,10 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception {
 
         // Construct a QueryImpl with done=false, mimicking the state set up
         // by QueryImpl.submit() just before it calls worker.dispatch().
-        Constructor<?> ctor = queryImplClass.getDeclaredConstructor(poolClass);
+        Constructor<?> ctor = queryImplClass.getDeclaredConstructor(QueryWorker.class);
         ctor.setAccessible(true);
         Object queryImpl = ctor.newInstance(new Object[]{null});
         doneF.setBoolean(queryImpl, false);
-        Completion completion = (Completion) completionF.get(queryImpl);
 
         // Atomically force the racy state under the worker's own lock:
         // current set AND shuttingDown set before the worker wakes.
@@ -145,20 +163,99 @@ public void testShutdownRacingDispatchMustNotStrandCaller() throws Exception {
         Assert.assertFalse("worker thread did not exit after shuttingDown=true",
                 t.isAlive());
 
-        // The Completion must have been signalled. Without the fix, await(2s)
-        // returns false because signalDone is never called.
-        boolean completed;
-        try {
-            completed = completion.await(2, TimeUnit.SECONDS);
-        } catch (RuntimeException expectedAfterFix) {
-            // Once fixed, the worker is expected to call signalUnexpected
-            // with a QueryException("QuestDB handle is closed") which
-            // await() rethrows. Either form of "completed" is acceptable;
-            // the bug is the silent hang.
-            completed = true;
-        }
+        // The QueryImpl must have been signalled to done. Without the fix,
+        // done stays false because signalDone is never called, so a caller in
+        // Completion.await() would hang forever. The worker reaches the
+        // shutdown-race branch and calls signalUnexpected("QuestDB handle is
+        // closed"), which sets done=true and records the unexpected error.
         Assert.assertTrue("BUG: QueryWorker.runLoop returned with shuttingDown=true "
                 + "while current!=null, never invoking runOn or signalUnexpected. "
-                + "The caller's Completion.await() hangs forever.", completed);
+                + "The caller's Completion.await() hangs forever.", doneF.getBoolean(queryImpl));
+        Assert.assertNotNull("signalUnexpected must record the closed-handle error",
+                unexpectedF.get(queryImpl));
+    }
+
+    /**
+     * Result handlers (onBatch/onEnd/onError) run inline on the worker's
+     * dispatch thread. The blocking lease ops -- {@code close()} and the two
+     * {@code await()} variants -- would there wait on a terminal event that
+     * only this same thread can deliver, a permanent self-deadlock. The
+     * reentrancy guard must turn that into an immediate IllegalStateException.
+     * <p>
+     * The guard compares {@code Thread.currentThread()} to the worker's
+     * dispatch thread, so this test points that field at the test thread (the
+     * worker is never started) to stand in for a reentrant in-handler call.
+     * Without the guard, {@code close()}/{@code await()} would park forever and
+     * the method-level timeout would fail the test.
+     */
+    @Test(timeout = 30_000)
+    public void testCloseAndAwaitFromWorkerThreadThrowInsteadOfDeadlocking() throws Exception {
+        Class<?> queryImplClass = Class.forName("io.questdb.client.impl.QueryImpl");
+        Field queryF = QueryWorker.class.getDeclaredField("query");
+        queryF.setAccessible(true);
+        Field threadF = QueryWorker.class.getDeclaredField("thread"); // overwritten below to impersonate the worker
+        threadF.setAccessible(true);
+        Field doneF = queryImplClass.getDeclaredField("done");
+        doneF.setAccessible(true);
+        Method bump = QueryWorker.class.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Method isWorker = QueryWorker.class.getDeclaredMethod("isCurrentThreadWorker"); // used to observe the guard
+        isWorker.setAccessible(true);
+        Method close = queryImplClass.getDeclaredMethod("close", long.class); // long arg: presumably the lease generation -- confirm
+        close.setAccessible(true);
+        Method awaitNoTimeout = queryImplClass.getDeclaredMethod("await", long.class);
+        awaitNoTimeout.setAccessible(true);
+        Method awaitTimed = queryImplClass.getDeclaredMethod("await", long.class, long.class, TimeUnit.class);
+        awaitTimed.setAccessible(true);
+
+        QueryClientPool pool = new QueryClientPool(
+                "ws::addr=localhost:9000;",
+                /*minSize*/ 0, /*maxSize*/ 2,
+                /*acquireTimeoutMillis*/ 1_000L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE);
+        QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000);
+        try {
+            QueryWorker w = new QueryWorker(client, pool, 0);
+            bump.invoke(w); // generation -> 1: a live lease
+            Object impl = queryF.get(w);
+            doneF.setBoolean(impl, false); // a submit is in flight, as during a handler
+
+            // Off the worker thread the guard must NOT fire.
+            Assert.assertFalse("guard must not fire on a normal caller thread",
+                    (Boolean) isWorker.invoke(w));
+
+            // Stand in for a reentrant call from inside a result handler: the
+            // guard compares Thread.currentThread() to the worker's dispatch
+            // thread, so point that field at this thread.
+            threadF.set(w, Thread.currentThread());
+            Assert.assertTrue((Boolean) isWorker.invoke(w));
+
+            assertThrowsHandlerReentry("close", () -> close.invoke(impl, 1L)); // 1L = generation stamped by bump above
+            assertThrowsHandlerReentry("await", () -> awaitNoTimeout.invoke(impl, 1L));
+            assertThrowsHandlerReentry("await(timeout)",
+                    () -> awaitTimed.invoke(impl, 1L, 5L, TimeUnit.SECONDS));
+        } finally {
+            client.close();
+            pool.close();
+        }
+    }
+
+    private static void assertThrowsHandlerReentry(String op, ReflectiveCall call) throws Exception {
+        try {
+            call.run();
+            Assert.fail(op + "() from the worker thread must throw, not block/deadlock");
+        } catch (InvocationTargetException e) { // reflection wraps whatever the invoked method threw
+            Throwable cause = e.getCause();
+            Assert.assertTrue(op + "(): expected IllegalStateException, was " + cause,
+                    cause instanceof IllegalStateException);
+            Assert.assertTrue(op + "(): message must point at cancel(), was: " + cause.getMessage(),
+                    cause.getMessage() != null && cause.getMessage().contains("cancel()")); // null-safe: no NPE on a message-less exception
+        }
+    }
+
+    @FunctionalInterface
+    private interface ReflectiveCall { // lambda shape for a reflective call that may throw a checked exception
+        void run() throws Exception;
     }
 }
diff --git a/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java
index 93b10301..533360be 100644
--- a/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/QuestDBImplErrorSafetyTest.java
@@ -30,8 +30,6 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Proxy;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Consumer;
@@ -48,9 +46,9 @@
 //
 // Sender is an interface, faked with a Proxy whose close() flips a flag, injected
 // via the SenderPool senderFactory seam. The connect Error is injected via the
-// QueryClientPool connectHook seam. Both are passed through the package-private
-// QuestDBImpl seam constructor (reached by reflection -- the main module is
-// declared `open`); production callers pass null for both.
+// QueryClientPool connectHook seam. Both are passed through the @TestOnly public
+// QuestDBImpl seam constructor; production uses the public overload that passes
+// null for both.
 public class QuestDBImplErrorSafetyTest {
 
     // Non-SF http config: the SenderPool factory replaces the build, but the
@@ -122,33 +120,15 @@ private static Sender fakeSender(AtomicBoolean closedFlag) {
 
     private static QuestDBImpl newQuestDB(
             IntFunction<Sender> senderFactory, Consumer<QwpQueryClient> connectHook
-    ) throws Exception {
-        Constructor<QuestDBImpl> c = QuestDBImpl.class.getDeclaredConstructor(
-                String.class, String.class, int.class, int.class, int.class, int.class,
-                long.class, long.class, long.class, long.class,
-                IntFunction.class, Consumer.class);
-        c.setAccessible(true);
-        try {
-            return c.newInstance(
-                    SENDER_CFG, QUERY_CFG,
-                    /*senderMin*/ 1, /*senderMax*/ 1,
-                    /*queryMin*/ 1, /*queryMax*/ 1,
-                    /*acquireTimeoutMillis*/ 250L,
-                    /*idleTimeoutMillis*/ Long.MAX_VALUE,
-                    /*maxLifetimeMillis*/ Long.MAX_VALUE,
-                    /*housekeeperIntervalMillis*/ Long.MAX_VALUE,
-                    senderFactory, connectHook);
-        } catch (InvocationTargetException e) {
-            // Unwrap so the caller sees the real construction failure (Error or
-            // RuntimeException), matching a direct constructor invocation.
-            Throwable cause = e.getCause();
-            if (cause instanceof RuntimeException) {
-                throw (RuntimeException) cause;
-            }
-            if (cause instanceof Error) {
-                throw (Error) cause;
-            }
-            throw e;
-        }
+    ) {
+        return new QuestDBImpl(
+                SENDER_CFG, QUERY_CFG,
+                /*senderMin*/ 1, /*senderMax*/ 1,
+                /*queryMin*/ 1, /*queryMax*/ 1,
+                /*acquireTimeoutMillis*/ 250L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE,
+                /*housekeeperIntervalMillis*/ Long.MAX_VALUE,
+                senderFactory, connectHook);
     }
 }
diff --git a/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java
index c5c5edb7..e6f5eb69 100644
--- a/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/QwpQueryClientConfigHonoredTest.java
@@ -67,6 +67,7 @@ public void testEveryEgressKeyIsHonored() {
         assertHonored("zone=us-east", "zone", "us-east");
         // COMMON applied by egress.
         assertHonored("auth_timeout_ms=7777", "auth_timeout_ms", 7777L);
+        assertHonored("connect_timeout=6000", "connect_timeout", 6000);
 
         // Credentials become the Authorization header, including the user/pass aliases.
         String basic = "Basic " + Base64.getEncoder()
diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java
new file mode 100644
index 00000000..5deeb5ac
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/impl/SenderLeaseGenerationTest.java
@@ -0,0 +1,147 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test.impl;
+
+import io.questdb.client.Sender;
+import io.questdb.client.impl.PooledSender;
+import io.questdb.client.impl.SenderPool;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.ArrayDeque;
+
+/**
+ * Ingest-side mirror of {@code QueryLeaseGenerationTest}: a stale pooled-Sender
+ * handle (held after close, with the slot since re-borrowed) must not disturb a
+ * later borrow of the same slot. {@code PooledSender} is now a fresh per-borrow
+ * wrapper carrying the lease generation; the reused {@code SenderSlot} validates
+ * it under the pool lock so a stale close/write is dropped.
+ * <p>
+ * Reaches package-private internals by reflection (same white-box style as the
+ * other tests here); {@code SenderSlot} is constructed with a {@code null}
+ * delegate, which the paths under test never dereference.
+ */
+public class SenderLeaseGenerationTest {
+
+    private static final String DEAD_HTTP_CONFIG =
+            "http::addr=127.0.0.1:1;protocol_version=2;auto_flush=off;";
+
+    /**
+     * Guards the pool-wide blast radius: a stale (duplicate or post-reborrow) close
+     * must never enqueue a slot a live borrower owns, or two borrowers would write
+     * into one delegate's buffer at once. Drives {@code giveBack} against a forged
+     * slot and checks the size of the pool's {@code available} deque after each call.
+     */
+    @Test
+    @SuppressWarnings("unchecked")
+    public void testStaleGiveBackDoesNotEnqueueSlotTwice() throws Exception {
+        Class<?> slotClass = Class.forName("io.questdb.client.impl.SenderSlot");
+        Constructor<?> slotCtor = slotClass.getDeclaredConstructor(Sender.class, SenderPool.class, int.class);
+        slotCtor.setAccessible(true);
+        Method bump = slotClass.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Method generation = slotClass.getDeclaredMethod("generation");
+        generation.setAccessible(true);
+        Constructor<PooledSender> leaseCtor =
+                PooledSender.class.getDeclaredConstructor(slotClass, long.class);
+        leaseCtor.setAccessible(true);
+        Field availableF = SenderPool.class.getDeclaredField("available");
+        availableF.setAccessible(true);
+
+        try (SenderPool pool = new SenderPool(
+                DEAD_HTTP_CONFIG, /*minSize*/ 0, /*maxSize*/ 2,
+                /*acquireTimeoutMillis*/ 1_000L,
+                /*idleTimeoutMillis*/ Long.MAX_VALUE,
+                /*maxLifetimeMillis*/ Long.MAX_VALUE)) {
+            ArrayDeque<Object> available = (ArrayDeque<Object>) availableF.get(pool);
+            Object slot = slotCtor.newInstance(null, pool, -1); // forged slot: null Sender delegate
+
+            // Simulated borrow #1: one bump moves the slot to generation 1; lease A captures 1.
+            bump.invoke(slot);
+            Assert.assertEquals(1L, generation.invoke(slot));
+            PooledSender leaseA = leaseCtor.newInstance(slot, 1L);
+
+            // close A -> giveBack(A): lease generation matches the slot's, so it is enqueued once.
+            pool.giveBack(leaseA);
+            Assert.assertEquals("valid close must enqueue the slot once", 1, available.size());
+
+            // Duplicate close of A (e.g. explicit close + try-with-resources) -> giveBack(A):
+            // the first giveBack already moved the slot to generation 2, so this one is dropped.
+            pool.giveBack(leaseA);
+            Assert.assertEquals("duplicate close of the same lease must be dropped",
+                    1, available.size());
+
+            // Simulated borrow #2 for a new borrower B: dequeue the slot by hand, bump to 3.
+            available.pollFirst();
+            bump.invoke(slot);
+            Assert.assertEquals(3L, generation.invoke(slot));
+            PooledSender leaseB = leaseCtor.newInstance(slot, 3L);
+
+            // A stray close from the long-dead lease A (generation 1) must be dropped, so
+            // B's slot is NOT re-enqueued while B still owns it.
+            pool.giveBack(leaseA);
+            Assert.assertEquals("a post-reborrow stale close must NOT enqueue the slot "
+                    + "while another borrower owns it", 0, available.size());
+
+            // B's own close -> giveBack(B): generation 3 matches, so the slot is enqueued again.
+            pool.giveBack(leaseB);
+            Assert.assertEquals("the current borrower's close must still work",
+                    1, available.size());
+        }
+    }
+
+    /**
+     * A stale lease's data write must be rejected (not silently land in a slot a
+     * later borrower now owns): the generation guard in {@code SenderSlot.live()}
+     * must throw a "closed" {@code IllegalStateException} before any delegate use.
+     */
+    @Test
+    public void testStaleWriteIsRejected() throws Exception {
+        Class<?> slotClass = Class.forName("io.questdb.client.impl.SenderSlot");
+        Constructor<?> slotCtor = slotClass.getDeclaredConstructor(Sender.class, SenderPool.class, int.class);
+        slotCtor.setAccessible(true);
+        Method bump = slotClass.getDeclaredMethod("bumpGeneration");
+        bump.setAccessible(true);
+        Constructor<PooledSender> leaseCtor = PooledSender.class.getDeclaredConstructor(slotClass, long.class);
+        leaseCtor.setAccessible(true);
+
+        Object slot = slotCtor.newInstance(null, null, -1);
+        bump.invoke(slot); // generation -> 1, lease A captures 1
+        PooledSender leaseA = leaseCtor.newInstance(slot, 1L);
+        bump.invoke(slot); // released
+        bump.invoke(slot); // re-borrowed -> generation 3
+
+        try {
+            leaseA.table("x");
+            Assert.fail("a stale lease's write must throw, not reach the re-borrowed slot");
+        } catch (IllegalStateException expected) {
+            String msg = String.valueOf(expected.getMessage()); // null-safe: a missing message fails the check, no NPE
+            Assert.assertTrue("expected a \"closed\" rejection, got: " + msg, msg.contains("closed"));
+        }
+    }
+}
diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java
index b7b56e7a..6c4ae2d5 100644
--- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolErrorSafetyTest.java
@@ -29,7 +29,6 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.lang.reflect.Constructor;
 import java.lang.reflect.Proxy;
 import java.nio.file.Paths;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -246,10 +245,7 @@ private static Sender fakeSender(AtomicBoolean closedFlag) {
 
     private static SenderPool newPool(
             String cfg, int min, int max, long acquireMs, IntFunction<Sender> senderFactory
-    ) throws Exception {
-        Constructor<SenderPool> c = SenderPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class, IntFunction.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory);
+    ) {
+        return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory);
     }
 }
diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java
index e4b2b49a..2c76997d 100644
--- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolSfTest.java
@@ -43,7 +43,6 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
@@ -207,7 +206,10 @@ public void testReturnedSenderReusesSameSlot() throws Exception {
                     first.close();
                     PooledSender second = pool.borrow();
                     try {
-                        Assert.assertSame("returned slot must be recycled", first, second);
+                        // borrow() now returns a fresh wrapper each time; the
+                        // recycled thing is the underlying slot.
+                        Assert.assertSame("returned slot must be recycled",
+                                getField(first, "slot"), getField(second, "slot"));
                         Assert.assertEquals("no new slot dir on recycle", 1, countSlotDirs());
                         Assert.assertTrue(Files.exists(slot("default-0")));
                     } finally {
@@ -1883,9 +1885,12 @@ private static void rmDir(String dir) {
     }
 
     private static Sender getDelegate(PooledSender ps) throws Exception {
-        Field f = PooledSender.class.getDeclaredField("delegate");
+        Field slotF = PooledSender.class.getDeclaredField("slot");
+        slotF.setAccessible(true);
+        Object slot = slotF.get(ps);
+        Field f = slot.getClass().getDeclaredField("delegate");
         f.setAccessible(true);
-        return (Sender) f.get(ps);
+        return (Sender) f.get(slot);
     }
 
     // Invokes one of the pool's private managed-slot delegate factories
@@ -1931,27 +1936,20 @@ private static void invokeDiscardBroken(SenderPool pool, PooledSender ps) throws
         m.invoke(pool, ps);
     }
 
-    // Reaches the package-private senderFactory test seam by reflection so a
-    // test can inject a fake/forged delegate (mirrors SenderPoolErrorSafetyTest).
+    // Uses the @TestOnly senderFactory seam so a test can inject a fake/forged
+    // delegate (mirrors SenderPoolErrorSafetyTest).
     private static SenderPool newPoolWithFactory(
             String cfg, int min, int max, long acquireMs, IntFunction<Sender> senderFactory
-    ) throws Exception {
-        Constructor<SenderPool> c = SenderPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class, IntFunction.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory);
+    ) {
+        return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, senderFactory);
     }
 
-    // Reaches the package-private 8-arg constructor (deferStartupRecovery=true)
-    // by reflection so a test can build a pool whose SF startup recovery is NOT
-    // run inline -- mirroring the pooled QuestDB handle, which defers it to the
-    // housekeeper. senderFactory=null -> the real defaultSender().
-    private static SenderPool newDeferredPool(String cfg, int min, int max, long acquireMs) throws Exception {
-        Constructor<SenderPool> c = SenderPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class,
-                IntFunction.class, boolean.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, null, true);
+    // Uses the @TestOnly 8-arg constructor (deferStartupRecovery=true) so a test
+    // can build a pool whose SF startup recovery is NOT run inline -- mirroring
+    // the pooled QuestDB handle, which defers it to the housekeeper.
+    // senderFactory=null -> the real defaultSender().
+    private static SenderPool newDeferredPool(String cfg, int min, int max, long acquireMs) {
+        return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, null, true);
     }
 
     // Drives a deferred pool's startup recovery to completion (the housekeeper
@@ -1982,12 +1980,8 @@ private static void invokeMarkClosing(SenderPool pool) throws Exception {
     // test can drive the housekeeper recovery path against fully controlled
     // (fake) recoverers.
     private static SenderPool newDeferredPoolWithFactory(
-            String cfg, int min, int max, long acquireMs, IntFunction<Sender> factory) throws Exception {
-        Constructor<SenderPool> c = SenderPool.class.getDeclaredConstructor(
-                String.class, int.class, int.class, long.class, long.class, long.class,
-                IntFunction.class, boolean.class);
-        c.setAccessible(true);
-        return c.newInstance(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, factory, true);
+            String cfg, int min, int max, long acquireMs, IntFunction<Sender> factory) {
+        return new SenderPool(cfg, min, max, acquireMs, Long.MAX_VALUE, Long.MAX_VALUE, factory, true);
     }
 
     // Fake Sender whose drain() (for slot 0 only) parks until released, opening a
diff --git a/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java b/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java
index 85952f85..3f16b965 100644
--- a/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/SenderPoolTest.java
@@ -34,10 +34,7 @@
 
 import java.lang.reflect.Field;
 import java.lang.reflect.Proxy;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
 
 /**
  * Unit tests for the {@link SenderPool} borrow/return semantics. Uses the
@@ -57,26 +54,36 @@ public class SenderPoolTest {
             "http::addr=127.0.0.1:1;protocol_version=2;auto_flush=off;";
 
     @Test
-    public void testBorrowReturnRecyclesSameDecorator() {
+    public void testBorrowReturnRecyclesSameDecorator() throws Exception {
         try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) {
             Sender first = pool.borrow();
             first.close();
             Sender second = pool.borrow();
-            Assert.assertSame("returned decorator should be reused after close()", first, second);
+            // Each borrow is a fresh PooledSender wrapper; what the pool recycles
+            // is the underlying slot, so compare those rather than the handles.
+            Assert.assertSame("returned slot should be recycled after close()",
+                    slotOf(first), slotOf(second));
             second.close();
         }
     }
 
+    private static Object slotOf(Sender pooledWrapper) throws Exception {
+        Field slotField = PooledSender.class.getDeclaredField("slot"); // the wrapper's backing slot
+        slotField.setAccessible(true);
+        Object backingSlot = slotField.get(pooledWrapper); return backingSlot;
+    }
+
     @Test
-    public void testBrokenSenderIsNotReturnedToPool() {
+    public void testBrokenSenderIsNotReturnedToPool() throws Exception {
         // Borrowing, buffering a row, and then closing forces flush() against
-        // the unreachable address, which throws. The broken wrapper must not
-        // be returned to the pool: its delegate's buffer still holds the
-        // failed row, and on transports with terminal-failure semantics the
-        // delegate is also unusable. Either way, the next borrower must get
-        // a fresh wrapper.
+        // the unreachable address, which throws. The broken slot must not be
+        // returned to the pool: its delegate's buffer still holds the failed
+        // row, and on transports with terminal-failure semantics the delegate
+        // is also unusable. Either way, the next borrower must get a fresh
+        // slot, not the broken one.
         try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) {
             Sender first = pool.borrow();
+            Object firstSlot = slotOf(first);
             first.table("t").longColumn("v", 1).atNow();
             try {
                 first.close();
@@ -86,11 +93,23 @@ public void testBrokenSenderIsNotReturnedToPool() {
             }
             Sender second = pool.borrow();
             try {
-                Assert.assertNotSame("broken sender must not be handed back to next borrower",
-                        first, second);
+                // borrow() always hands out a FRESH PooledSender wrapper, so
+                // assertNotSame(first, second) on the wrappers is vacuously
+                // true and proves nothing -- it stays true whether or not the
+                // broken slot was discarded. What the pool recycles is the
+                // underlying slot, so a broken slot leaking back to the next
+                // borrower shows up as the SAME slot. Assert the slot differs.
+                Assert.assertNotSame("broken slot must not be handed back to next borrower",
+                        firstSlot, slotOf(second));
             } finally {
-                if (second != first) {
+                // On the failing path (broken slot recycled) second.close()
+                // re-throws, since its delegate's buffer still holds the
+                // failed row; swallow it so the assertion above is what
+                // surfaces rather than this incidental close() failure.
+                try {
                     second.close();
+                } catch (LineSenderException ignored) {
+                    // expected only when the regression is present
                 }
             }
         }
@@ -319,180 +338,6 @@ public void testReapIdleRespectsMinSize() throws InterruptedException {
         }
     }
 
-    @Test
-    public void testPinAfterCloseRejectsStaleEntry() throws Exception {
-        // Pin from a worker thread, close the pool from main. The worker's
-        // ThreadLocal still references its PooledSender, but the underlying
-        // delegate has been closed. The next pinToCurrentThread() on the
-        // worker must reject the stale entry instead of handing it back.
-        SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE);
-        CountDownLatch pinned = new CountDownLatch(1);
-        CountDownLatch closed = new CountDownLatch(1);
-        AtomicReference<Throwable> secondCallError = new AtomicReference<>();
-        Thread worker = new Thread(() -> {
-            try {
-                pool.pinToCurrentThread();
-                pinned.countDown();
-                Assert.assertTrue(closed.await(2, TimeUnit.SECONDS));
-                try {
-                    pool.pinToCurrentThread();
-                    secondCallError.set(new AssertionError("pinToCurrentThread after close must throw"));
-                } catch (LineSenderException e) {
-                    // expected
-                }
-            } catch (Throwable t) {
-                secondCallError.set(t);
-            }
-        });
-        worker.start();
-        Assert.assertTrue(pinned.await(2, TimeUnit.SECONDS));
-        pool.close();
-        closed.countDown();
-        worker.join(2_000);
-        if (secondCallError.get() != null) {
-            throw new AssertionError(secondCallError.get());
-        }
-    }
-
-    @Test
-    public void testPinAfterUserCloseDoesNotShareWrapper() {
-        // Same-thread reproducer for the pinToCurrentThread() sharing bug.
-        // The user closes a pinned Sender (the natural try-with-resources
-        // idiom on the public Sender API), then another consumer borrows
-        // the slot. pinToCurrentThread() must not hand that wrapper back:
-        // it is now owned by the second consumer.
-        //
-        // Pool size 1 collapses the race window into a linear sequence:
-        // the second borrower deterministically receives the same slot
-        // that was just returned, so the bug is observable at the
-        // wrapper-identity level without timing.
-        try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 100, Long.MAX_VALUE, Long.MAX_VALUE)) {
-            Sender pinned = pool.pinToCurrentThread();
-            pinned.close();                                   // pool slot returned; ThreadLocal still points at it
-            Sender stolen = pool.borrow();                    // pollFirst hands the same wrapper to a new consumer
-            try {
-                Sender repinned = pool.pinToCurrentThread();
-                Assert.fail("pinToCurrentThread() returned wrapper " + repinned
-                        + " already borrowed by another consumer " + stolen);
-            } catch (LineSenderException expected) {
-                // After fix: TL cleared (or owner-thread invalidated) on close;
-                // re-pin tries to borrow, pool is empty, acquireTimeout fires.
-            } finally {
-                stolen.close();
-            }
-        }
-    }
-
-    @Test
-    public void testPinAfterUserCloseDoesNotShareWrapperCrossThread() throws InterruptedException {
-        // Cross-thread variant of the same bug, mirroring the originally
-        // reported trigger: Thread A pins, closes, then re-pins while
-        // Thread B has borrowed the slot in between. A's ThreadLocal still
-        // references the wrapper, and pinToCurrentThread() hands it back --
-        // so A and B end up writing to the same underlying Sender.
-        try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 100, Long.MAX_VALUE, Long.MAX_VALUE)) {
-            CountDownLatch aClosed = new CountDownLatch(1);
-            CountDownLatch bBorrowed = new CountDownLatch(1);
-            AtomicReference<Sender> bSender = new AtomicReference<>();
-            AtomicReference<Throwable> failure = new AtomicReference<>();
-
-            Thread a = new Thread(() -> {
-                try {
-                    Sender s = pool.pinToCurrentThread();
-                    s.close();
-                    aClosed.countDown();
-                    Assert.assertTrue(bBorrowed.await(2, TimeUnit.SECONDS));
-                    try {
-                        Sender repinned = pool.pinToCurrentThread();
-                        failure.compareAndSet(null, new AssertionError(
-                                "pinToCurrentThread() returned wrapper " + repinned
-                                        + " already borrowed by another thread " + bSender.get()));
-                    } catch (LineSenderException expected) {
-                        // After fix: re-pin tries to borrow, pool is empty, times out.
-                    }
-                } catch (Throwable t) {
-                    failure.compareAndSet(null, t);
-                }
-            });
-            Thread b = new Thread(() -> {
-                try {
-                    Assert.assertTrue(aClosed.await(2, TimeUnit.SECONDS));
-                    bSender.set(pool.borrow());
-                } catch (Throwable t) {
-                    failure.compareAndSet(null, t);
-                } finally {
-                    bBorrowed.countDown();
-                }
-            });
-
-            a.start();
-            b.start();
-            a.join(4_000);
-            b.join(4_000);
-
-            if (bSender.get() != null) {
-                bSender.get().close();
-            }
-            if (failure.get() != null) {
-                throw new AssertionError(failure.get());
-            }
-        }
-    }
-
-    @Test
-    public void testReleaseAfterCloseIsSafe() throws Exception {
-        // Same setup as the pin test, but exercise releaseCurrentThread()
-        // instead. With a closed delegate underneath, the release path must
-        // not invoke flush() on the dead Sender.
-        SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 1, 1, 1_000, Long.MAX_VALUE, Long.MAX_VALUE);
-        CountDownLatch pinned = new CountDownLatch(1);
-        CountDownLatch closed = new CountDownLatch(1);
-        AtomicReference<Throwable> releaseError = new AtomicReference<>();
-        Thread worker = new Thread(() -> {
-            try {
-                pool.pinToCurrentThread();
-                pinned.countDown();
-                Assert.assertTrue(closed.await(2, TimeUnit.SECONDS));
-                pool.releaseCurrentThread();
-            } catch (Throwable t) {
-                releaseError.set(t);
-            }
-        });
-        worker.start();
-        Assert.assertTrue(pinned.await(2, TimeUnit.SECONDS));
-        pool.close();
-        closed.countDown();
-        worker.join(2_000);
-        if (releaseError.get() != null) {
-            throw new AssertionError(releaseError.get());
-        }
-    }
-
-    @Test
-    public void testThreadAffinityIsPerThread() throws InterruptedException {
-        try (SenderPool pool = new SenderPool(DEAD_HTTP_CONFIG, 2, 2, 1_000, Long.MAX_VALUE, Long.MAX_VALUE)) {
-            Sender mainPinned = pool.pinToCurrentThread();
-            Assert.assertSame("re-pin on same thread returns same instance",
-                    mainPinned, pool.pinToCurrentThread());
-
-            AtomicReference<Sender> otherPinned = new AtomicReference<>();
-            CountDownLatch done = new CountDownLatch(1);
-            Thread t = new Thread(() -> {
-                try {
-                    otherPinned.set(pool.pinToCurrentThread());
-                } finally {
-                    done.countDown();
-                }
-            });
-            t.start();
-            Assert.assertTrue(done.await(2, TimeUnit.SECONDS));
-            Assert.assertNotSame("different threads must get different pinned Senders",
-                    mainPinned, otherPinned.get());
-
-            pool.releaseCurrentThread();
-        }
-    }
-
     // ----------------------------------------------------------------------
     // Teardown robustness: a delegate close() can throw an Error (e.g. an
     // -ea AssertionError), not just a RuntimeException. The pool's best-effort
@@ -578,9 +423,12 @@ public void testCloseSurvivesDelegateCloseError() throws Exception {
      * while the test does not leak native memory.
      */
     private static void installFailingCloseDelegate(PooledSender ps, AtomicInteger closeAttempts) throws Exception {
-        Field f = PooledSender.class.getDeclaredField("delegate");
+        Field slotF = PooledSender.class.getDeclaredField("slot");
+        slotF.setAccessible(true);
+        Object slot = slotF.get(ps);
+        Field f = slot.getClass().getDeclaredField("delegate");
         f.setAccessible(true);
-        Sender real = (Sender) f.get(ps);
+        Sender real = (Sender) f.get(slot);
         Sender failing = (Sender) Proxy.newProxyInstance(
                 Sender.class.getClassLoader(),
                 new Class[]{Sender.class},
@@ -601,6 +449,6 @@ private static void installFailingCloseDelegate(PooledSender ps, AtomicInteger c
                     }
                     return method.invoke(real, args);
                 });
-        f.set(ps, failing);
+        f.set(slot, failing);
     }
 }
diff --git a/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java b/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java
index 69453c77..51003bfc 100644
--- a/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java
+++ b/core/src/test/java/io/questdb/client/test/impl/WsSenderConfigHonoredTest.java
@@ -77,6 +77,7 @@ public void testEveryIngressKeyIsHonored() {
         assertHonored("connection_listener_inbox_capacity=64", "connection_listener_inbox_capacity", 64);
         assertHonored("token=ey.abc", "token", "ey.abc");
         assertHonored("auth_timeout_ms=4321", "auth_timeout_ms", 4321L);
+        assertHonored("connect_timeout=7000", "connect_timeout", 7000);
 
         // username/password together (both-or-neither), and the user/pass aliases.
         Map<String, Object> creds = snapshot("ws::addr=h:9000;username=alice;password=secret;");
diff --git a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java
index 25b138bd..8d4ca755 100644
--- a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java
+++ b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketHandshakeOverflowTest.java
@@ -81,7 +81,8 @@ public void testHandshakeWrapOverflowWithNonEmptyBufferShouldNotLoopForever() th
                 CountDownLatch done = new CountDownLatch(1);
                 t = new Thread(() -> {
                     try {
-                        socket.startTlsSession("test.host");
+                        socket.startTlsSession("test.host", op -> {
+                        });
                     } catch (Throwable ignored) {
                         // Expected: a healthy handshake loop should fail loudly here,
                         // not spin forever. Any exception (AssertionError, SSLException,
diff --git a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java
index 506ce783..af5de346 100644
--- a/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java
+++ b/core/src/test/java/io/questdb/client/test/network/JavaTlsClientSocketTest.java
@@ -25,9 +25,11 @@
 package io.questdb.client.test.network;
 
 import io.questdb.client.ClientTlsConfiguration;
+import io.questdb.client.network.IOOperation;
 import io.questdb.client.network.JavaTlsClientSocket;
 import io.questdb.client.network.NetworkFacade;
 import io.questdb.client.network.NetworkFacadeImpl;
+import io.questdb.client.network.SocketReadinessWaiter;
 import io.questdb.client.std.MemoryTag;
 import io.questdb.client.std.Unsafe;
 import io.questdb.client.test.tools.TestUtils;
@@ -40,9 +42,11 @@
 import javax.net.ssl.SSLParameters;
 import javax.net.ssl.SSLSession;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.nio.ByteBuffer;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.BiFunction;
 
 import static org.junit.Assert.assertEquals;
@@ -190,6 +194,89 @@ public void testRecvProcessesBufferedRecordAfterEmptyOkUnwrap() throws Exception
         }
     }
 
+    /**
+     * Guards against a TLS handshake that busy-spins, or never ends, once the
+     * peer goes quiet. The stub engine stays in NEED_UNWRAP and answers every
+     * unwrap with BUFFER_UNDERFLOW, modelling a peer that completed TCP but
+     * never sent its half of the handshake. In that state runHandshake is
+     * expected to hand control to the readiness waiter rather than re-read in
+     * a tight loop. The waiter used here counts each request and throws on the
+     * first one, standing in for a bounded wait whose budget is spent. The
+     * test requires that exception to surface, exactly one READ wait and no
+     * WRITE wait; the method-level timeout fails the test if the handshake
+     * spins without ever reaching the waiter.
+     */
+    @Test(timeout = 30_000)
+    public void testHandshakeWaitsForReadabilityInsteadOfBusySpinning() throws Exception {
+        TestUtils.assertMemoryLeak(() -> {
+            try (JavaTlsClientSocket socket = newSocket()) {
+                invoke(socket, "prepareInternalBuffers");
+                setField(socket, "sslEngine", new StallingUnwrapSslEngine());
+
+                // Waits are tallied per direction so the two can be asserted separately.
+                final AtomicInteger readParks = new AtomicInteger();
+                final AtomicInteger writeParks = new AtomicInteger();
+                final SocketReadinessWaiter parkRecorder = op -> {
+                    (op == IOOperation.READ ? readParks : writeParks).incrementAndGet();
+                    // Throwing plays the part of the wait deadline expiring.
+                    throw new DeadlineReached();
+                };
+
+                // runHandshake is looked up reflectively and made accessible.
+                final Method handshake = JavaTlsClientSocket.class.getDeclaredMethod(
+                        "runHandshake", SocketReadinessWaiter.class);
+                handshake.setAccessible(true);
+
+                Throwable thrown = null;
+                try {
+                    handshake.invoke(socket, parkRecorder);
+                    Assert.fail("runHandshake must not complete the handshake against a stalled peer");
+                } catch (InvocationTargetException e) {
+                    thrown = e.getCause();
+                }
+                Assert.assertTrue(
+                        "handshake must surface the readiness waiter's deadline, was: " + thrown,
+                        thrown instanceof DeadlineReached);
+
+                Assert.assertEquals(
+                        "handshake must wait for the socket to become readable instead of busy-spinning",
+                        1, readParks.get());
+                Assert.assertEquals(
+                        "a NEED_UNWRAP stall must not trigger a write wait", 0, writeParks.get());
+            }
+        });
+    }
+
+    /**
+     * Happy-path counterpart: with an engine that makes progress (its first
+     * unwrap returns OK and reports the handshake FINISHED), runHandshake must
+     * return normally and never consult the readiness waiter. Any wait
+     * recorded by the counting waiter therefore fails the test, as does any
+     * exception thrown out of the reflective call.
+     */
+    @Test(timeout = 30_000)
+    public void testHandshakeCompletesWithoutWaitingWhenEngineMakesProgress() throws Exception {
+        TestUtils.assertMemoryLeak(() -> {
+            try (JavaTlsClientSocket socket = newSocket()) {
+                invoke(socket, "prepareInternalBuffers");
+                setField(socket, "sslEngine", new ProgressingUnwrapSslEngine());
+
+                final AtomicInteger parkCount = new AtomicInteger();
+                final SocketReadinessWaiter parkCounter = op -> parkCount.incrementAndGet();
+
+                final Method handshake = JavaTlsClientSocket.class.getDeclaredMethod(
+                        "runHandshake", SocketReadinessWaiter.class);
+                handshake.setAccessible(true);
+
+                // A normal return means the handshake finished; a throw fails the test.
+                handshake.invoke(socket, parkCounter);
+                Assert.assertEquals(
+                        "a handshake that makes progress must not wait on socket readiness",
+                        0, parkCount.get());
+            }
+        });
+    }
+
     private static void assertBytes(String expected, long ptr, int len) {
         Assert.assertEquals(expected.length(), len);
         for (int i = 0; i < len; i++) {
@@ -333,6 +420,48 @@ public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int
         }
     }
 
+    private static final class DeadlineReached extends RuntimeException { // sentinel thrown by the test's readiness waiter
+    }
+
+    private static final class ProgressingUnwrapSslEngine extends StubSslEngine {
+        @Override
+        public SSLEngineResult.HandshakeStatus getHandshakeStatus() {
+            return SSLEngineResult.HandshakeStatus.NEED_UNWRAP;
+        }
+
+        @Override
+        public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int length) {
+            // Acts as if a whole record was buffered: OK + FINISHED on the first call.
+            final SSLEngineResult.Status status = SSLEngineResult.Status.OK;
+            final SSLEngineResult.HandshakeStatus next = SSLEngineResult.HandshakeStatus.FINISHED;
+            return new SSLEngineResult(
+                    status, next,
+                    0, // bytes consumed
+                    0 // bytes produced
+            );
+        }
+    }
+
+    private static final class StallingUnwrapSslEngine extends StubSslEngine {
+        @Override
+        public SSLEngineResult.HandshakeStatus getHandshakeStatus() {
+            return SSLEngineResult.HandshakeStatus.NEED_UNWRAP;
+        }
+
+        @Override
+        public SSLEngineResult unwrap(ByteBuffer src, ByteBuffer[] dsts, int offset, int length) {
+            // Never makes progress: every call asks for more input and leaves the
+            // handshake in NEED_UNWRAP, as with a peer that never sends its bytes.
+            final SSLEngineResult.Status underflow = SSLEngineResult.Status.BUFFER_UNDERFLOW;
+            final SSLEngineResult.HandshakeStatus stillUnwrapping = SSLEngineResult.HandshakeStatus.NEED_UNWRAP;
+            return new SSLEngineResult(
+                    underflow, stillUnwrapping,
+                    0, // bytes consumed
+                    0 // bytes produced
+            );
+        }
+    }
+
     private static abstract class StubSslEngine extends SSLEngine {
         @Override
         public void beginHandshake() {
diff --git a/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java b/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java
new file mode 100644
index 00000000..b5d2c5d0
--- /dev/null
+++ b/core/src/test/java/io/questdb/client/test/network/NetConnectTimeoutTest.java
@@ -0,0 +1,118 @@
+/*+*****************************************************************************
+ *     ___                  _   ____  ____
+ *    / _ \ _   _  ___  ___| |_|  _ \| __ )
+ *   | | | | | | |/ _ \/ __| __| | | |  _ \
+ *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
+ *    \__\_\\__,_|\___||___/\__|____/|____/
+ *
+ *  Copyright (c) 2014-2019 Appsicle
+ *  Copyright (c) 2019-2026 QuestDB
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+package io.questdb.client.test.network;
+
+import io.questdb.client.network.NetworkFacade;
+import io.questdb.client.network.NetworkFacadeImpl;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+
+import java.net.InetSocketAddress;
+import java.net.ServerSocket;
+
+/**
+ * Exercises the native non-blocking connect-with-timeout primitive
+ * ({@link NetworkFacade#connectAddrInfoTimeout}).
+ */
+public class NetConnectTimeoutTest {
+
+    private static final NetworkFacade NF = NetworkFacadeImpl.INSTANCE;
+
+    @Test
+    public void testConnectRefusedReturnsErrorNotTimeout() throws Exception {
+        // Bind then immediately close to obtain a port with no listener; a
+        // connect to it is refused (RST) rather than timed out.
+        int port;
+        try (ServerSocket ss = new ServerSocket()) {
+            ss.bind(new InetSocketAddress("127.0.0.1", 0));
+            port = ss.getLocalPort();
+        }
+
+        long addrInfo = NF.getAddrInfo("127.0.0.1", port);
+        Assert.assertNotEquals(-1, addrInfo);
+        int fd = NF.socketTcp(true);
+        try {
+            Assert.assertTrue("socket creation failed, fd=" + fd, fd >= 0); // else the rc checks pass vacuously
+            int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 5_000);
+            Assert.assertNotEquals("refused connect must not report success", 0, rc);
+            Assert.assertNotEquals("refused connect must not be reported as a timeout", NetworkFacade.CONNECT_TIMEOUT, rc);
+        } finally {
+            NF.freeAddrInfo(addrInfo);
+            NF.close(fd);
+        }
+    }
+
+    @Test
+    public void testConnectSucceedsWithinTimeout() throws Exception {
+        try (ServerSocket ss = new ServerSocket()) {
+            ss.bind(new InetSocketAddress("127.0.0.1", 0));
+            int port = ss.getLocalPort();
+
+            long addrInfo = NF.getAddrInfo("127.0.0.1", port);
+            Assert.assertNotEquals(-1, addrInfo);
+            int fd = NF.socketTcp(true);
+            try {
+                Assert.assertTrue("socket creation failed, fd=" + fd, fd >= 0);
+                int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 5_000);
+                Assert.assertEquals("loopback connect should succeed", 0, rc);
+            } finally {
+                NF.freeAddrInfo(addrInfo);
+                NF.close(fd);
+            }
+        }
+    }
+
+    @Test
+    public void testConnectToBlackholeTimesOut() {
+        // 192.0.2.0/24 is TEST-NET-1 (RFC 5737); packets are silently dropped on
+        // a normal network, so the SYN goes unanswered and the timeout fires
+        // instead of the (much longer) OS connect timeout.
+        long addrInfo = NF.getAddrInfo("192.0.2.1", 9009);
+        Assert.assertNotEquals(-1, addrInfo);
+        int fd = NF.socketTcp(true);
+        try {
+            Assert.assertTrue("socket creation failed, fd=" + fd, fd >= 0);
+            long start = System.nanoTime();
+            int rc = NF.connectAddrInfoTimeout(fd, addrInfo, 500);
+            long elapsedMs = (System.nanoTime() - start) / 1_000_000L;
+
+            // Whatever the outcome, the key guarantee is that we never blocked
+            // on the (multi-minute) OS connect timeout.
+            Assert.assertTrue("connect must return near the budget, was " + elapsedMs + "ms", elapsedMs < 5_000);
+
+            // The result code depends on the runner's routing for TEST-NET-1: a
+            // dropped SYN yields a real timeout (the path under test), a runner
+            // with no route fails fast with ENETUNREACH/EHOSTUNREACH (rc == -1)
+            // and a rare appliance may even accept it (rc == 0). Only the timeout
+            // is assertable, so any other rc skips the test rather than flaking.
+            Assume.assumeTrue("no route to blackhole on this runner (rc=" + rc + ")",
+                    rc == NetworkFacade.CONNECT_TIMEOUT);
+        } finally {
+            NF.freeAddrInfo(addrInfo);
+            NF.close(fd);
+        }
+    }
+}
diff --git a/design/qwp-client-ergonomics-issues.md b/design/qwp-client-ergonomics-issues.md
new file mode 100644
index 00000000..fb5ed951
--- /dev/null
+++ b/design/qwp-client-ergonomics-issues.md
@@ -0,0 +1,214 @@
+# QWP client startup/failover — ergonomics issues
+
+Tracked sharp edges surfaced while reviewing
+[`qwp-client-startup-failover-behavior.md`](./qwp-client-startup-failover-behavior.md).
+Each entry is grounded in source. "Candidate" = likely defect worth changing;
+"Intended (revisit)" = deliberate contract that may still deserve reconsideration.
+
+Severity legend: **P1** user-visible footgun likely to cause an outage or hang ·
+**P2** confusing/surprising but recoverable · **P3** polish.
+
+---
+
+## ERG-1 — `initial_connect_retry` is implicitly promoted to SYNC (P1, Candidate)
+
+**Symptom.** A user sets `reconnect_max_duration_millis` for resilience and,
+without setting `initial_connect_retry`, their application now **blocks** on
+startup for the entire budget when the server is down.
+
+**Source.** `Sender.java` (~line 1451): if `initialConnectMode == null` and any
+`reconnect_*` knob is set, the mode resolves to `SYNC`.
+
+**Why it's bad.** Mode is inferred from an unrelated knob. The "make me more
+resilient" action produces a "hang my boot" side effect. The code comment itself
+acknowledges the knob "reads as a generic retry budget but the underlying path
+only governs reconnects."
+
+**Proposed fix.**
+- Make initial-connect mode an explicit, independent choice; stop inferring it.
+- If inference must stay for back-compat, log a `WARN` when a `reconnect_*` knob
+  flips startup to `SYNC`, naming the knob and the resulting blocking behavior.
+
+**Acceptance.** With only `reconnect_max_duration_millis` set and the server
+down, `build()` either returns promptly (OFF default) or logs an explicit
+warning before blocking. A test asserts the warning / non-blocking default.
+
+---
+
+## ERG-2 — `reconnect_max_duration_millis`: misleading name + inconsistent `0` (P2, Candidate)
+
+**Symptom.** Two confusions:
+1. The name implies "reconnect only" but it also bounds the **initial** connect
+   in SYNC/ASYNC modes.
+2. `reconnect_max_duration_millis=0` means **give up immediately**, whereas
+   `idle_timeout_ms=0` and `max_lifetime_ms=0` in the same config surface mean
+   **infinite**. There is no infinite-retry mode at all.
+
+**Source.** `CursorWebSocketSendLoop.java:827` — `deadlineNanos = start + dur*1e6`,
+loop `while (now < deadline)`; `0` ⇒ zero iterations. Contrast
+`QuestDBBuilder.idleTimeoutMillis/maxLifetimeMillis` (`0 ⇒ Long.MAX_VALUE`).
+
+**Why it's bad.** Same `0` token, opposite semantics depending on the knob;
+tolerating a long maintenance window forces magic numbers like `86400000`.
+
+**Proposed fix.**
+- Adopt one `0` convention. Recommended: `0 ⇒ infinite`, matching the pool
+  knobs, which also gives a real infinite-retry mode.
+- Consider an alias `connect_retry_budget_ms` that reflects it covers initial +
+  reconnect; keep the old key as a deprecated alias.
+
+**Acceptance.** Documented, consistent `0` semantics across the config surface;
+test covering `=0` behavior and (if added) infinite mode.
+
+---
+
+## ERG-3 — `failover` does not cover startup; queries have no async connect (P2, Candidate)
+
+**Symptom.** Users expect `failover=on` to make startup resilient. It does not —
+it only governs reconnect+replay during `execute()` after a connection exists.
+Query initial connect is always synchronous and blocking, with no async/lazy
+mode (unlike ingest).
+
+**Source.** `QwpQueryClient.connect()` is synchronous; `failover_*` defaults at
+`QwpQueryClient.java:139-141`; spec "Query client behavior".
+
+**Why it's bad.** Expectation mismatch on a safety-critical knob; asymmetry
+between ingest (3 modes) and query (1 mode) forces two mental models.
+
+**Proposed fix.**
+- Document `failover`'s scope prominently (done in the rewrite).
+- Evaluate an async/lazy initial-connect mode for the query client to match
+  ingest, or a unified `initial_connect` setting shared by both sides.
+
+**Acceptance.** Either query supports a documented non-blocking initial-connect
+mode, or the docs make the scope unambiguous and the limitation is explicitly
+accepted.
+
+---
+
+## ERG-4 — No first-class write-only facade (P2, Candidate)
+
+**Symptom.** A write-only user of `QuestDB` must still supply a query config they
+never use **and** remember `query_pool_min=0` to avoid a build-time query
+connection.
+
+**Source.** `QuestDBBuilder.build()` hard-requires both `ingestConfig` and
+`queryConfig`; no write-only path.
+
+**Why it's bad.** Leaky and error-prone; the doc's own recommendation is "prefer
+direct `Sender`," which is an admission the facade is awkward here.
+
+**Proposed fix.**
+- Add `QuestDB.builder().ingestConfig(...).writeOnly()` (or a `writeOnly()`
+  shortcut) that skips the query pool entirely.
+- Symmetric `readOnly()` is a natural follow-up.
+
+**Acceptance.** A write-only facade builds with no query config and creates no
+query pool; documented and tested.
+
+---
+
+## ERG-5 — A single endpoint's `401`/`403` aborts the whole walk (P2, Intended, revisit)
+
+**Symptom.** One misconfigured endpoint returning `401`/`403` blocks startup
+even when other listed endpoints would accept the credentials. Applies to both
+ingest and query walks, including at startup.
+
+**Source.** Ingest/query endpoint matrices; `CursorWebSocketSendLoop` treats
+`QwpAuthFailedException` as terminal across all endpoints.
+
+**Why it's debatable.** "Fail fast on bad credentials" is reasonable, but it is
+asymmetric with how every *transport* failure is tolerated, and surprising
+during rolling credential rotation or a single bad node.
+
+**Proposed fix (revisit).**
+- Keep terminal-on-auth as the contract, but make it a deliberately documented
+  contract (done in the rewrite).
+- Consider an opt-in (e.g. `auth_failure=continue`) that demotes auth failure to
+  a per-endpoint skip for heterogeneous fleets.
+
+**Acceptance.** Behavior documented as intentional; decision recorded on whether
+an opt-in continue mode is warranted.
+
+---
+
+## ERG-6 — Facade can't reach error handler / connection listener / serverInfoTimeout (P2, Candidate)
+
+**Symptom.** Through the `QuestDB` facade you cannot install a
+`SenderErrorHandler` or `SenderConnectionListener` (ingest), nor set
+`serverInfoTimeoutMs` (query). The latter has no config key at all.
+
+**Source.** `LineSenderBuilder.errorHandler()/connectionListener()` exist only on
+the direct sender builder; `serverInfoTimeoutMs` is a `QwpQueryClient` builder
+field with no `ConfigSchema` key (`ConfigSchema.java` EGRESS section).
+
+**Why it's bad.** The facade is the recommended high-level entry point, yet it
+cannot configure observability hooks or a documented query timeout.
+
+**Proposed fix.**
+- Expose ingest error handler / connection listener on `QuestDBBuilder`
+  (per-pool or shared).
+- Add a `server_info_timeout_ms` config key so it is reachable from any conn
+  string (and therefore the facade).
+
+**Acceptance.** Both hooks and the timeout are reachable from the facade;
+documented in the knob-availability matrix.
+
+---
+
+## ERG-7 — Simplest API has the worst error visibility (P1, Candidate)
+
+**Symptom.** `Sender.fromConfig(cfg)` with `initial_connect_retry=async` swallows
+terminal startup failures — they surface only on a later producer call or at
+`close()`. The visible path requires switching to `Sender.builder(...)` and
+installing a handler.
+
+**Source.** Async terminal `SenderError` delivered to a configured
+`SenderErrorHandler`; "even without a handler they are surfaced by later producer
+calls or close-time safety net behavior."
+
+**Why it's bad.** The API with the nicest ergonomics has the worst
+observability for the single most important question: "did my writer ever
+connect?"
+
+**Proposed fix.**
+- Default to a sane error sink (e.g. `WARN`/`ERROR` log on terminal async
+  failure) even without a registered handler.
+- Provide a lightweight status accessor (e.g. `wasEverConnected()` /
+  `lastError()`) on the public `Sender` surface for poll-based checks.
+
+**Acceptance.** A terminal async failure is observable without installing a
+custom handler; documented and tested.
+
+---
+
+## ERG-8 — No client-side TCP connect timeout (P2, Intended, revisit)
+
+**Symptom.** A black-holed host in the `addr` list blocks the endpoint walk
+until the OS connect timeout, undercutting the resilience value of listing
+multiple endpoints.
+
+**Source.** `auth_timeout_ms` bounds only the post-connect upgrade/auth phase;
+no separate application-level TCP connect timeout in the transport.
+
+**Why it's debatable.** It is a transport limitation, but it directly defeats the
+multi-endpoint failover use case at startup.
+
+**Proposed fix (revisit).**
+- Add a client-side connect timeout so the walk can abandon black-holed hosts
+  and proceed to the next endpoint.
+
+**Acceptance.** A black-holed first endpoint no longer blocks past a configurable
+bound before the walk advances; documented and tested.
+
+---
+
+## Suggested sequencing
+
+1. **ERG-1** and **ERG-7** (both P1) — they cause hangs and silent failures.
+2. **ERG-2**, **ERG-4**, **ERG-6** (P2 Candidate) — naming/consistency and
+   facade completeness.
+3. **ERG-3**, **ERG-8** (P2, need design) — async query connect and connect
+   timeout.
+4. **ERG-5** — confirm/record the auth-terminal contract; opt-in continue mode
+   only if a concrete fleet use case justifies it.
diff --git a/design/qwp-client-startup-failover-behavior.md b/design/qwp-client-startup-failover-behavior.md
new file mode 100644
index 00000000..01d53233
--- /dev/null
+++ b/design/qwp-client-startup-failover-behavior.md
@@ -0,0 +1,443 @@
+# QWP client startup, pooling, failover, and store-and-forward
+
+This document describes how the Java QWP client behaves at **startup**, under
+**connection loss**, and with **store-and-forward (SF)** durability. It is
+written for client *users* first: the [Quick start](#quick-start) and
+[Mental model](#mental-model) sections are enough to configure a correct client.
+The [Reference](#reference) section is the exhaustive behavior matrix. The
+[Implementation appendix](#implementation-appendix) documents internals for
+maintainers.
+
+It is descriptive — it records what the code does today, including current
+sharp edges. Where a behavior is a likely footgun, it is marked
+**⚠ Sharp edge** and tracked in
+[`qwp-client-ergonomics-issues.md`](./qwp-client-ergonomics-issues.md).
+
+---
+
+## Quick start
+
+### Write-only client that tolerates the server being down at startup
+
+Use the direct `Sender` API (not the `QuestDB` facade — see
+[sharp edge #4](#sharp-edges)).
+
+```java
+String cfg = "ws::addr=db-a:9000,db-b:9000;"
+        + "sf_dir=/var/lib/my-app/questdb-sf;"   // opt into disk durability
+        + "sender_id=writer-1;"                  // unique per process per sf_dir
+        + "initial_connect_retry=async;"         // non-blocking startup
+        + "reconnect_max_duration_millis=86400000;" // outage budget (24h)
+        + "sf_max_total_bytes=100g;";
+
+// For production, prefer the builder so you can install an error handler:
+try (Sender sender = Sender.builder(cfg)
+        .errorHandler(myErrorHandler)            // see "Error visibility" below
+        .connectionListener(myConnectionListener)
+        .build()) {
+    sender.table("telemetry").longColumn("v", 42).atNow();
+    sender.flush(); // persists to SF storage; wire ACK is asynchronous
+}
+```
+
+Why each line matters:
+
+- `sf_dir` is the **only** SF enable switch — there is no boolean flag.
+- `initial_connect_retry=async` is what makes `build()` return without a live
+  socket. Without it, startup is blocking (see [Mental model](#mental-model)).
+- `reconnect_max_duration_millis` is the outage budget for **both** the initial
+  connect and later reconnects. If it expires, the sender latches terminal and
+  stops; data already in `sf_dir` survives for a future sender on the same slot.
+
+**Error visibility ⚠:** the simplest path (`Sender.fromConfig(...)` + async)
+surfaces terminal async failures only *later*, through a producer call or at
+`close()`. For production, use `Sender.builder(...)` and install a
+`SenderErrorHandler` / `SenderConnectionListener`
+([sharp edge #7](#sharp-edges)).
+
+### Read client that only reads from replicas
+
+```java
+String cfg = "ws::addr=replica-a:9000,replica-b:9000,replica-c:9000;"
+        + "target=replica;"   // without this, the client may bind a primary
+        + "failover=on;";      // default; affects execute()-time recovery only
+
+try (QuestDB db = QuestDB.connect(cfg)) {
+    try (Query q = db.borrowQuery()) {
+        q.sql("select * from telemetry limit 10").handler(myBatchHandler).submit().await();
+    }
+}
+```
+
+Why each line matters:
+
+- `target=replica` is required to avoid binding a primary/standalone server.
+  The default `target=any` will accept any role.
+- `failover=on` is the default. It does **not** affect startup; it only governs
+  reconnect+replay when an already-established query connection fails during
+  `execute()`.
+
+---
+
+## Mental model
+
+### Three independent "connect" models live in one client
+
+A `QuestDB` facade owns an **ingest pool** and a **query pool**. They do not
+share a startup model. You must hold all three in mind:
+
+| Concern | Controlled by | Startup is... |
+| --- | --- | --- |
+| Ingest sender initial connect | `initial_connect_retry` = `off` / `sync` / `async` | one-shot / blocking-retry / background-retry |
+| Query client initial connect | (no mode; always synchronous) | always blocking |
+| Facade prewarm (how many of each connect at `build()`) | `sender_pool_min`, `query_pool_min` | eager if `min>0`, lazy if `min=0` |
+
+`failover=on` (query default) is **not** a startup setting — it only affects
+query execution after a connection exists. This naming trips people up
+([sharp edge #3](#sharp-edges)).
+
+### Ingest initial-connect modes
+
+| `initial_connect_retry` | Mode | `build()` behavior on a down server |
+| --- | --- | --- |
+| `off` / `false` | `OFF` | one attempt on caller thread; throws immediately |
+| `on` / `true` / `sync` | `SYNC` | retry loop on caller thread, bounded by `reconnect_max_duration_millis` (blocks) |
+| `async` | `ASYNC` | returns immediately; I/O thread retries in background |
+
+**Default resolution ⚠:** if you don't set `initial_connect_retry` explicitly but
+you *do* set any `reconnect_*` knob, the mode becomes `SYNC` — so a "resilience"
+knob silently turns startup into a multi-minute **blocking** retry. If no
+`reconnect_*` knob is set either, the mode is `OFF`. Always set
+`initial_connect_retry` explicitly to avoid this ([sharp edge #1](#sharp-edges)).
+
+### Facade prewarm
+
+`QuestDBBuilder.build()` validates both configs (without connecting), then
+eagerly creates `min` connections per pool. Consequences:
+
+| Configuration | Build-time network behavior |
+| --- | --- |
+| defaults (`min=1` both) | creates one sender + one query client; build fails if either cannot connect — unless ingest uses `initial_connect_retry=async` |
+| `sender_pool_min=0` | no sender at build; first `borrowSender()` creates it (then follows the ingest initial-connect mode) |
+| `query_pool_min=0` | no query client at build; first query `submit()` creates it |
+| both mins `0` | config-only validation at build; all network work is lazy |
+
+After prewarm, both pools grow lazily up to `max` on demand, and shrink back to
+`min` when idle. Growth uses the same real connect path as prewarm. At `max`,
+callers block up to `acquire_timeout_ms` then throw.
+
+---
+
+## Defaults (single source of truth)
+
+### Pool (facade only)
+
+| Key / builder | Default |
+| --- | ---: |
+| `sender_pool_min` | `1` |
+| `sender_pool_max` | `4` |
+| `query_pool_min` | `1` |
+| `query_pool_max` | `4` |
+| `acquire_timeout_ms` | `5000` |
+| `idle_timeout_ms` | `60000` (`0` ⇒ infinite) |
+| `max_lifetime_ms` | `1800000` (`0` ⇒ infinite) |
+| `housekeeper_interval_ms` | `5000` |
+
+### Ingest sender (SF + reconnect)
+
+| Key | Default |
+| --- | ---: |
+| `sender_id` | `default` |
+| `sf_max_bytes` (segment size) | `4 MiB` |
+| `sf_max_total_bytes` (SF mode) | `10 GiB` |
+| `sf_durability` | `MEMORY` |
+| `sf_append_deadline_millis` | `30000` |
+| `reconnect_max_duration_millis` | `300000` (`0` ⇒ **give up immediately**, not infinite ⚠) |
+| `reconnect_initial_backoff_millis` | `100` |
+| `reconnect_max_backoff_millis` | `5000` |
+| `close_flush_timeout_millis` | `60000` |
+| `auth_timeout_ms` | `15000` |
+
+### Query client
+
+| Key | Default |
+| --- | ---: |
+| `target` | `any` |
+| `failover` | `on` |
+| `failover_max_attempts` | `8` (incl. original) |
+| `failover_max_duration_ms` | `30000` (`0` disables the duration cap) |
+| `failover_backoff_initial_ms` | `50` |
+| `failover_backoff_max_ms` | `1000` |
+| `auth_timeout_ms` | `15000` |
+| `serverInfoTimeoutMs` | `5000` (builder API only — no config key ⚠) |
+
+Note the inconsistent `0` convention: `idle_timeout_ms=0`/`max_lifetime_ms=0`
+mean *infinite*, but `reconnect_max_duration_millis=0` means *give up now*
+([sharp edge #2](#sharp-edges)).
+
+---
+
+## Knob availability by surface
+
+Three configuration surfaces exist. Not every knob is reachable from every
+surface — this matrix shows where each lives.
+
+- **Conn string**: a `ws`/`wss` config string. Works for `Sender.fromConfig`,
+  `QwpQueryClient.fromConfig`, and `QuestDB.connect(...)`.
+- **Sender builder**: `Sender.builder(...)` (`LineSenderBuilder`) — direct
+  ingest only.
+- **Facade builder**: `QuestDB.builder()` (`QuestDBBuilder`) — pool knobs only;
+  query/ingest behavior must come from the conn string.
+
+| Knob | Conn string | Sender builder | Facade builder |
+| --- | :---: | :---: | :---: |
+| `addr` | ✅ | ✅ `address()/port()` | via conn string |
+| `username`/`password`/`token` | ✅ | ✅ | via conn string |
+| `tls_verify`/`tls_roots` | ✅ | ✅ | via conn string |
+| `auth_timeout_ms` | ✅ | ✅ | via conn string |
+| `initial_connect_retry` | ✅ | ✅ `initialConnectMode()` | via conn string |
+| `reconnect_*` | ✅ | ✅ | via conn string |
+| `sf_dir`/`sender_id`/`sf_*` | ✅ | ✅ | via conn string |
+| `request_durable_ack` | ✅ | ✅ | via conn string |
+| `close_flush_timeout_millis` | ✅ | ✅ | via conn string |
+| `SenderErrorHandler` | ❌ | ✅ `errorHandler()` | ❌ (not reachable) |
+| `SenderConnectionListener` | ❌ | ✅ `connectionListener()` | ❌ (not reachable) |
+| `target` | ✅ | n/a | via conn string |
+| `failover`/`failover_*` | ✅ | n/a | via conn string |
+| `serverInfoTimeoutMs` | ❌ | n/a | ❌ (QwpQueryClient builder only) |
+| `sender_pool_*`/`query_pool_*` | ✅ | n/a | ✅ |
+| `acquire_timeout_ms`/`idle_timeout_ms`/`max_lifetime_ms` | ✅ | n/a | ✅ |
+
+⚠ Gaps worth noting: the ingest **error handler / connection listener** cannot
+be installed through the facade at all, and **`serverInfoTimeoutMs`** has no
+config key, so a facade query client cannot tune it
+([sharp edge #6](#sharp-edges)).
+
+---
+
+## Sharp edges
+
+Each item links to a tracked issue in
+[`qwp-client-ergonomics-issues.md`](./qwp-client-ergonomics-issues.md).
+"Intended" means it is a deliberate contract; "Candidate" means it is a likely
+ergonomic defect worth changing.
+
+| # | Sharp edge | Status |
+| --- | --- | --- |
+| 1 | `initial_connect_retry` is implicitly promoted to `SYNC` when any `reconnect_*` knob is set — a resilience knob silently makes startup block. | Candidate |
+| 2 | `reconnect_max_duration_millis` name implies "reconnect only" but also governs initial connect; `0` means "give up now" while sibling `0`s mean "infinite"; no infinite mode exists. | Candidate |
+| 3 | `failover` sounds like it covers startup but only affects post-connect query `execute()`. Queries have no async/lazy initial connect at all. | Candidate |
+| 4 | No first-class write-only facade: a write-only user must still supply a query config and remember `query_pool_min=0`. | Candidate |
+| 5 | A single endpoint returning `401`/`403` is treated as cluster-wide terminal and aborts the whole endpoint walk, even at startup, even if other endpoints would accept the credentials. | Intended (documented), revisit |
+| 6 | Ingest `errorHandler`/`connectionListener` and query `serverInfoTimeoutMs` are unreachable from the facade. | Candidate |
+| 7 | The simplest API (`fromConfig` + async) has the worst error visibility — terminal async failures surface only on later producer calls or at `close()`. | Candidate |
+| 8 | No client-side TCP connect timeout: a black-holed host in `addr` blocks the endpoint walk until the OS connect timeout. | Intended (transport limitation), revisit |
+
+---
+
+## Reference
+
+### Store-and-forward semantics
+
+`sf_dir=...` enables SF. There is no separate boolean enable flag.
+
+- The sender owns one slot: `<sf_dir>/<sender_id>/`. Default `sender_id` is
+  `default`.
+- Multiple independent senders sharing one `sf_dir` must use distinct
+  `sender_id` values, else the second fails because the slot lock is held.
+- In pooled `QuestDB` usage, `SenderPool` derives per-slot IDs from the base:
+  `<base>-0`, `<base>-1`, … so pooled senders never collide.
+- On restart, the cursor engine opens existing segment files and replays
+  unacknowledged frames; acknowledged/truncated frames are not replayed.
+
+`flush()` semantics (QWP sender):
+
+- Encodes pending rows into the cursor engine.
+- In SF mode, data is persisted to mmap-backed segment files before `flush()`
+  returns.
+- `flush()` does **not** wait for server ACKs unless backpressure forces it to
+  wait for space to free up. The I/O thread sends frames and trims ACKed
+  frames asynchronously.
+- `drain(timeoutMillis)` flushes and waits for the server to ACK all currently
+  published frames, up to the timeout.
+- `close()` flushes then waits up to `close_flush_timeout_millis` for ACKs,
+  unless that timeout is `<= 0`.
+
+### Async initial connect (ingest)
+
+With `initial_connect_retry=async`:
+
+- `build()` returns without a live socket; `wasEverConnected()` is `false`.
+- Producer calls and `flush()` can run before the server exists; frames
+  accumulate in the cursor engine (and on disk with `sf_dir`).
+- The I/O thread retries in the background using the same reconnect loop that
+  runs after a wire failure.
+- If a server appears before the budget expires, buffered frames are
+  sent/replayed and ACK-driven trimming begins.
+- If the budget expires before any connection, the sender latches a terminal
+  `SenderError` whose message contains `never-connected-budget-exhausted`.
+- If it connected at least once and a later outage exhausts the budget, the
+  message contains `connection-lost-budget-exhausted`.
+- Terminal async errors go to a configured `SenderErrorHandler`; without one
+  they surface on later producer calls or at close-time.
+
+There is no infinite-retry mode. For long maintenance windows, set a large
+`reconnect_max_duration_millis`. On budget exhaustion the current sender stops;
+persisted `sf_dir` data remains for a future sender on the same slot.
+
+### Ingest endpoint walk (`addr=a:9000,b:9000,...`)
+
+| Per-endpoint result | Sender behavior |
+| --- | --- |
+| DNS failure | transport error; try next endpoint |
+| TCP connect failure | transport error; try next endpoint |
+| TLS session/certificate failure | transport error; try next endpoint |
+| HTTP upgrade timeout / non-auth transport error | try next endpoint |
+| `421` with `X-QuestDB-Role: REPLICA` | role reject; try next endpoint |
+| `401` / `403` auth failure | **terminal**; do not try later endpoints ⚠ |
+| durable-ack requested but unsupported | terminal mismatch |
+| successful write upgrade | bind this endpoint |
+| all endpoints fail transport | throw / retry per initial/reconnect mode |
+| all endpoints role-reject as replicas | `QwpRoleMismatchException` |
+
+### Query client initial connect
+
+`QwpQueryClient.connect()` is synchronous. Per endpoint it: opens TCP/TLS,
+performs the WebSocket upgrade to `/read/v1`, reads the initial `SERVER_INFO`
+frame, applies the `target=` role filter, and starts the egress I/O thread on
+the first match. If no endpoint can be used, it throws. There is no async
+initial-connect mode for queries.
+
+`target=` matching:
+
+| Target | Accepted roles |
+| --- | --- |
+| `any` | any role |
+| `primary` | `PRIMARY`, `PRIMARY_CATCHUP`, `STANDALONE` |
+| `replica` | `REPLICA` only |
+
+Query initial-connect endpoint matrix:
+
+| Per-endpoint result | Behavior |
+| --- | --- |
+| DNS / TCP / TLS failure | record transport error; try next endpoint |
+| HTTP upgrade timeout | transport error; try next endpoint |
+| HTTP `401` / `403` | **terminal** `QwpAuthFailedException`; do not try later endpoints ⚠ |
+| HTTP `421` + role header | role reject; try next endpoint |
+| upgrade ok but no `SERVER_INFO` before timeout | transport error; try next endpoint |
+| `SERVER_INFO` role ≠ `target` | role reject; try next endpoint |
+| endpoint matches target | bind and return success |
+| all endpoints transport-fail | `HttpClientException: all QWP endpoints unreachable ...` |
+| all endpoints role-reject | `QwpRoleMismatchException` |
+
+`auth_timeout_ms` bounds the upgrade/auth phase **after** TCP connect. There is
+no separate client-side TCP connect timeout, so a black-holed connect blocks
+until the OS timeout before the walk advances ⚠.
+
+### Query execution-time failover
+
+With `failover=on`:
+
+- A transport/protocol terminal failure during `execute()` is intercepted; the
+  client reconnects via the host tracker and re-submits.
+- The handler receives `onFailoverReset(...)` before replayed batches.
+- Bounded by `failover_max_attempts` (default `8`, including the original
+  attempt) **and** `failover_max_duration_ms` (default `30000`; `0` disables
+  the duration cap).
+- Backoff: `failover_backoff_initial_ms=50`, `failover_backoff_max_ms=1000`.
+- Auth failure during failover reconnect is terminal and reported to the handler.
+
+With `failover=off`, a transport failure is reported to the handler with no
+reconnect/replay.
+
+### Scenario matrix
+
+#### Facade startup
+
+| Scenario | Config | Result |
+| --- | --- | --- |
+| Default `connect`, all servers down | default mins | build fails |
+| Default `connect`, first endpoint down, second works | multi-addr | build can succeed; each prewarmed client walks endpoints |
+| Write-only-ish startup while down | `query_pool_min=0` + sender async | build returns |
+| Fully lazy startup | both mins `0` | build returns after validation only |
+| Query first use after lazy startup while down | `query_pool_min=0` | first `submit()` throws |
+| Sender first use after lazy startup while down | `sender_pool_min=0` | first sender creation follows ingest initial mode |
+
+#### Direct sender startup
+
+| Scenario | Config | Result |
+| --- | --- | --- |
+| server down, default mode | no `reconnect_*`, no async | one attempt; build throws |
+| server down, reconnect duration set, no mode | `reconnect_max_duration_millis=...` | **synchronous** retry; build blocks ⚠ |
+| server down, async | `initial_connect_retry=async` | build returns; I/O thread retries |
+| server returns `401`/`403` | any mode | terminal auth failure; no endpoint continuation |
+| server appears before async budget | async + budget | buffered frames sent and ACKed |
+| server appears after async budget | async + exhausted | sender terminal; new sender/restart needed |
+
+#### Read-replica startup (one bad endpoint, another replica works)
+
+| Bad endpoint type | Continue to working replica? | Notes |
+| --- | --- | --- |
+| DNS failure | Yes | transport error |
+| TCP refused/unreachable | Yes | transport error; black-hole waits for OS timeout |
+| TLS handshake failure | Yes | transport error |
+| HTTP upgrade timeout | Yes | after `auth_timeout_ms` |
+| upgrades but no `SERVER_INFO` | Yes | after `serverInfoTimeoutMs` (builder only) |
+| primary/standalone while `target=replica` | Yes | role mismatch |
+| `421` role reject | Yes | try next |
+| `401`/`403` | **No** | auth treated as cluster-wide terminal ⚠ |
+| broken shared TLS/trust store | No | every endpoint fails |
+| all endpoints down | No | `all QWP endpoints unreachable` |
+| reachable but none match `target` | No | `QwpRoleMismatchException` |
+
+---
+
+## Implementation appendix
+
+For maintainers. Primary source areas:
+
+- `io.questdb.client.QuestDB` / `QuestDBBuilder`
+- `io.questdb.client.impl.SenderPool` / `QueryClientPool` / `PoolHousekeeper`
+- `io.questdb.client.Sender.LineSenderBuilder`
+- `io.questdb.client.cutlass.qwp.client.QwpWebSocketSender`
+- `io.questdb.client.cutlass.qwp.client.QwpQueryClient`
+- `io.questdb.client.cutlass.qwp.client.sf.cursor.CursorSendEngine`
+- `io.questdb.client.cutlass.qwp.client.sf.cursor.CursorWebSocketSendLoop`
+- `io.questdb.client.cutlass.qwp.client.QwpHostHealthTracker`
+- `io.questdb.client.impl.ConfigSchema` (the single key registry)
+
+### `QuestDBBuilder.build()` steps
+
+1. Require both ingest and query configs.
+2. Parse + validate both configs without connecting (runs even when mins are
+   `0`; malformed pool/ingest/query/TLS/auth/enum/range values fail here).
+3. Resolve pool keys: explicit builder setters override conn-string keys;
+   conflicting pool values across the two conn strings fail.
+4. Construct `SenderPool` and `QueryClientPool`.
+5. Eagerly create `min` connections per pool.
+6. Start the `PoolHousekeeper`.
+
+### Initial-connect mode resolution (`Sender.java`)
+
+```text
+if initialConnectMode set explicitly -> use it (incl. OFF + tuned budget)
+else if any reconnect_* set          -> SYNC
+else                                 -> OFF
+```
+
+### Pooled SF startup recovery nuance
+
+- Live/prewarmed sender slots recover their own unacked data via their
+  `CursorSendEngine`.
+- Non-live managed slots are scanned by the housekeeper startup recovery path,
+  so `build()` does not block on stranded slots.
+- Recovery of non-live stranded slots is best-effort and bounded: a build/drain
+  failure aborts that scan; data stays durable for a later attempt, but the
+  current process does not retry the aborted scan indefinitely.
+- For immediate background drain of all slots, keep enough `sender_pool_min`
+  slots warm or construct direct senders for the slots that must actively retry.
+
+### Reconnect deadline (`CursorWebSocketSendLoop`)
+
+`deadlineNanos = outageStartNanos + reconnect_max_duration_millis * 1e6`; the
+loop runs `while (running && now < deadline)`. Hence `0` ⇒ no iterations ⇒
+immediate give-up. `QwpAuthFailedException` / `WebSocketUpgradeException` inside
+the loop are terminal across all endpoints.