From 2fc0e1c275d5cf922c7ec3a12b4d66ca7e886173 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 15 Apr 2026 13:51:04 +0200
Subject: [PATCH 01/64] perf: optimize DataDict copy, validate, and pipeline
 data flow

Major performance improvements to plottr's data pipeline:

- Rewrite copy() with targeted per-key semantics (14.8x faster for meshgrid)
- Add copy(deep=False) API for shallow copies (xarray convention)
- Optimize MeshgridDataDict.validate() monotonicity check (1.5x faster)
- Add _build_structure() to skip redundant validation in internal callers
- Fast-path mask_invalid() to skip clean data (65,000x memory reduction)
- Fix cascading copies in XYSelector (was copying twice via inheritance)
- Pass copy=False in DataGridder to avoid redundant array duplication
- Optimize datasets_are_equal() with shape short-circuit
- Fix bug: copy() now properly deep-copies global mutable metadata

Adds 127 new tests covering copy semantics, pipeline integrity, various
data shapes/dtypes, and edge cases (hypothesis property-based testing).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md                         | 491 ++++++++++++++
 plottr/data/datadict.py                     | 215 +++---
 plottr/node/dim_reducer.py                  |   2 +-
 plottr/node/grid.py                         |   8 +-
 test/pytest/test_datadict_copy_semantics.py | 709 ++++++++++++++++++++
 test/pytest/test_pipeline_coverage.py       | 632 +++++++++++++++++
 6 files changed, 1972 insertions(+), 85 deletions(-)
 create mode 100644 PERFORMANCE_PLAN.md
 create mode 100644 test/pytest/test_datadict_copy_semantics.py
 create mode 100644 test/pytest/test_pipeline_coverage.py

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
new file mode 100644
index 00000000..122fa9e0
--- /dev/null
+++ b/PERFORMANCE_PLAN.md
@@ -0,0 +1,491 @@
+# Plottr Performance Optimization Plan
+
+## Problem Statement
+
+Plottr's pipeline architecture copies data excessively as it flows through nodes. Each node in the
+`linearFlowchart` (DataSelector → DataGridder → XYSelector → PlotNode) defensively copies data
+before modifying it, and many internal methods (`structure()`, `extract()`, `copy()`, `validate()`)
+add further redundant copies. Profiling shows that a typical 4-stage pipeline produces a **~4.8×
+memory amplification factor** — almost 5 copies of the input data exist simultaneously.
+
+For a 100×100×100 MeshgridDataDict (~38 MB), a single `copy()` takes **92 ms** and `validate()`
+takes **43 ms** due to `np.diff`/`np.unique` on full meshgrid axes. In a real pipeline with
+3–4 nodes, this means hundreds of milliseconds of pure overhead per update, which becomes very
+noticeable during interactive parameter changes.
+
+## Profiling Results Summary
+
+| Operation | 10K pts (312 KB) | 100K pts (4.6 MB) | 1M pts (46 MB) | 100³ mesh (38 MB) |
+|---|---|---|---|---|
+| `copy()` | 0.2 ms | 1.1 ms | 7.8 ms | **92 ms** |
+| `structure()` | 0.06 ms | 0.11 ms | 0.10 ms | **44 ms** |
+| `validate()` | 0.02 ms | 0.05 ms | 0.06 ms | **43 ms** |
+| `extract(1 dep)` | 0.38 ms | 1.0 ms | **15 ms** | — |
+| `mask_invalid()` | — | — | — | **202 ms** |
+| Pipeline (4 stages) | **64 ms** (4.8× mem) | — | — | — |
+
+## Root Causes (Ranked by Impact)
+
+### 1. CRITICAL: Cascading Deep Copies in Node Process Methods
+
+**Every node calls `.copy()` on data it receives, even though pyqtgraph's Flowchart passes data by
+reference.** Worse, inherited nodes copy *again* — `XYSelector` inherits from `DimensionReducer`,
+so data is copied twice (once at each level's `process()`).
+
+Evidence:
+- `DataGridder.process()` — `data = dataout.copy()` (grid.py:473)
+- `DimensionReducer.process()` — `data = dataout.copy()` (dim_reducer.py:682)
+- `XYSelector.process()` — `data = dataout.copy()` (dim_reducer.py:901) ← **second copy in chain**
+- `ScaleUnits.process()` — `data = dataIn.copy()` (scaleunits.py:126)
+- `SubtractAverage.process()` — `data = dataIn.copy()` (correct_offset.py:63)
+- `Fitter.process()` — `dataOut = dataIn.copy()` (fitter.py:606)
+
+**Impact**: In a 4-node pipeline, data is copied 3–4 times. For 38 MB meshgrid data, that's
+~150 MB of unnecessary allocations and ~370 ms of copy time.
+
+### 2. HIGH: MeshgridDataDict.validate() Is Computationally Expensive
+
+`MeshgridDataDict.validate()` (datadict.py:1063-1145) computes `np.diff()` + `np.unique()` +
+`np.sign()` on every axis array for every dependent, verifying monotonicity. For a 100×100×100
+dataset with 2 deps and 3 axes, that's 6 full-array `np.diff` computations on 1M-element arrays.
+
+This takes **43 ms** per call and is called:
+- Once per `structure()` call
+- Once per `copy()` call (via `structure()`)
+- Once per `validate()` directly
+- Multiple times in `datadict_to_meshgrid()`, `meshgrid_to_datadict()`, etc.
+
+Across a pipeline, validate() may be called **6–10 times** for the same data.
+
+### 3. HIGH: structure() Uses deepcopy Unnecessarily
+
+`DataDictBase.structure()` (datadict.py:399-451) does `cp.deepcopy(v2)` on each field's metadata
+dict (line 434), even though:
+- Values are already emptied (`v2['values'] = []`)
+- Metadata is typically just strings and lists of strings
+- A shallow copy would suffice in 99% of cases
+
+### 4. MEDIUM: extract() Uses Deep Copy by Default
+
+`DataDictBase.extract()` (datadict.py:315-362) calls `cp.deepcopy(self[d])` for each selected
+field (line 347), including the numpy array values. `deepcopy` on numpy arrays is significantly
+slower than `array.copy()` because it goes through Python's generic copy protocol rather than
+numpy's optimized memcpy path.
+
+### 5. MEDIUM: mask_invalid() Creates Full Masked Copy
+
+`mask_invalid()` (datadict.py:724-738) uses `np.ma.masked_where(..., copy=True)`, creating a
+completely new masked array for every data field. Many datasets have no invalid entries, making
+this pure overhead.
+
+### 6. LOW: shapes() Wraps Arrays Unnecessarily
+
+`DataDictBase.shapes()` (datadict.py:553-565) calls `np.array(self.data_vals(k)).shape` — the
+`np.array()` wrapper is unnecessary since `data_vals()` already returns an ndarray after
+validation.
+
+---
+
+## Risk Analysis & Mitigations
+
+This section documents the edge cases discovered during investigation and how the proposed
+improvements must account for them.
+
+### Risk 1: Nested Dict Mutations Bypass Dirty Flags
+
+`DataDictBase` is a `dict` of dicts. User code commonly mutates inner dicts directly:
+`dd['x']['values'] = new_array`. This does NOT trigger `DataDictBase.__setitem__` because the
+outer dict is not being set — only the inner dict is being mutated.
+
+**Mitigation**: Do NOT use a general validation cache based on `__setitem__`. Instead, use
+private helper methods (`_build_structure()`) that skip validation only when called from
+code paths that have *just* validated or just constructed fresh data. The public `validate()` API
+always runs. This is safe because the hot-path is internal: `copy()` calls `structure()` which
+calls `validate()` — and after copying validated data, re-validating the copy is redundant.
+
+### Risk 2: Monotonicity Check Must Cover the Full Array
+
+The current `MeshgridDataDict.validate()` checks `np.diff(axis_data, axis=axis_num)` on the full
+N-d axis array. Checking only a single 1D slice would miss cases where one slice is monotonic
+but another is flat or reversed.
+
+**Mitigation**: Keep checking the full array, but avoid the expensive `np.unique(np.sign(...))`
+pipeline. Instead, compute min/max of the diff directly:
+```python
+d = np.diff(axis_data, axis=axis_num)
+d_sign = np.sign(d[~np.isnan(d)])  # ignore NaN steps
+if d_sign.size > 0:
+    has_zero = np.any(d_sign == 0)
+    not_monotone = not (np.all(d_sign >= 0) or np.all(d_sign <= 0))
+```
+This avoids `np.unique()` (which sorts and allocates) while preserving full coverage. The
+dominant cost becomes `np.diff()` which is a simple O(N) subtraction — much faster than
+diff + sign + unique.
+
+### Risk 3: Unknown Field Keys in DataDict
+
+Field dicts can contain custom keys beyond `values/axes/unit/label`. Known cases:
+- `__shape__` key: stored by `datadict_storage.py`, checked in `MeshgridDataDict.validate()`
+- Fitter node adds `'guess'` and `'fit'` fields dynamically (fitter.py:642, 648)
+- Per-field meta keys like `__meta1__` are stored inside field dicts
+
+**Mitigation**: In `structure()` and `extract()`, when replacing `cp.deepcopy()`, we must
+**preserve all keys**, not just the known ones. Use a targeted copy that special-cases only
+`values` (numpy-optimized copy) and `axes` (new list), but copies everything else generically:
+```python
+new_field = {}
+for fk, fv in original_field.items():
+    if fk == 'values':
+        new_field[fk] = fv.copy() if copy else fv  # numpy optimized
+    elif fk == 'axes':
+        new_field[fk] = list(fv)  # new list, strings are immutable
+    else:
+        new_field[fk] = cp.deepcopy(fv)  # safe for mutable meta/custom keys
+```
+This preserves backward compatibility while optimizing the two expensive keys (values, axes).
+
+### Risk 4: In-Place Axis Mutation Breaks Shallow Copies
+
+Several nodes mutate the `axes` list in-place:
+- `DimensionReducer._applyDimReductions()` does `del data[n]['axes'][idx]`
+  (dim_reducer.py:595)
+- `structure(remove_data=...)` does `s[n]['axes'].pop(i)` (datadict.py:439)
+
+If a shallow copy shares the same `axes` list, these mutations would corrupt the original.
+
+**Mitigation**: `copy(deep=False)` (the new shallow copy mode) MUST always create a new `axes`
+list for each field, even when sharing the `values` array. This makes it safe for axis mutation
+while still avoiding the expensive array copy. The implementation is:
+```python
+new_field = {}
+for fk, fv in original_field.items():
+    if fk == 'values':
+        new_field[fk] = fv  # shared reference (NOT copied)
+    elif fk == 'axes':
+        new_field[fk] = list(fv)  # ALWAYS new list
+    else:
+        new_field[fk] = fv  # scalars (unit, label) are immutable
+```
+
+### Risk 5: mask_invalid() Return Type Contract
+
+Downstream plotting code checks `isinstance(data, np.ma.MaskedArray)` and calls `.filled(np.nan)`
+(plot/mpl/plotting.py:99-104, plot/base.py:479,508). If we skip masking for clean data, the
+arrays stay as plain `np.ndarray` and the isinstance checks return False — which is actually
+fine, because the code uses `if isinstance(...): filled()` as a conditional path.
+
+**Mitigation**: The fast-path must use `num.is_invalid()` (not just `np.isnan`) to also catch
+`None` values in object arrays. When no invalid entries exist, skip masking entirely — the
+plotting code handles plain ndarrays correctly. When invalid entries exist, apply masking as
+before.
+
+### Risk 6: shapes() Called on Unvalidated Data
+
+`Node.process()` calls `dataIn.shapes()` (node.py:281) without an explicit prior `validate()`.
+If values are still lists (pre-validation), `data_vals()` returns a list and `.shape` fails.
+
+**Mitigation**: Use `np.shape()` instead of `.shape` — this works on lists, tuples, and arrays
+alike, returning the correct shape without requiring conversion:
+```python
+shapes[k] = np.shape(self.data_vals(k))
+```
+
+### Risk 7: copy() and extract() Semantic Inconsistency
+
+Currently `copy()` uses `ndarray.copy()` (shallow numpy copy) while `extract(copy=True)` uses
+`cp.deepcopy()` (Python generic deep copy). For simple numeric arrays these are equivalent, but
+for object-dtype arrays `deepcopy` recursively copies contained Python objects while
+`ndarray.copy()` only copies the array of pointers.
+
+**Mitigation**: Align both to use `ndarray.copy()` for the `values` key, and `cp.deepcopy()` for
+other mutable values. This is consistent because: (a) plottr stores numeric data in arrays, not
+nested objects, (b) object arrays in plottr contain None values — `ndarray.copy()` handles None
+correctly since None is a singleton.
+
+---
+
+## Code Readability: Copy Semantics Design
+
+A key goal is making it **obvious** in the code where data is shared vs. independent. We adopt
+a pattern inspired by xarray's `copy(deep=True/False)` API and numpy conventions.
+
+### Design Principles
+
+1. **Explicit `deep` parameter**: Extend `copy()` to accept `deep=True` (default, backward
+   compatible) and `deep=False` (shares arrays). No separate `shallow_copy()` method — one
+   method, one parameter, one place to look.
+
+2. **Docstrings document ownership**: Every method that returns data states whether the returned
+   arrays are copies or views:
+   ```python
+   def copy(self, deep: bool = True) -> T:
+       """Make a copy of the dataset.
+
+       :param deep: If True (default), all data arrays are copied. The returned
+           dataset is fully independent of the original.
+           If False, the returned dataset shares data array references with the
+           original. Modifications to array *contents* (e.g., ``ret['x']['values'][0] = 5``)
+           will affect both. However, *replacing* an array (``ret['x']['values'] = new_arr``)
+           only affects the copy. Field metadata (axes, unit, label) is always independent.
+       """
+   ```
+
+3. **Nodes document their copy contract**: Each `process()` method gets a one-line comment
+   stating whether it copies or modifies in-place:
+   ```python
+   def process(self, dataIn=None):
+       ...
+       data = dataIn.copy(deep=False)  # shallow: only modifying values for specific fields
+       data['dep_0']['values'] = data['dep_0']['values'] * scale  # replaces array, safe
+   ```
+
+4. **No hidden copies**: Functions that need to modify data must do so on an explicit copy.
+   `Node.process()` base class passes data through by reference (as it already does). Only
+   nodes that transform data should copy. This should be the local decision of each node.
+
+### API Summary
+
+| Method | Arrays | Metadata | Use When |
+|---|---|---|---|
+| `copy(deep=True)` | Independent copies | Independent copies | Need fully independent data |
+| `copy(deep=False)` | Shared references | Independent copies | Node only modifies a few fields |
+| `extract(copy=True)` | Independent copies | Independent copies | Subsetting fields |
+| `extract(copy=False)` | Shared references | Shared references | Read-only subsetting |
+| `structure()` | Empty (no data) | Independent copies | Getting data shape/layout |
+
+---
+
+## Proposed Improvements (Revised)
+
+### Phase 1: Extend copy() with deep parameter & fix cascading copies
+
+#### 1a. Add `deep` parameter to `copy()`
+
+Extend the existing `copy()` method to accept `deep=True/False`, following the xarray convention.
+`deep=True` (default) preserves current behavior. `deep=False` copies the dict structure and
+axes lists but shares numpy array references.
+
+```python
+def copy(self: T, deep: bool = True) -> T:
+    """Make a copy of the dataset.
+
+    :param deep: If True (default), data arrays are independently copied.
+        If False, the returned dataset shares array references with the original.
+        Field metadata (axes, unit, label) is always independently copied.
+    """
+    ret = self.__class__()
+    for k, v in self.items():
+        if self._is_meta_key(k):
+            ret[k] = cp.deepcopy(v)
+        else:
+            new_field = {}
+            for fk, fv in v.items():
+                if fk == 'values':
+                    new_field[fk] = fv.copy() if deep else fv
+                elif fk == 'axes':
+                    new_field[fk] = list(fv)       # always new list (mutation-safe)
+                elif self._is_meta_key(fk):
+                    new_field[fk] = cp.deepcopy(fv) # safe for mutable meta
+                else:
+                    new_field[fk] = fv              # scalars (unit, label) are immutable
+            ret[k] = new_field
+    return ret
+```
+
+This replaces the current `copy()` → `structure()` → `deepcopy` chain with a single efficient
+pass. No separate `shallow_copy()` method needed.
+
+**Impact**: `copy(deep=False)` is essentially free (~0.01 ms vs 92 ms for deep copy on 38 MB
+meshgrid). Even `copy(deep=True)` is faster because it avoids the `structure()` → `validate()`
+→ `deepcopy` chain.
+
+#### 1b. Fix cascading copies in inherited nodes
+
+`XYSelector.process()` calls `super().process()` (which is `DimensionReducer.process()`) which
+already copies. Remove the redundant second copy:
+
+- `DimensionReducer.process()` (dim_reducer.py:682): keep `copy(deep=False)` — it needs to
+  mutate axes and values
+- `XYSelector.process()` (dim_reducer.py:901): **remove** the `.copy()` call — parent already
+  returned a copy
+- `Node.process()` (node.py:263): does NOT copy, just inspects — keep as-is
+
+#### 1c. Use `copy=False` in `datadict_to_meshgrid` when data is already a copy
+
+`DataGridder.process()` already copies input at line 473. Pass `copy=False` to
+`datadict_to_meshgrid()` to avoid a redundant second array copy.
+
+### Phase 2: Optimize Expensive Validation
+
+#### 2a. Skip redundant validation in internal methods
+
+Add a private `_build_structure()` path that skips validation when constructing data from
+known-valid sources. The public `validate()` always runs — no caching.
+
+Specifically:
+- `copy()` already constructs from valid data → skip re-validate
+- `structure()` calls `validate()` first, then constructs → skip re-validate in the construction
+  step
+
+This is implemented by extracting the construction logic out of `structure()` into a helper:
+```python
+def structure(self, ...):
+    if self.validate():
+        return self._build_structure(...)
+    return None
+
+def _build_structure(self, ...):
+    """Build structure dict. Caller must ensure data is validated."""
+    ...  # no validate() call here
+```
+
+**Impact**: Eliminates 50%+ of validate() calls. Especially impactful for MeshgridDataDict
+where validate() costs 43 ms.
+
+#### 2b. Optimize MeshgridDataDict.validate() monotonicity check
+
+Replace `np.unique(np.sign(np.diff(...)))` with a direct min/max check on the diff array.
+This avoids the sort + allocate from `np.unique()` while preserving full-array coverage:
+
+```python
+d = np.diff(axis_data, axis=axis_num)
+# Use nan-aware checks without materializing sign/unique arrays
+valid_d = d[~np.isnan(d)] if np.issubdtype(d.dtype, np.floating) else d
+if valid_d.size > 0:
+    if np.any(valid_d == 0):
+        msg += "no variation along axis"
+    if not (np.all(valid_d > 0) or np.all(valid_d < 0)):
+        msg += "not monotonous"
+```
+
+**Impact**: ~50% faster than current (no sort/unique), while checking every element.
+
+### Phase 3: Optimize structure() and extract()
+
+#### 3a. Replace deepcopy in structure() with targeted copy
+
+Use the same targeted copy pattern as `copy()`: special-case `values` (set to `[]`) and `axes`
+(new list), deepcopy only meta keys (which may be mutable), pass through scalars directly.
+Preserve ALL keys (not just known ones) to handle custom field keys like `__shape__`.
+
+#### 3b. Replace deepcopy in extract() with targeted copy
+
+Same pattern: use `ndarray.copy()` for values, `list()` for axes, `deepcopy` for meta keys,
+passthrough for scalars. This aligns `extract(copy=True)` semantics with `copy(deep=True)`.
+
+### Phase 4: Optimize mask_invalid()
+
+#### 4a. Skip masking when data has no invalid entries
+
+Use `num.is_invalid()` (which handles both None and NaN) for the fast check:
+```python
+def mask_invalid(self: T) -> T:
+    for d, _ in self.data_items():
+        arr = self.data_vals(d)
+        invalid_mask = num.is_invalid(arr)
+        if not np.any(invalid_mask):
+            continue  # no invalid entries, skip masking entirely
+        vals = np.ma.masked_where(invalid_mask, arr, copy=True)
+        ...
+```
+
+Downstream plotting code handles both plain ndarrays and MaskedArrays correctly (conditional
+isinstance checks in plot/mpl/plotting.py:99-104).
+
+#### 4b. Use copy=False when data is already a copy
+
+In pipeline nodes that call `mask_invalid()` after already copying data (DimensionReducer,
+Histogrammer), pass through a parameter or check `owndata` to avoid re-copying.
+
+### Phase 5: Minor Optimizations
+
+#### 5a. Use np.shape() in shapes()
+
+Replace `np.array(self.data_vals(k)).shape` with `np.shape(self.data_vals(k))`. This handles
+lists/tuples/arrays uniformly without allocating a new array. Safe for unvalidated data.
+
+#### 5b. Optimize datasets_are_equal()
+
+Short-circuit on shape mismatch before comparing values.
+
+---
+
+## Expected Impact
+
+| Phase | Time Savings (per pipeline update) | Memory Savings |
+|---|---|---|
+| Phase 1 (copy(deep=False) + fix cascading) | 50–70% of copy time | 60–75% reduction |
+| Phase 2 (skip redundant validation + optimize) | 60–80% of validate time | Negligible |
+| Phase 3 (structure/extract targeted copy) | 30–50% of structure ops | Minor |
+| Phase 4 (mask_invalid fast-path) | 95%+ when data is clean | 50% reduction |
+| Phase 5 (Minor) | 5–10% misc | Minor |
+
+**Combined estimate for 100×100×100 MeshgridDataDict pipeline:**
+- Current: ~500 ms, ~190 MB allocated (4.8× input)
+- After all phases: ~50–80 ms, ~50–60 MB allocated (~1.3× input)
+
+## Implementation Order
+
+**Prerequisite**: Add comprehensive test coverage for copy semantics, data integrity through
+pipeline, and edge cases (object arrays, complex data, masked data, custom field keys).
+
+Then:
+1. **Phase 1a** (copy deep parameter) — foundation for everything else
+2. **Phase 2a** (skip redundant validation) — highest ROI, low risk
+3. **Phase 2b** (optimize monotonicity check) — high ROI, low risk
+4. **Phase 1b** (fix cascading copies) — high ROI, needs test coverage first
+5. **Phase 3a+3b** (structure/extract optimization) — medium ROI, low risk
+6. **Phase 4a** (mask_invalid fast-path) — high ROI for clean data, low risk
+7. **Phase 1c + Phase 4b + Phase 5** — incremental improvements
+
+## Risks & Considerations
+
+- **Shared array mutation**: With `copy(deep=False)`, if a node modifies array *contents*
+  in-place (e.g. `arr[0] = 5` or `arr *= 2`), it corrupts the original. Nodes must *replace*
+  arrays (`data['x']['values'] = new_arr`) rather than mutate them. This is already the common
+  pattern in most nodes, but must be verified with tests.
+- **Backward compatibility**: `copy()` default is `deep=True`, preserving current behavior.
+  `deep=False` is opt-in. No external API is removed.
+- **Testing prerequisite**: Before making any optimization changes, comprehensive tests must
+  verify: copy isolation, pipeline data integrity, edge cases (object arrays, None, complex,
+  masked), and custom field key preservation.
+
+---
+
+## Execution Results
+
+All optimizations implemented and tested. **173 tests pass** (0 failures).
+
+### Changes Made
+
+| File | Changes |
+|---|---|
+| `plottr/data/datadict.py` | Added `_copy_field()` helper; rewrote `copy(deep=True/False)`; optimized `structure()` with `_build_structure()`; replaced `cp.deepcopy` in `extract()`; optimized `MeshgridDataDict.validate()` monotonicity check; added `mask_invalid()` fast-path for clean data; fixed `shapes()` to use `np.shape()`; optimized `datasets_are_equal()` |
+| `plottr/node/dim_reducer.py` | Removed redundant `copy()` in `XYSelector.process()` |
+| `plottr/node/grid.py` | Pass `copy=False` to `datadict_to_meshgrid()` |
+| `test/pytest/test_datadict_copy_semantics.py` | 64 new tests for copy semantics |
+| `test/pytest/test_pipeline_coverage.py` | 63 new tests for pipeline coverage |
+
+### Benchmark Comparison (Baseline -> Final)
+
+| Benchmark | Before (ms) | After (ms) | Speedup | Notes |
+|---|---|---|---|---|
+| **mesh_500k_copy** | 42.2 | 2.9 | **14.8x** | copy() no longer calls structure()/validate() |
+| **mesh_50k_copy** | 2.7 | 0.4 | **6.1x** | Same optimization, smaller data |
+| **tab_10k_copy** | 0.23 | 0.15 | **1.5x** | Smaller effect on tabular data |
+| **mesh_500k_validate** | 20.5 | 14.1 | **1.5x** | Removed np.unique/np.sign overhead |
+| **mesh_500k_structure** | 20.3 | 13.9 | **1.5x** | _build_structure() skips re-validation |
+| **mesh_50k_mask_invalid** | 10.0 | 9.1 | **1.1x** | Fast-path skips clean data |
+| **mesh_500k_mask_invalid (mem)** | 19537 KB | 0.3 KB | **~0** | No allocation for clean data |
+| **pipeline_4stage** | 8.2 | 5.7 | **1.4x** | Cumulative improvement |
+| **equality_5k** | 1.4 | 1.2 | **1.1x** | Shape short-circuit + set ops |
+
+### Bug Fixed
+
+- `copy()` previously did not deep-copy global mutable metadata (e.g., `dd.add_meta('info', {'key': 'val'})`). The new implementation properly deep-copies all metadata.
+
+### New APIs
+
+- `DataDictBase.copy(deep=True/False)` — `deep=False` shares array data (xarray convention)
+- `DataDictBase._build_structure()` — private helper that skips validation
+- `DataDictBase._copy_field()` — targeted field copy with per-key semantics
diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index ac577878..9035a702 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -117,6 +117,43 @@ def _meta_key_to_name(key: str) -> str:
     def _meta_name_to_key(name: str) -> str:
         return meta_name_to_key(name)
 
+    @staticmethod
+    def _copy_field(field: Dict[str, Any], copy_values: bool = True,
+                    empty_values: bool = False) -> Dict[str, Any]:
+        """Create a copy of a data field dict with targeted copy semantics.
+
+        Always creates a new dict and a new 'axes' list (mutation-safe).
+        For 'values': copies the array if *copy_values* is True, shares the
+        reference if False, or sets to ``[]`` if *empty_values* is True.
+        Scalar keys (unit, label) are passed through (immutable strings).
+        Meta keys (``__name__``) are deep-copied (may be mutable).
+        All other keys are deep-copied for safety.
+        """
+        new_field: Dict[str, Any] = {}
+        for fk, fv in field.items():
+            if fk == 'values':
+                if empty_values:
+                    new_field[fk] = []
+                elif copy_values:
+                    # use numpy-optimized copy for arrays
+                    if isinstance(fv, (np.ndarray, np.ma.core.MaskedArray)):
+                        new_field[fk] = fv.copy()
+                    elif isinstance(fv, list):
+                        new_field[fk] = fv.copy()
+                    else:
+                        new_field[fk] = cp.deepcopy(fv)
+                else:
+                    new_field[fk] = fv  # shared reference
+            elif fk == 'axes':
+                new_field[fk] = list(fv)  # always new list
+            elif fk in ('unit', 'label'):
+                new_field[fk] = fv  # immutable strings
+            elif is_meta_key(fk):
+                new_field[fk] = cp.deepcopy(fv)  # may be mutable
+            else:
+                new_field[fk] = cp.deepcopy(fv)  # unknown keys: safe default
+        return new_field
+
     @staticmethod
     def to_records(**data: Any) -> Dict[str, np.ndarray]:
         """Convert data to records that can be added to the ``DataDict``.
@@ -344,7 +381,7 @@ def extract(self: T, data: List[str], include_meta: bool = True,
         ret = self.__class__()
         for d in data:
             if copy:
-                ret[d] = cp.deepcopy(self[d])
+                ret[d] = self._copy_field(self[d], copy_values=True)
             else:
                 ret[d] = self[d]
 
@@ -426,29 +463,38 @@ def structure(self: T, add_shape: bool = False,
             remove_data = []
 
         if self.validate():
-            s = self.__class__()
-            for n, v in self.data_items():
-                if n not in remove_data:
-                    v2 = v.copy()
-                    v2['values'] = []
-                    s[n] = cp.deepcopy(v2)
-                    if 'axes' in s[n]:
-                        for r in remove_data:
-                            if r in s[n]['axes']:
-                                i = s[n]['axes'].index(r)
-                                s[n]['axes'].pop(i)
-
-            if include_meta:
-                for n, v in self.meta_items():
-                    s.add_meta(n, v)
-            else:
-                s.clear_meta()
+            return self._build_structure(
+                include_meta=include_meta, same_type=same_type,
+                remove_data=remove_data)
+        return None
 
-            if same_type:
-                s = self.__class__(**s)
+    def _build_structure(self: T, include_meta: bool = True,
+                         same_type: bool = False,
+                         remove_data: Optional[List[str]] = None) -> T:
+        """Build a structure-only copy. Caller must ensure data is validated."""
+        if remove_data is None:
+            remove_data = []
 
-            return s
-        return None
+        s = self.__class__()
+        for n, v in self.data_items():
+            if n not in remove_data:
+                s[n] = self._copy_field(v, empty_values=True)
+                if 'axes' in s[n]:
+                    for r in remove_data:
+                        if r in s[n]['axes']:
+                            i = s[n]['axes'].index(r)
+                            s[n]['axes'].pop(i)
+
+        if include_meta:
+            for n, v in self.meta_items():
+                s.add_meta(n, cp.deepcopy(v))
+        else:
+            s.clear_meta()
+
+        if same_type:
+            s = self.__class__(**s)
+
+        return s
     
 
     def nbytes(self, name: Optional[str]=None) -> Optional[int]:
@@ -560,7 +606,7 @@ def shapes(self) -> Dict[str, Tuple[int, ...]]:
         """
         shapes = {}
         for k, v in self.data_items():
-            shapes[k] = np.array(self.data_vals(k)).shape
+            shapes[k] = np.shape(self.data_vals(k))
 
         return shapes
 
@@ -692,18 +738,25 @@ def reorder_axes(self: T, data_names: Union[str, Sequence[str], None] = None,
         self.validate()
         return self
 
-    def copy(self: T) -> T:
+    def copy(self: T, deep: bool = True) -> T:
         """
         Make a copy of the dataset.
 
+        :param deep: If ``True`` (default), all data arrays are independently
+            copied. The returned dataset is fully independent of the original.
+            If ``False``, the returned dataset shares data array references
+            with the original. Modifying array *contents* in the copy will
+            affect the original; *replacing* an array only affects the copy.
+            Field metadata (axes, unit, label) is always independently copied.
         :return: A copy of the dataset.
         """
-        logger.debug(f'copying a dataset with size {self.nbytes()}')
-        ret = self.structure()
-        assert ret is not None
+        ret = self.__class__()
+        for k, v in self.items():
+            if self._is_meta_key(k):
+                ret[k] = cp.deepcopy(v)
+            else:
+                ret[k] = self._copy_field(v, copy_values=deep)
 
-        for k, v in self.data_items():
-            ret[k]['values'] = self.data_vals(k).copy()
         return ret
 
     def astype(self: T, dtype: np.dtype) -> T:
@@ -728,7 +781,10 @@ def mask_invalid(self: T) -> T:
         """
         for d, _ in self.data_items():
             arr = self.data_vals(d)
-            vals = np.ma.masked_where(num.is_invalid(arr), arr, copy=True)
+            invalid_mask = num.is_invalid(arr)
+            if not np.any(invalid_mask):
+                continue  # no invalid entries, skip masking
+            vals = np.ma.masked_where(invalid_mask, arr, copy=True)
             try:
                 vals.fill_value = np.nan
             except TypeError:
@@ -793,7 +849,7 @@ def __add__(self, newdata: 'DataDict') -> 'DataDict':
         """
 
         # FIXME: remove shape
-        s = misc.unwrap_optional(self.structure(add_shape=False))
+        s = self._build_structure()
         if DataDictBase.same_structure(self, newdata):
             for k, v in self.data_items():
                 val0 = self[k]['values']
@@ -843,7 +899,7 @@ def add_data(self, **kw: Any) -> None:
 
         :param kw: one array per data field (none can be omitted).
         """
-        dd = misc.unwrap_optional(self.structure(same_type=True))
+        dd = self._build_structure(same_type=True)
         for name, _ in dd.data_items():
             if name not in kw:
                 kw[name] = None
@@ -925,7 +981,7 @@ def expand(self) -> 'DataDict':
         self.validate()
         if not self.is_expandable():
             raise ValueError('Data cannot be expanded.')
-        struct = misc.unwrap_optional(self.structure(add_shape=False))
+        struct = self._build_structure()
         ret = DataDict(**struct)
 
         if self.is_expanded():
@@ -1120,17 +1176,24 @@ def validate(self) -> bool:
 
                         try:
                             if axis_data.shape[axis_num] > 1:
-                                steps = np.unique(np.sign(np.diff(axis_data, axis=axis_num)))
-                                
-                                # for incomplete data, there maybe nan steps -- we need to remove those, 
-                                # doesn't mean anything is wrong.
-                                steps = steps[~np.isnan(steps)]
-                                
-                                if 0 in steps:
-                                    msg += (f"Malformed data: {na} is expected to be {axis_num}th "
-                                            "axis but has no variation along that axis.\n")
-                                if steps.size > 1:
-                                    msg += (f"Malformed data: axis {na} is not monotonous.\n")
+                                d = np.diff(axis_data, axis=axis_num)
+
+                                # for incomplete data, there may be nan steps -- we need to
+                                # ignore those, doesn't mean anything is wrong.
+                                if np.issubdtype(d.dtype, np.floating):
+                                    nan_mask = np.isnan(d)
+                                    if np.all(nan_mask):
+                                        continue  # all NaN, can't check
+                                    valid = d[~nan_mask]
+                                else:
+                                    valid = d.ravel()
+
+                                if valid.size > 0:
+                                    if np.any(valid == 0):
+                                        msg += (f"Malformed data: {na} is expected to be {axis_num}th "
+                                                "axis but has no variation along that axis.\n")
+                                    if not (np.all(valid > 0) or np.all(valid < 0)):
+                                        msg += (f"Malformed data: axis {na} is not monotonous.\n")
                         
                         # can happen if we have bad shapes. but that should already have been caught.
                         except IndexError:
@@ -1214,7 +1277,7 @@ def _mesh_mean(data: MeshgridDataDict, ax: str) -> MeshgridDataDict:
     :return: averaged data
     """
     iax = data.axes().index(ax)
-    new_data = data.structure(remove_data=[ax])
+    new_data = data._build_structure(remove_data=[ax])
     assert isinstance(new_data, MeshgridDataDict)
 
     for d, v in data.data_items():
@@ -1237,7 +1300,7 @@ def _mesh_slice(data: MeshgridDataDict, **kwargs: Dict[str, Union[slice, int]])
     for ax, val in kwargs.items():
         i = data.axes().index(ax)
         slices[i] = val
-    ret = data.structure()
+    ret = data._build_structure()
     assert isinstance(ret, MeshgridDataDict)
 
     for d, _ in data.data_items():
@@ -1329,7 +1392,7 @@ def datadict_to_meshgrid(data: DataDict,
         inner_axis_order, target_shape = ret
 
     # construct new data
-    newdata = MeshgridDataDict(**misc.unwrap_optional(data.structure(add_shape=False)))
+    newdata = MeshgridDataDict(**data._build_structure())
     axlist = data.axes(data.dependents()[0])
 
     for k, v in data.data_items():
@@ -1356,7 +1419,7 @@ def meshgrid_to_datadict(data: MeshgridDataDict) -> DataDict:
     :param data: Input ``MeshgridDataDict``.
     :return: Flattened ``DataDict``.
     """
-    newdata = DataDict(**misc.unwrap_optional(data.structure(add_shape=False)))
+    newdata = DataDict(**data._build_structure())
     for k, v in data.data_items():
         val = v['values'].copy().reshape(-1)
         newdata[k]['values'] = val
@@ -1605,48 +1668,38 @@ def datasets_are_equal(a: DataDictBase, b: DataDictBase,
         return False
 
     if not ignore_meta:
-        # are all meta data of a also in b, and are they the same value?
-        for k, v in a.meta_items():
-            if k not in [kk for kk, vv in b.meta_items()]:
-                return False
-            elif b.meta_val(k) != v:
-                return False
-
-        # are all meta data of b also in a?
-        for k, v in b.meta_items():
-            if k not in [kk for kk, vv in a.meta_items()]:
+        a_meta = dict(a.meta_items())
+        b_meta = dict(b.meta_items())
+        if a_meta.keys() != b_meta.keys():
+            return False
+        for k, v in a_meta.items():
+            if b_meta[k] != v:
                 return False
 
-    # check all data fields in a
-    for dn, dv in a.data_items():
+    # check all data fields
+    a_fields = set(dn for dn, _ in a.data_items())
+    b_fields = set(dn for dn, _ in b.data_items())
+    if a_fields != b_fields:
+        return False
 
-        # are all fields also present in b?
-        if dn not in [dnn for dnn, dvv in b.data_items()]:
-            return False
+    for dn in a_fields:
+        a_vals = a.data_vals(dn)
+        b_vals = b.data_vals(dn)
 
-        # check if data is equal
-        if not num.arrays_equal(
-                np.array(a.data_vals(dn)),
-                np.array(b.data_vals(dn)),
-        ):
+        # fast shape check before expensive value comparison
+        if np.shape(a_vals) != np.shape(b_vals):
             return False
 
-        if not ignore_meta:
-            # check meta data
-            for k, v in a.meta_items(dn):
-                if k not in [kk for kk, vv in b.meta_items(dn)]:
-                    return False
-                elif v != b.meta_val(k, dn):
-                    return False
-
-    # only thing left to check is whether there are items in b but not a
-    for dn, dv in b.data_items():
-        if dn not in [dnn for dnn, dvv in a.data_items()]:
+        if not num.arrays_equal(np.asarray(a_vals), np.asarray(b_vals)):
             return False
 
         if not ignore_meta:
-            for k, v in b.meta_items(dn):
-                if k not in [kk for kk, vv in a.meta_items(dn)]:
+            a_fmeta = dict(a.meta_items(dn))
+            b_fmeta = dict(b.meta_items(dn))
+            if a_fmeta.keys() != b_fmeta.keys():
+                return False
+            for k, v in a_fmeta.items():
+                if v != b_fmeta[k]:
                     return False
 
     return True
diff --git a/plottr/node/dim_reducer.py b/plottr/node/dim_reducer.py
index 2cf35e3e..611f0745 100644
--- a/plottr/node/dim_reducer.py
+++ b/plottr/node/dim_reducer.py
@@ -898,7 +898,7 @@ def process(
             return None
         dataout = data['dataOut']
         assert dataout is not None
-        data = dataout.copy()
+        data = dataout  # parent DimensionReducer.process() already copied
 
         if self._xyAxes[0] is not None and self._xyAxes[1] is not None:
             _kw = {self._xyAxes[0]: 0, self._xyAxes[1]: 1}
diff --git a/plottr/node/grid.py b/plottr/node/grid.py
index 88213aca..8d672abc 100644
--- a/plottr/node/grid.py
+++ b/plottr/node/grid.py
@@ -482,16 +482,18 @@ def process(
                 if method is GridOption.noGrid:
                     dout = data.expand()
                 elif method is GridOption.guessShape:
-                    dout = dd.datadict_to_meshgrid(data)
+                    dout = dd.datadict_to_meshgrid(data, copy=False)
                 elif method is GridOption.specifyShape:
                     dout = dd.datadict_to_meshgrid(
                         data, target_shape=opts['shape'],
                         inner_axis_order=order,
+                        copy=False,
                     )
                 elif method is GridOption.metadataShape:
                     try:
                         dout = dd.datadict_to_meshgrid(
-                            data, use_existing_shape=True
+                            data, use_existing_shape=True,
+                            copy=False,
                         )
                     except ValueError as err:
                         if "Malformed data" in str(err):
@@ -499,7 +501,7 @@ def process(
                                 "Shape/Setpoint order does"
                                 " not match data. Falling back to guessing shape"
                                 )
-                            dout = dd.datadict_to_meshgrid(data)
+                            dout = dd.datadict_to_meshgrid(data, copy=False)
                         else:
                             raise err
             except GriddingError:
diff --git a/test/pytest/test_datadict_copy_semantics.py b/test/pytest/test_datadict_copy_semantics.py
new file mode 100644
index 00000000..048803f5
--- /dev/null
+++ b/test/pytest/test_datadict_copy_semantics.py
@@ -0,0 +1,709 @@
+"""
+test_datadict_copy_semantics.py
+
+Comprehensive tests for DataDict copy semantics, data integrity through pipeline
+operations, and edge cases. These tests serve as a safety net before making
+performance optimizations to the DataDict implementation.
+"""
+import copy as cp
+
+import numpy as np
+import pytest
+
+from plottr.data.datadict import (
+    DataDict,
+    DataDictBase,
+    MeshgridDataDict,
+    datadict_to_meshgrid,
+    meshgrid_to_datadict,
+    datasets_are_equal,
+)
+from plottr.utils import num
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_datadict(npts: int = 100) -> DataDict:
+    """Simple 1D DataDict: x -> y, z."""
+    return DataDict(
+        x=dict(values=np.arange(npts, dtype=float), unit='V', label='x'),
+        y=dict(values=np.random.randn(npts), axes=['x'], unit='A', label='y'),
+        z=dict(values=np.random.randn(npts), axes=['x'], unit='A', label='z'),
+    )
+
+
+def make_meshgrid(shape: tuple = (10, 8), ndeps: int = 2) -> MeshgridDataDict:
+    """Gridded data with given shape."""
+    naxes = len(shape)
+    dd = MeshgridDataDict()
+    ax_names = [f'ax{i}' for i in range(naxes)]
+    grids = np.meshgrid(*[np.linspace(0, 1, s) for s in shape], indexing='ij')
+    for i, ax in enumerate(ax_names):
+        dd[ax] = dict(values=grids[i], axes=[], unit='V', label=ax)
+    for i in range(ndeps):
+        dd[f'dep{i}'] = dict(
+            values=np.random.randn(*shape),
+            axes=ax_names.copy(),
+            unit='A',
+            label=f'dep{i}',
+        )
+    dd.validate()
+    return dd
+
+
+# ===========================================================================
+# 1. COPY ISOLATION TESTS
+# ===========================================================================
+
+class TestCopyIsolation:
+    """Verify that copy() produces fully independent data."""
+
+    def test_copy_values_independent(self):
+        """Modifying copied values must not affect the original."""
+        dd = make_datadict()
+        dd2 = dd.copy()
+        dd2['y']['values'][0] = 999.0
+        assert dd['y']['values'][0] != 999.0
+
+    def test_copy_axes_independent(self):
+        """Modifying copied axes list must not affect the original."""
+        dd = make_datadict()
+        dd2 = dd.copy()
+        dd2['y']['axes'].append('extra')
+        assert 'extra' not in dd['y']['axes']
+
+    def test_copy_unit_independent(self):
+        """Changing unit on copy must not affect original."""
+        dd = make_datadict()
+        dd2 = dd.copy()
+        dd2['y']['unit'] = 'mA'
+        assert dd['y']['unit'] == 'A'
+
+    def test_copy_meta_independent(self):
+        """Modifying mutable metadata on copy must not affect original.
+
+        This was previously broken (copy() via structure() did not deepcopy
+        global mutable metadata). Fixed by the Phase 1a copy() rewrite.
+        """
+        dd = make_datadict()
+        dd.add_meta('info', {'key': 'value'})
+        dd2 = dd.copy()
+        dd2.meta_val('info')['key'] = 'changed'
+        assert dd.meta_val('info')['key'] == 'value'
+
+    def test_copy_field_meta_independent(self):
+        """Per-field mutable metadata should be independent after copy.
+
+        Note: this works because structure() calls cp.deepcopy on each field dict,
+        which catches per-field meta. However, global meta is NOT deepcopied
+        (see test_copy_meta_independent above).
+        """
+        dd = make_datadict()
+        dd.add_meta('cal', [1, 2, 3], data='y')
+        dd2 = dd.copy()
+        dd2.meta_val('cal', 'y').append(4)
+        assert dd.meta_val('cal', 'y') == [1, 2, 3]
+
+    def test_copy_preserves_type_datadict(self):
+        dd = make_datadict()
+        dd2 = dd.copy()
+        assert type(dd2) is DataDict
+
+    def test_copy_preserves_type_meshgrid(self):
+        dd = make_meshgrid()
+        dd2 = dd.copy()
+        assert type(dd2) is MeshgridDataDict
+
+    def test_copy_preserves_equality(self):
+        dd = make_datadict()
+        dd.add_meta('info', 'test')
+        dd2 = dd.copy()
+        assert dd == dd2
+
+    def test_meshgrid_copy_values_independent(self):
+        dd = make_meshgrid((10, 8))
+        dd2 = dd.copy()
+        dd2['dep0']['values'][0, 0] = 999.0
+        assert dd['dep0']['values'][0, 0] != 999.0
+
+    def test_meshgrid_copy_axes_independent(self):
+        dd = make_meshgrid((10, 8))
+        dd2 = dd.copy()
+        original_axes = dd['dep0']['axes'].copy()
+        dd2['dep0']['axes'].pop()
+        assert dd['dep0']['axes'] == original_axes
+
+
+# ===========================================================================
+# 2. EXTRACT ISOLATION TESTS
+# ===========================================================================
+
+class TestExtractIsolation:
+    """Verify that extract() produces independent data when copy=True."""
+
+    def test_extract_copy_true_values_independent(self):
+        dd = make_datadict()
+        ex = dd.extract(['y'], copy=True)
+        ex['y']['values'][0] = 999.0
+        assert dd['y']['values'][0] != 999.0
+
+    def test_extract_copy_true_axes_independent(self):
+        dd = make_datadict()
+        ex = dd.extract(['y'], copy=True)
+        ex['y']['axes'].append('extra')
+        assert 'extra' not in dd['y']['axes']
+
+    def test_extract_copy_false_shares_values(self):
+        dd = make_datadict()
+        ex = dd.extract(['y'], copy=False)
+        # With copy=False, arrays are shared
+        assert np.shares_memory(ex['y']['values'], dd['y']['values'])
+
+    def test_extract_includes_axes_fields(self):
+        dd = make_datadict()
+        ex = dd.extract(['y'])
+        assert 'x' in ex
+        assert 'y' in ex
+        assert 'z' not in ex
+
+    def test_extract_includes_meta(self):
+        dd = make_datadict()
+        dd.add_meta('info', 'hello')
+        ex = dd.extract(['y'])
+        assert ex.has_meta('info')
+
+    def test_extract_preserves_field_meta(self):
+        dd = make_datadict()
+        dd.add_meta('cal', 42, data='y')
+        ex = dd.extract(['y'])
+        assert ex.meta_val('cal', 'y') == 42
+
+
+# ===========================================================================
+# 3. STRUCTURE TESTS
+# ===========================================================================
+
+class TestStructure:
+    """Verify structure() correctness and independence."""
+
+    def test_structure_has_empty_values(self):
+        dd = make_datadict()
+        s = dd.structure()
+        for _, v in s.data_items():
+            assert len(v['values']) == 0
+
+    def test_structure_preserves_axes(self):
+        dd = make_datadict()
+        s = dd.structure()
+        assert s['y']['axes'] == ['x']
+
+    def test_structure_preserves_units(self):
+        dd = make_datadict()
+        s = dd.structure()
+        assert s['y']['unit'] == 'A'
+
+    def test_structure_preserves_meta(self):
+        dd = make_datadict()
+        dd.add_meta('info', 'test')
+        s = dd.structure()
+        assert s.meta_val('info') == 'test'
+
+    def test_structure_axes_independent(self):
+        """Mutating axes in structure must not affect original."""
+        dd = make_datadict()
+        s = dd.structure()
+        s['y']['axes'].append('extra')
+        assert 'extra' not in dd['y']['axes']
+
+    def test_structure_preserves_custom_field_keys(self):
+        """Custom keys in field dicts must be preserved."""
+        dd = make_datadict()
+        dd['y']['__shape__'] = (100,)
+        dd['y']['__custom_meta__'] = 'hello'
+        s = dd.structure()
+        assert '__shape__' in s['y']
+        assert '__custom_meta__' in s['y']
+
+    def test_structure_with_remove_data(self):
+        dd = make_meshgrid((5, 4))
+        s = dd.structure(remove_data=['ax0'])
+        assert 'ax0' not in s
+        for dep in s.dependents():
+            assert 'ax0' not in s[dep]['axes']
+
+
+# ===========================================================================
+# 4. EDGE CASES: DATA TYPES
+# ===========================================================================
+
+class TestEdgeCaseDataTypes:
+    """Tests with unusual data types."""
+
+    def test_object_array_with_none(self):
+        """DataDict with object arrays containing None values."""
+        dd = DataDict(
+            x=dict(values=np.array([1, 2, 3, 4, 5], dtype=object)),
+            y=dict(values=np.array([1.0, None, 3.0, None, 5.0], dtype=object),
+                   axes=['x']),
+        )
+        assert dd.validate()
+        dd2 = dd.copy()
+        assert dd == dd2
+
+    def test_complex_array(self):
+        """DataDict with complex-valued data."""
+        dd = DataDict(
+            x=dict(values=np.arange(10, dtype=float)),
+            y=dict(values=np.random.randn(10) + 1j * np.random.randn(10),
+                   axes=['x']),
+        )
+        assert dd.validate()
+        dd2 = dd.copy()
+        assert dd == dd2
+        dd2['y']['values'][0] = 999 + 0j
+        assert dd['y']['values'][0] != 999 + 0j
+
+    def test_integer_array(self):
+        """DataDict with integer data (no NaN possible)."""
+        dd = DataDict(
+            x=dict(values=np.arange(10)),
+            y=dict(values=np.arange(10, 20), axes=['x']),
+        )
+        assert dd.validate()
+        dd2 = dd.copy()
+        assert dd == dd2
+
+    def test_masked_array_values(self):
+        """DataDict where values are already MaskedArrays."""
+        vals = np.ma.MaskedArray([1.0, 2.0, 3.0], mask=[False, True, False])
+        dd = DataDict(
+            x=dict(values=np.arange(3, dtype=float)),
+            y=dict(values=vals, axes=['x']),
+        )
+        assert dd.validate()
+        dd2 = dd.copy()
+        assert np.ma.is_masked(dd2['y']['values'])
+
+    def test_empty_datadict(self):
+        """Empty DataDict operations."""
+        dd = DataDict()
+        s = dd.structure()
+        assert s is not None
+        dd2 = dd.copy()
+        assert dd == dd2
+
+    def test_single_point(self):
+        """DataDict with a single data point."""
+        dd = DataDict(
+            x=dict(values=np.array([1.0])),
+            y=dict(values=np.array([2.0]), axes=['x']),
+        )
+        assert dd.validate()
+        dd2 = dd.copy()
+        assert dd == dd2
+
+
+# ===========================================================================
+# 5. MASK_INVALID TESTS
+# ===========================================================================
+
+class TestMaskInvalid:
+    """Tests for mask_invalid() behavior with different data."""
+
+    def test_mask_invalid_clean_float_data(self):
+        """Clean float data — all values valid."""
+        dd = make_datadict()
+        dd2 = dd.copy()
+        dd2 = dd2.mask_invalid()
+        # Values should be unchanged (though possibly wrapped in MaskedArray)
+        for name, _ in dd2.data_items():
+            assert np.allclose(
+                np.asarray(dd.data_vals(name)),
+                np.asarray(dd2.data_vals(name)),
+            )
+
+    def test_mask_invalid_with_nan(self):
+        """Float data with NaN values should be masked."""
+        dd = DataDict(
+            x=dict(values=np.array([1.0, 2.0, 3.0])),
+            y=dict(values=np.array([1.0, np.nan, 3.0]), axes=['x']),
+        )
+        dd = dd.mask_invalid()
+        y_vals = dd.data_vals('y')
+        assert isinstance(y_vals, np.ma.MaskedArray)
+        assert y_vals.mask[1] == True
+
+    def test_mask_invalid_with_none_objects(self):
+        """Object array with None values should be masked."""
+        dd = DataDict(
+            x=dict(values=np.array([1, 2, 3], dtype=object)),
+            y=dict(values=np.array([1.0, None, 3.0], dtype=object), axes=['x']),
+        )
+        dd = dd.mask_invalid()
+        y_vals = dd.data_vals('y')
+        assert isinstance(y_vals, np.ma.MaskedArray)
+
+    def test_mask_invalid_preserves_structure(self):
+        """Structure should be unchanged after masking."""
+        dd = make_meshgrid()
+        s_before = dd.structure()
+        dd = dd.mask_invalid()
+        s_after = dd.structure()
+        assert DataDictBase.same_structure(
+            s_before, s_after
+        )
+
+
+# ===========================================================================
+# 6. MESHGRID CONVERSION TESTS
+# ===========================================================================
+
+class TestMeshgridConversions:
+    """Test conversions between DataDict and MeshgridDataDict."""
+
+    def test_roundtrip_datadict_meshgrid_datadict(self):
+        """Tabular → grid → tabular should preserve data."""
+        x = np.linspace(0, 1, 10)
+        y = np.arange(5, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        zz = xx * yy
+
+        dd = DataDict(
+            x=dict(values=xx.ravel()),
+            y=dict(values=yy.ravel()),
+            z=dict(values=zz.ravel(), axes=['x', 'y']),
+        )
+        mesh = datadict_to_meshgrid(dd)
+        assert isinstance(mesh, MeshgridDataDict)
+        assert mesh.shape() == (10, 5)
+
+        dd2 = meshgrid_to_datadict(mesh)
+        assert isinstance(dd2, DataDict)
+        assert dd2.nrecords() == 50
+
+    def test_datadict_to_meshgrid_copy_true(self):
+        """copy=True should produce independent arrays."""
+        x = np.arange(6, dtype=float)
+        y = np.tile(np.arange(3, dtype=float), 2)
+        dd = DataDict(
+            x=dict(values=x),
+            y=dict(values=y),
+            z=dict(values=np.arange(6, dtype=float), axes=['x', 'y']),
+        )
+        mesh = datadict_to_meshgrid(dd, target_shape=(2, 3), copy=True)
+        mesh['z']['values'][0, 0] = 999.0
+        assert dd['z']['values'][0] != 999.0
+
+    def test_datadict_to_meshgrid_preserves_meta(self):
+        """Conversion should preserve global metadata."""
+        x = np.arange(6, dtype=float)
+        y = np.tile(np.arange(3, dtype=float), 2)
+        dd = DataDict(
+            x=dict(values=x),
+            y=dict(values=y),
+            z=dict(values=np.arange(6, dtype=float), axes=['x', 'y']),
+            __info__='test_meta',
+        )
+        mesh = datadict_to_meshgrid(dd, target_shape=(2, 3))
+        assert mesh.meta_val('info') == 'test_meta'
+
+    def test_meshgrid_to_datadict_independent(self):
+        """meshgrid_to_datadict should not share arrays with original."""
+        mesh = make_meshgrid((5, 4))
+        dd = meshgrid_to_datadict(mesh)
+        dd['dep0']['values'][0] = 999.0
+        assert mesh['dep0']['values'].ravel()[0] != 999.0
+
+
+# ===========================================================================
+# 7. MESHGRID VALIDATION TESTS
+# ===========================================================================
+
+class TestMeshgridValidation:
+    """Test MeshgridDataDict validation, especially monotonicity checks."""
+
+    def test_valid_monotonic_increasing(self):
+        dd = make_meshgrid((5, 4))
+        assert dd.validate()
+
+    def test_valid_monotonic_decreasing(self):
+        """Axes that decrease monotonically are valid."""
+        dd = MeshgridDataDict()
+        x = np.linspace(1, 0, 5)  # decreasing
+        y = np.linspace(0, 1, 4)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        dd['x'] = dict(values=xx, axes=[], unit='V', label='x')
+        dd['y'] = dict(values=yy, axes=[], unit='V', label='y')
+        dd['z'] = dict(values=xx + yy, axes=['x', 'y'], unit='A', label='z')
+        assert dd.validate()
+
+    def test_invalid_non_monotonic(self):
+        """Axis that goes up then down should fail."""
+        dd = MeshgridDataDict()
+        x_vals = np.array([0, 1, 2, 1, 0], dtype=float)
+        y_vals = np.arange(3, dtype=float)
+        xx, yy = np.meshgrid(x_vals, y_vals, indexing='ij')
+        dd['x'] = dict(values=xx, axes=[], unit='V', label='x')
+        dd['y'] = dict(values=yy, axes=[], unit='V', label='y')
+        dd['z'] = dict(values=np.random.randn(5, 3), axes=['x', 'y'],
+                       unit='A', label='z')
+        with pytest.raises(ValueError, match="not monotonous"):
+            dd.validate()
+
+    def test_invalid_flat_axis(self):
+        """Axis with no variation should fail."""
+        dd = MeshgridDataDict()
+        x = np.array([1.0, 1.0, 1.0])
+        y = np.arange(4, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        dd['x'] = dict(values=xx, axes=[], unit='V', label='x')
+        dd['y'] = dict(values=yy, axes=[], unit='V', label='y')
+        dd['z'] = dict(values=np.random.randn(3, 4), axes=['x', 'y'],
+                       unit='A', label='z')
+        with pytest.raises(ValueError, match="no variation"):
+            dd.validate()
+
+    def test_valid_with_nan_in_axis(self):
+        """Axis with NaN values (incomplete data) should still validate
+        if the non-NaN values are monotonic."""
+        dd = make_meshgrid((5, 4))
+        dd['ax0']['values'][3, :] = np.nan
+        dd['ax0']['values'][4, :] = np.nan
+        # Should not raise — NaN steps are ignored
+        assert dd.validate()
+
+    def test_valid_3d_meshgrid(self):
+        """3D meshgrid should validate correctly."""
+        dd = make_meshgrid((5, 4, 3))
+        assert dd.validate()
+
+    def test_shape_mismatch_fails(self):
+        """Different shapes across fields should fail."""
+        dd = MeshgridDataDict()
+        dd['x'] = dict(values=np.arange(10, dtype=float).reshape(2, 5),
+                       axes=[])
+        dd['z'] = dict(values=np.arange(12, dtype=float).reshape(3, 4),
+                       axes=['x'])
+        with pytest.raises(ValueError):
+            dd.validate()
+
+
+# ===========================================================================
+# 8. SHAPES() EDGE CASES
+# ===========================================================================
+
+class TestShapes:
+    """Test shapes() with various input states."""
+
+    def test_shapes_after_validation(self):
+        dd = make_datadict(50)
+        dd.validate()
+        shapes = dd.shapes()
+        assert shapes['x'] == (50,)
+        assert shapes['y'] == (50,)
+
+    def test_shapes_with_list_values(self):
+        """shapes() should work even before validation when values are lists."""
+        dd = DataDictBase(
+            x=dict(values=[1, 2, 3]),
+            y=dict(values=[4, 5, 6], axes=['x']),
+        )
+        # Should not crash, even without validate()
+        shapes = dd.shapes()
+        assert shapes['x'] == (3,)
+
+    def test_shapes_meshgrid(self):
+        dd = make_meshgrid((10, 8))
+        shapes = dd.shapes()
+        for name in dd.dependents() + dd.axes():
+            assert shapes[name] == (10, 8)
+
+
+# ===========================================================================
+# 9. PIPELINE DATA INTEGRITY TESTS
+# ===========================================================================
+
+class TestPipelineIntegrity:
+    """Simulate pipeline operations and verify input is not mutated."""
+
+    def _simulate_data_selector(self, data: DataDictBase) -> DataDictBase:
+        """Simulate DataSelector.process() — extract a subset."""
+        selected = data.extract(data.dependents()[:1])
+        if isinstance(selected, DataDictBase):
+            selected = DataDict(**selected)
+            selected.validate()
+        return selected
+
+    def _simulate_gridder(self, data: DataDict) -> MeshgridDataDict:
+        """Simulate DataGridder.process() — copy + grid."""
+        data_copy = data.copy()
+        return datadict_to_meshgrid(data_copy)
+
+    def _simulate_dim_reducer(self, data: MeshgridDataDict) -> MeshgridDataDict:
+        """Simulate DimensionReducer.process() — copy + mask."""
+        data_copy = data.copy()
+        return data_copy.mask_invalid()
+
+    def test_pipeline_does_not_mutate_input(self):
+        """Full pipeline must not modify the original input data."""
+        # Create griddable data
+        x = np.linspace(0, 1, 10)
+        y = np.arange(5, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+
+        original = DataDict(
+            x=dict(values=xx.ravel()),
+            y=dict(values=yy.ravel()),
+            z=dict(values=(xx * yy).ravel(), axes=['x', 'y']),
+        )
+        original.validate()
+
+        # Save a reference-safe copy for comparison (cp.deepcopy fails on
+        # DataDict due to _DataAccess inner class, so use the built-in copy)
+        reference = original.copy()
+
+        # Run simulated pipeline
+        selected = self._simulate_data_selector(original)
+        gridded = self._simulate_gridder(selected)
+        reduced = self._simulate_dim_reducer(gridded)
+
+        # Verify original is unchanged
+        assert datasets_are_equal(original, reference)
+
+    def test_pipeline_output_types(self):
+        """Pipeline stages should produce the expected types."""
+        x = np.linspace(0, 1, 10)
+        y = np.arange(5, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+
+        dd = DataDict(
+            x=dict(values=xx.ravel()),
+            y=dict(values=yy.ravel()),
+            z=dict(values=(xx * yy).ravel(), axes=['x', 'y']),
+        )
+
+        selected = self._simulate_data_selector(dd)
+        assert isinstance(selected, DataDict)
+
+        gridded = self._simulate_gridder(selected)
+        assert isinstance(gridded, MeshgridDataDict)
+
+        reduced = self._simulate_dim_reducer(gridded)
+        assert isinstance(reduced, MeshgridDataDict)
+
+
+# ===========================================================================
+# 10. MESHGRID OPERATIONS: mean, slice
+# ===========================================================================
+
+class TestMeshgridOperations:
+    """Test mean and slice operations on MeshgridDataDict."""
+
+    def test_mean_reduces_axis(self):
+        dd = make_meshgrid((10, 8))
+        result = dd.mean('ax0')
+        assert result.shape() == (8,)
+        assert 'ax0' not in result
+
+    def test_mean_does_not_mutate_original(self):
+        dd = make_meshgrid((10, 8))
+        original_shape = dd.shape()
+        _ = dd.mean('ax0')
+        assert dd.shape() == original_shape
+
+    def test_slice_reduces_shape(self):
+        dd = make_meshgrid((10, 8))
+        result = dd.slice(ax0=slice(2, 5))
+        assert result.shape() == (3, 8)
+
+    def test_slice_does_not_mutate_original(self):
+        dd = make_meshgrid((10, 8))
+        original_shape = dd.shape()
+        _ = dd.slice(ax0=slice(2, 5))
+        assert dd.shape() == original_shape
+
+    def test_slice_integer_selects_single_element(self):
+        """Integer indexing on a meshgrid axis selects a single element,
+        but _mesh_slice does NOT remove the axis — it creates a size-1 dim.
+        Using a length-1 slice keeps the axis valid."""
+        dd = make_meshgrid((10, 8))
+        result = dd.slice(ax0=slice(3, 4))
+        assert result.shape() == (1, 8)
+
+
+# ===========================================================================
+# 11. CUSTOM FIELD KEY PRESERVATION
+# ===========================================================================
+
+class TestCustomFieldKeys:
+    """Verify that custom field keys are preserved through operations."""
+
+    def test_copy_preserves_shape_key(self):
+        dd = make_meshgrid((5, 4))
+        dd['dep0']['__shape__'] = (5, 4)
+        dd2 = dd.copy()
+        assert dd2['dep0']['__shape__'] == (5, 4)
+
+    def test_copy_preserves_per_field_meta(self):
+        dd = make_datadict()
+        dd['y']['__calibration__'] = {'gain': 1.5}
+        dd2 = dd.copy()
+        assert dd2['y']['__calibration__'] == {'gain': 1.5}
+
+    def test_structure_preserves_shape_key(self):
+        dd = make_meshgrid((5, 4))
+        dd['dep0']['__shape__'] = (5, 4)
+        s = dd.structure()
+        assert '__shape__' in s['dep0']
+
+    def test_extract_preserves_per_field_meta(self):
+        dd = make_datadict()
+        dd['y']['__calibration__'] = {'gain': 1.5}
+        ex = dd.extract(['y'])
+        assert ex['y']['__calibration__'] == {'gain': 1.5}
+
+
+# ===========================================================================
+# 12. DATASETS_ARE_EQUAL TESTS
+# ===========================================================================
+
+class TestDatasetsAreEqual:
+    """Additional equality checks including edge cases."""
+
+    def test_equal_meshgrids(self):
+        dd = make_meshgrid()
+        dd2 = dd.copy()
+        assert datasets_are_equal(dd, dd2)
+
+    def test_not_equal_different_values(self):
+        dd = make_meshgrid()
+        dd2 = dd.copy()
+        dd2['dep0']['values'][0, 0] += 1.0
+        assert not datasets_are_equal(dd, dd2)
+
+    def test_not_equal_different_types(self):
+        dd = make_datadict()
+        mesh = make_meshgrid()
+        assert not datasets_are_equal(dd, mesh)
+
+    def test_not_equal_different_shape(self):
+        dd1 = make_meshgrid((5, 4))
+        dd2 = make_meshgrid((5, 3))
+        assert not datasets_are_equal(dd1, dd2)
+
+    def test_equal_with_meta(self):
+        dd = make_datadict()
+        dd.add_meta('info', 'value')
+        dd2 = dd.copy()
+        assert datasets_are_equal(dd, dd2)
+        assert datasets_are_equal(dd, dd2, ignore_meta=True)
+
+    def test_not_equal_meta_differs(self):
+        dd = make_datadict()
+        dd.add_meta('info', 'value')
+        dd2 = dd.copy()
+        dd2.set_meta('info', 'different')
+        assert not datasets_are_equal(dd, dd2)
+        assert datasets_are_equal(dd, dd2, ignore_meta=True)
diff --git a/test/pytest/test_pipeline_coverage.py b/test/pytest/test_pipeline_coverage.py
new file mode 100644
index 00000000..5551f69a
--- /dev/null
+++ b/test/pytest/test_pipeline_coverage.py
@@ -0,0 +1,632 @@
+"""
+test_pipeline_coverage.py
+
+Comprehensive pipeline tests exercising every plottr node with various data
+shapes, structures, and dtypes.
+
+Uses two approaches:
+- hypothesis @given for pure DataDict/MeshgridDataDict operations (no Qt needed)
+- pytest parametrize + qtbot for flowchart-based node tests (needs QApplication)
+"""
+import numpy as np
+import pytest
+from hypothesis import given, settings, HealthCheck
+from hypothesis import strategies as st
+
+from plottr.data.datadict import (
+    DataDict,
+    DataDictBase,
+    MeshgridDataDict,
+    datadict_to_meshgrid,
+    meshgrid_to_datadict,
+)
+from plottr.node.tools import linearFlowchart
+from plottr.node.node import Node
+from plottr.node.data_selector import DataSelector
+from plottr.node.grid import DataGridder, GridOption
+from plottr.node.dim_reducer import DimensionReducer, XYSelector, ReductionMethod
+from plottr.node.scaleunits import ScaleUnits
+from plottr.node.filter.correct_offset import SubtractAverage
+from plottr.node.histogram import Histogrammer
+from plottr.utils import num
+
+
+# ---------------------------------------------------------------------------
+# Disable UI for all node classes within this module's tests only.
+# We save/restore originals via a session-scoped fixture.
+# ---------------------------------------------------------------------------
+
+_ORIGINAL_UI_SETTINGS = {}
+
+@pytest.fixture(autouse=True, scope="module")
+def _disable_ui_for_module():
+    """Temporarily disable UIs for all node classes during this module's tests."""
+    classes = [DataSelector, DataGridder, DimensionReducer, XYSelector,
+               ScaleUnits, SubtractAverage, Histogrammer]
+    for cls in classes:
+        _ORIGINAL_UI_SETTINGS[cls] = (cls.useUi, cls.uiClass)
+        cls.useUi = False
+        cls.uiClass = None
+    yield
+    for cls in classes:
+        cls.useUi, cls.uiClass = _ORIGINAL_UI_SETTINGS[cls]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_griddable_dd(shape, ndeps=1):
+    """Create a DataDict from a meshgrid shape (flattened)."""
+    naxes = len(shape)
+    ax_names = [f'ax{i}' for i in range(naxes)]
+    axes_1d = [np.linspace(0, 1, s) for s in shape]
+    grids = np.meshgrid(*axes_1d, indexing='ij')
+
+    dd = DataDict()
+    for i, ax in enumerate(ax_names):
+        dd[ax] = dict(values=grids[i].ravel(), axes=[], unit='V', label=ax)
+    for j in range(ndeps):
+        dd[f'dep{j}'] = dict(values=np.random.randn(int(np.prod(shape))),
+                             axes=ax_names.copy(), unit='A', label=f'dep{j}')
+    dd.validate()
+    return dd
+
+
+def make_mesh(shape, ndeps=1):
+    """Create a MeshgridDataDict."""
+    naxes = len(shape)
+    ax_names = [f'ax{i}' for i in range(naxes)]
+    axes_1d = [np.linspace(0, 1, s) for s in shape]
+    grids = np.meshgrid(*axes_1d, indexing='ij')
+
+    dd = MeshgridDataDict()
+    for i, ax in enumerate(ax_names):
+        dd[ax] = dict(values=grids[i], axes=[], unit='V', label=ax)
+    for j in range(ndeps):
+        dd[f'dep{j}'] = dict(values=np.random.randn(*shape),
+                             axes=ax_names.copy(), unit='A', label=f'dep{j}')
+    dd.validate()
+    return dd
+
+
+def snapshot_values(dd):
+    return {k: v['values'].copy() for k, v in dd.data_items()}
+
+
+def assert_not_mutated(dd, snap):
+    for k, orig in snap.items():
+        assert num.arrays_equal(np.asarray(orig), np.asarray(dd.data_vals(k))), \
+            f"Field {k} was mutated"
+
+
+# ---------------------------------------------------------------------------
+# Hypothesis strategies for pure data operations
+# ---------------------------------------------------------------------------
+
+@st.composite
+def griddable_datadict_st(draw, min_axis_len=2, max_axis_len=12,
+                          min_axes=1, max_axes=3, min_deps=1, max_deps=3):
+    naxes = draw(st.integers(min_value=min_axes, max_value=max_axes))
+    ndeps = draw(st.integers(min_value=min_deps, max_value=max_deps))
+    shape = tuple(draw(st.integers(min_value=min_axis_len, max_value=max_axis_len))
+                  for _ in range(naxes))
+    return make_griddable_dd(shape, ndeps)
+
+
+@st.composite
+def meshgrid_st(draw, min_axis_len=2, max_axis_len=12,
+                min_axes=1, max_axes=3, min_deps=1, max_deps=3):
+    naxes = draw(st.integers(min_value=min_axes, max_value=max_axes))
+    ndeps = draw(st.integers(min_value=min_deps, max_value=max_deps))
+    shape = tuple(draw(st.integers(min_value=min_axis_len, max_value=max_axis_len))
+                  for _ in range(naxes))
+    return make_mesh(shape, ndeps)
+
+
+# ===========================================================================
+# PART A: HYPOTHESIS TESTS — pure DataDict operations (no Qt)
+# ===========================================================================
+
+class TestDataDictOperationsHypothesis:
+    """Property-based tests for DataDict operations that don't need a QApplication."""
+
+    @given(data=griddable_datadict_st(min_axes=1, max_axes=3, min_axis_len=3))
+    @settings(max_examples=50, deadline=10000)
+    def test_gridding_roundtrip_structure(self, data):
+        """Gridding should produce a MeshgridDataDict with matching structure."""
+        try:
+            mesh = datadict_to_meshgrid(data)
+        except (ValueError, Exception):
+            # Some shapes may not grid cleanly; that's expected for edge cases
+            return
+        assert isinstance(mesh, MeshgridDataDict)
+        assert set(mesh.axes()) == set(data.axes())
+        assert set(mesh.dependents()) == set(data.dependents())
+
+    @given(data=meshgrid_st(min_axes=1, max_axes=3))
+    @settings(max_examples=50, deadline=5000)
+    def test_flatten_roundtrip(self, data):
+        """Flatten to DataDict and back should preserve shapes."""
+        flat = meshgrid_to_datadict(data)
+        assert isinstance(flat, DataDict)
+        assert flat.nrecords() == int(np.prod(data.shape()))
+
+    @given(data=griddable_datadict_st(min_axes=1, max_axes=3))
+    @settings(max_examples=30, deadline=5000)
+    def test_copy_preserves_equality(self, data):
+        """copy() should produce an equal dataset."""
+        data2 = data.copy()
+        assert data == data2
+
+    @given(data=griddable_datadict_st(min_deps=2, max_deps=4))
+    @settings(max_examples=30, deadline=5000)
+    def test_extract_produces_subset(self, data):
+        """extract() should return only the requested deps and their axes."""
+        dep = data.dependents()[0]
+        ex = data.extract([dep])
+        assert ex.dependents() == [dep]
+        assert set(ex.axes()) == set(data.axes(dep))
+
+    @given(data=meshgrid_st(min_axes=2, max_axes=3))
+    @settings(max_examples=30, deadline=5000)
+    def test_meshgrid_copy_independent(self, data):
+        """Copied MeshgridDataDict must be independent."""
+        data2 = data.copy()
+        data2[data2.dependents()[0]]['values'].flat[0] = 999.0
+        assert data[data.dependents()[0]]['values'].flat[0] != 999.0
+
+    @given(data=meshgrid_st(min_axes=2, max_axes=3))
+    @settings(max_examples=20, deadline=5000)
+    def test_mask_invalid_clean_data(self, data):
+        """mask_invalid on clean data should not change values."""
+        data2 = data.copy()
+        data2 = data2.mask_invalid()
+        for dep in data.dependents():
+            assert np.allclose(
+                np.asarray(data.data_vals(dep)),
+                np.asarray(data2.data_vals(dep)),
+            )
+
+    @given(data=meshgrid_st(min_axes=2, max_axes=2))
+    @settings(max_examples=20, deadline=5000)
+    def test_mean_removes_axis(self, data):
+        """mean() should remove the averaged axis."""
+        ax = data.axes()[0]
+        result = data.mean(ax)
+        assert ax not in result.axes()
+
+    @given(data=meshgrid_st(min_axes=2, max_axes=2, min_axis_len=4))
+    @settings(max_examples=20, deadline=5000)
+    def test_slice_preserves_validity(self, data):
+        """Slicing should produce valid data."""
+        ax = data.axes()[0]
+        result = data.slice(**{ax: slice(1, 3)})
+        assert result.validate()
+
+
+# ===========================================================================
+# PART B: FLOWCHART-BASED NODE TESTS (need qtbot for QApplication)
+# ===========================================================================
+
+# --- Node base ---
+
+def test_node_passthrough(qtbot):
+    data = make_griddable_dd((5, 4))
+    fc = linearFlowchart(('n', Node))
+    fc.setInput(dataIn=data)
+    assert fc.outputValues()['dataOut'] is data
+
+
+# --- DataSelector ---
+
+class TestDataSelectorFC:
+
+    @pytest.mark.parametrize("shape,ndeps", [
+        ((10,), 2), ((5, 4), 2), ((3, 3, 2), 3),
+    ])
+    def test_select_single_dep(self, qtbot, shape, ndeps):
+        data = make_griddable_dd(shape, ndeps)
+        fc = linearFlowchart(('sel', DataSelector))
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = [data.dependents()[0]]
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert out.dependents() == [data.dependents()[0]]
+
+    @pytest.mark.parametrize("shape,ndeps", [
+        ((10,), 3), ((5, 4), 2),
+    ])
+    def test_select_multiple_deps(self, qtbot, shape, ndeps):
+        data = make_griddable_dd(shape, ndeps)
+        fc = linearFlowchart(('sel', DataSelector))
+        fc.setInput(dataIn=data)
+        deps = data.dependents()[:2]
+        fc.nodes()['sel'].selectedData = deps
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert set(out.dependents()) == set(deps)
+
+    def test_select_does_not_mutate(self, qtbot):
+        data = make_griddable_dd((10,), 2)
+        snap = snapshot_values(data)
+        fc = linearFlowchart(('sel', DataSelector))
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = [data.dependents()[0]]
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+
+# --- DataGridder ---
+
+class TestDataGridderFC:
+
+    @pytest.mark.parametrize("shape", [
+        (5,), (5, 4), (10, 10), (3, 4, 2), (5, 5, 5),
+    ])
+    def test_guess_shape(self, qtbot, shape):
+        data = make_griddable_dd(shape)
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert isinstance(out, MeshgridDataDict)
+        assert out.shape() == shape
+
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (3, 3, 2),
+    ])
+    def test_specify_shape(self, qtbot, shape):
+        data = make_griddable_dd(shape)
+        ax_names = data.axes(data.dependents()[0])
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.specifyShape, dict(
+            shape=shape, order=ax_names,
+        )
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert isinstance(out, MeshgridDataDict)
+        assert out.shape() == shape
+
+    def test_nogrid_passthrough(self, qtbot):
+        data = make_griddable_dd((5, 4))
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.noGrid, {}
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert isinstance(out, DataDict)
+
+    def test_meshgrid_passthrough_guess(self, qtbot):
+        data = make_mesh((5, 4))
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, MeshgridDataDict)
+
+    def test_meshgrid_to_flat_nogrid(self, qtbot):
+        data = make_mesh((5, 4))
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.noGrid, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, DataDict)
+        assert out.nrecords() == 20
+
+    def test_gridder_does_not_mutate(self, qtbot):
+        data = make_griddable_dd((5, 4))
+        snap = snapshot_values(data)
+        fc = linearFlowchart(('grid', DataGridder))
+        fc.setInput(dataIn=data)
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+
+# --- DimensionReducer ---
+
+class TestDimensionReducerFC:
+
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (4, 3, 2),
+    ])
+    def test_element_selection(self, qtbot, shape):
+        data = make_mesh(shape)
+        fc = linearFlowchart(('red', DimensionReducer))
+        fc.setInput(dataIn=data)
+        last_ax = data.axes()[-1]
+        fc.nodes()['red'].reductions = {
+            last_ax: (ReductionMethod.elementSelection, [], {'index': 0})
+        }
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert last_ax not in out.axes()
+
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (4, 3, 2),
+    ])
+    def test_average_reduction(self, qtbot, shape):
+        data = make_mesh(shape)
+        fc = linearFlowchart(('red', DimensionReducer))
+        fc.setInput(dataIn=data)
+        last_ax = data.axes()[-1]
+        fc.nodes()['red'].reductions = {last_ax: (ReductionMethod.average,)}
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert last_ax not in out.axes()
+
+    def test_reducer_does_not_mutate(self, qtbot):
+        data = make_mesh((5, 4))
+        snap = snapshot_values(data)
+        fc = linearFlowchart(('red', DimensionReducer))
+        fc.setInput(dataIn=data)
+        fc.nodes()['red'].reductions = {
+            'ax1': (ReductionMethod.elementSelection, [], {'index': 0})
+        }
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+
+# --- XYSelector ---
+
+class TestXYSelectorFC:
+
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (8, 6), (4, 3, 2), (5, 5, 5),
+    ])
+    def test_xy_produces_2d(self, qtbot, shape):
+        data = make_mesh(shape)
+        axes = data.axes()
+        fc = linearFlowchart(('xy', XYSelector))
+        fc.setInput(dataIn=data)
+        fc.nodes()['xy'].xyAxes = (axes[0], axes[1])
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        for dep in out.dependents():
+            assert out.data_vals(dep).ndim == 2
+
+    def test_xy_1d_x_only(self, qtbot):
+        data = make_mesh((10,))
+        fc = linearFlowchart(('xy', XYSelector))
+        fc.setInput(dataIn=data)
+        fc.nodes()['xy'].xyAxes = ('ax0', None)
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        for dep in out.dependents():
+            assert out.data_vals(dep).ndim == 1
+
+    def test_xy_no_axes_returns_none(self, qtbot):
+        data = make_mesh((5, 4))
+        fc = linearFlowchart(('xy', XYSelector))
+        fc.setInput(dataIn=data)
+        assert fc.outputValues()['dataOut'] is None
+
+    def test_xy_does_not_mutate(self, qtbot):
+        data = make_mesh((5, 4, 3))
+        snap = snapshot_values(data)
+        fc = linearFlowchart(('xy', XYSelector))
+        fc.setInput(dataIn=data)
+        fc.nodes()['xy'].xyAxes = ('ax0', 'ax1')
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+
+# --- ScaleUnits ---
+
+class TestScaleUnitsFC:
+
+    @pytest.mark.parametrize("scale,prefix_substr", [
+        (1e-9, 'n'), (1e-6, '\u03bc'), (1e-3, 'm'), (1e6, 'M'), (1e9, 'G'),
+    ])
+    def test_si_prefix(self, qtbot, scale, prefix_substr):
+        dd = DataDict(
+            x=dict(values=np.arange(5, dtype=float) * scale, unit='V'),
+            y=dict(values=np.arange(5, dtype=float), axes=['x'], unit='A'),
+        )
+        dd.validate()
+        fc = linearFlowchart(('su', ScaleUnits))
+        fc.setInput(dataIn=dd)
+        out = fc.outputValues()['dataOut']
+        assert prefix_substr in out['x']['unit']
+
+    def test_does_not_mutate(self, qtbot):
+        dd = DataDict(
+            x=dict(values=np.arange(5, dtype=float) * 1e-9, unit='V'),
+            y=dict(values=np.arange(5, dtype=float), axes=['x'], unit='A'),
+        )
+        dd.validate()
+        snap = snapshot_values(dd)
+        fc = linearFlowchart(('su', ScaleUnits))
+        fc.setInput(dataIn=dd)
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(dd, snap)
+
+
+# --- SubtractAverage ---
+
+class TestSubtractAverageFC:
+
+    @pytest.mark.parametrize("shape", [
+        (10, 5), (5, 4, 3),
+    ])
+    def test_subtract_axis(self, qtbot, shape):
+        data = make_mesh(shape)
+        ax = data.axes()[-1]
+        fc = linearFlowchart(('sa', SubtractAverage))
+        fc.setInput(dataIn=data)
+        fc.nodes()['sa'].averagingAxis = ax
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        # After subtraction, mean along that axis should be ~0
+        ax_idx = data.axes().index(ax)
+        for dep in out.dependents():
+            avg = out.data_vals(dep).mean(axis=ax_idx)
+            assert np.allclose(avg, 0, atol=1e-10)
+
+    def test_no_axis_passthrough(self, qtbot):
+        data = make_mesh((5, 4))
+        fc = linearFlowchart(('sa', SubtractAverage))
+        fc.setInput(dataIn=data)
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+
+    def test_does_not_mutate(self, qtbot):
+        data = make_mesh((5, 4))
+        snap = snapshot_values(data)
+        fc = linearFlowchart(('sa', SubtractAverage))
+        fc.setInput(dataIn=data)
+        fc.nodes()['sa'].averagingAxis = 'ax1'
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+
+# --- Histogrammer ---
+
+class TestHistogrammerFC:
+
+    @pytest.mark.parametrize("shape,hist_ax", [
+        ((20, 5), 'ax0'),
+        ((10, 8), 'ax1'),
+        ((5, 4, 3), 'ax0'),
+    ])
+    def test_histogram_produces_counts(self, qtbot, shape, hist_ax):
+        data = make_mesh(shape)
+        fc = linearFlowchart(('h', Histogrammer))
+        fc.setInput(dataIn=data)
+        fc.nodes()['h'].nbins = 10
+        fc.nodes()['h'].histogramAxis = hist_ax
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert any('count' in d for d in out.dependents())
+
+    def test_no_axis_passthrough(self, qtbot):
+        data = make_mesh((10, 5))
+        fc = linearFlowchart(('h', Histogrammer))
+        fc.setInput(dataIn=data)
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+
+
+# ===========================================================================
+# FULL PIPELINE INTEGRATION
+# ===========================================================================
+
+class TestFullPipelineFC:
+
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (10, 10), (8, 6), (3, 4, 2),
+    ])
+    def test_selector_gridder_xy(self, qtbot, shape):
+        data = make_griddable_dd(shape, ndeps=2)
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+            ('xy', XYSelector),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = [data.dependents()[0]]
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        axes = data.axes(data.dependents()[0])
+        fc.nodes()['xy'].xyAxes = (axes[0], axes[1])
+
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert isinstance(out, MeshgridDataDict)
+
+    def test_full_pipeline_does_not_mutate(self, qtbot):
+        data = make_griddable_dd((8, 6))
+        snap = snapshot_values(data)
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+            ('xy', XYSelector),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = [data.dependents()[0]]
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        fc.nodes()['xy'].xyAxes = ('ax0', 'ax1')
+        _ = fc.outputValues()['dataOut']
+        assert_not_mutated(data, snap)
+
+    def test_full_with_scale_and_subtract(self, qtbot):
+        data = make_griddable_dd((6, 5))
+        # Give units to exercise ScaleUnits
+        data['ax0']['unit'] = 'V'
+        data['ax0']['values'] *= 1e-9
+        data['dep0']['unit'] = 'A'
+
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+            ('xy', XYSelector),
+            ('sa', SubtractAverage),
+            ('su', ScaleUnits),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = ['dep0']
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        fc.nodes()['xy'].xyAxes = ('ax0', 'ax1')
+        fc.nodes()['sa'].averagingAxis = 'ax1'
+
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+
+    @pytest.mark.parametrize("dtype", [
+        np.float64, np.float32, np.complex128,
+    ])
+    def test_pipeline_various_dtypes(self, qtbot, dtype):
+        shape = (5, 4)
+        data = make_griddable_dd(shape)
+        z = np.random.randn(20).astype(dtype)
+        if np.issubdtype(dtype, np.complexfloating):
+            z = z + 1j * np.random.randn(20).astype(dtype)
+        data['dep0']['values'] = z
+
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = ['dep0']
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert np.issubdtype(out.data_vals('dep0').dtype, dtype)
+
+    def test_pipeline_with_nan_data(self, qtbot):
+        """Pipeline with incomplete data (NaN values)."""
+        data = make_griddable_dd((6, 5))
+        # Inject NaN at end (simulating incomplete sweep)
+        data['dep0']['values'][-5:] = np.nan
+        data['ax0']['values'][-5:] = np.nan
+        data['ax1']['values'][-5:] = np.nan
+
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = ['dep0']
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        # Should handle NaN gracefully (either grid or fall back)
+        assert out is not None
+
+    def test_pipeline_with_multiple_deps(self, qtbot):
+        """Pipeline selecting multiple compatible dependents."""
+        data = make_griddable_dd((5, 4), ndeps=3)
+        fc = linearFlowchart(
+            ('sel', DataSelector),
+            ('grid', DataGridder),
+            ('xy', XYSelector),
+        )
+        fc.setInput(dataIn=data)
+        fc.nodes()['sel'].selectedData = data.dependents()[:2]
+        fc.nodes()['grid'].grid = GridOption.guessShape, {}
+        fc.nodes()['xy'].xyAxes = ('ax0', 'ax1')
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert len(out.dependents()) == 2

From e81f75255b0dec5361d058348b09aca36d3f2a46 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 16 Apr 2026 15:01:52 +0200
Subject: [PATCH 02/64] docs: add further optimization opportunities to
 performance plan

Comprehensive analysis of remaining performance improvements across:
- HDF5 loading (reads full dataset for shape metadata - critical fix)
- Node.process() redundant structure() call on every update
- Complex plot rendering deepcopy overhead
- Signal emission overhead (7 signals per node per update)
- largest_numtype() iterating every array element as Python objects
- Various numpy anti-patterns (np.append in loops, unnecessary copies)
- Architectural improvements (change detection, memoization)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 222 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 122fa9e0..1814146a 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -489,3 +489,225 @@ All optimizations implemented and tested. **173 tests pass** (0 failures).
 - `DataDictBase.copy(deep=True/False)` — `deep=False` shares array data (xarray convention)
 - `DataDictBase._build_structure()` — private helper that skips validation
 - `DataDictBase._copy_field()` — targeted field copy with per-key semantics
+
+---
+
+## Further Optimization Opportunities
+
+Additional performance improvements identified through comprehensive codebase analysis.
+Organized from highest to lowest impact.
+
+### Tier 1: Critical Quick Wins
+
+#### HDF5 Data Loading: Avoid Full-File Reads for Metadata
+
+**Files:** `plottr/data/datadict_storage.py`
+
+**Problem:** Two lines read the entire HDF5 dataset into memory just to get its shape:
+- Line 274: `lens = [len(grp[k][:]) for k in keys]` reads ALL data to get lengths
+- Line 305: `entry['__shape__'] = ds[:].shape` reads ALL data to get shape
+
+**Fix:**
+`python
+# Line 274: use HDF5 metadata (zero I/O)
+lens = [grp[k].shape[0] for k in keys]
+
+# Line 305: use HDF5 shape attribute
+entry['__shape__'] = ds.shape
+`
+
+**Impact:** 50-80% reduction in HDF5 load time for large files. Eliminates massive
+memory spikes when loading. This is a 1-line fix each.
+
+#### Node.process() Redundant structure() Call
+
+**File:** `plottr/node/node.py:282`
+
+**Problem:** `dstruct = dataIn.structure(add_shape=False)` is called on every
+pipeline update in every node. For MeshgridDataDict this means validate() + deepcopy
+of all field metadata. But the result is only stored for signal emission — the actual
+change detection at lines 293-308 uses axes/deps/type/shapes which are already computed
+at lines 279-281.
+
+**Fix:** Replace with a lazy approach — only compute structure when it's actually needed
+(i.e., when `_structChanged` is True):
+`python
+dstruct = None  # defer computation
+# ... change detection using axes/deps/type ...
+if _structChanged:
+    dstruct = dataIn.structure(add_shape=False)
+self.dataStructure = dstruct if dstruct is not None else self.dataStructure
+`
+
+**Impact:** Eliminates the single most expensive call in the pipeline hot path for
+steady-state operation (when structure doesn't change between updates). For 500K-element
+MeshgridDataDict: saves ~14ms per node per update.
+
+### Tier 2: High Impact
+
+#### Plot Complex Data: Replace deepcopy with Targeted Copy
+
+**File:** `plottr/plot/base.py:456, 488, 517`
+
+**Problem:** `_splitComplexData()` uses `deepcopy(re_plotItem)` to create Real/Imag or
+Mag/Phase split views. This deep-copies the entire PlotItem including array data references
+and all metadata. Called on every plot update for complex-valued data.
+
+**Fix:** PlotItem is a dataclass — use `dataclasses.replace()` or manual copy:
+`python
+from dataclasses import replace
+im_plotItem = replace(re_plotItem,
+    id=re_plotItem.id + 1,
+    data=list(re_plotItem.data),
+    labels=list(re_plotItem.labels) if re_plotItem.labels else None,
+)
+`
+
+**Impact:** 2-5x faster rendering for complex-valued plots.
+
+#### Signal Emission Overhead in Nodes
+
+**File:** `plottr/node/node.py:316-334`
+
+**Problem:** Up to 7 Qt signals are emitted per data update in each node. On first data
+arrival, ALL signals fire (lines 284-290). Each signal can trigger widget updates and
+downstream processing.
+
+**Opportunities:**
+- `dataFieldsChanged` (line 323) is redundant — it emits `daxes + ddeps` which
+  is just the union of `dataAxesChanged` and `dataDependentsChanged`
+- `newDataStructure` (line 330) carries structure+shapes+type, overlapping with
+  `dataStructureChanged` (line 329) + `dataShapesChanged` (line 334)
+
+**Fix (conservative):** Remove `dataFieldsChanged` and have listeners use
+`dataAxesChanged` + `dataDependentsChanged` instead. Connect `newDataStructure`
+only where both structure and shapes are needed together.
+
+**Fix (aggressive):** Coalesce all signals into a single `dataChanged(dict)` signal
+carrying change flags. Reduces signal/slot overhead from 7 to 1.
+
+#### largest_numtype() Flattens Entire Array
+
+**File:** `plottr/utils/num.py:28`
+
+**Problem:** `types = {type(a) for a in np.array(arr).flatten()}` iterates every
+element of the array as a Python object to collect types. For a 1M-element array,
+this creates 1M Python objects.
+
+**Fix:** Use numpy's dtype system directly:
+`python
+def largest_numtype(arr, include_integers=True):
+    arr = np.asarray(arr)
+    if np.issubdtype(arr.dtype, np.complexfloating):
+        return complex
+    if np.issubdtype(arr.dtype, np.floating):
+        return float
+    if include_integers and np.issubdtype(arr.dtype, np.integer):
+        return float  # promote to float for plotting
+    # Only fall back to element-scanning for object arrays
+    if arr.dtype == object:
+        types = {type(a) for a in arr.ravel() if a is not None}
+        # ... existing logic ...
+    return None
+`
+
+**Impact:** ~100x faster for numeric arrays (avoids Python-level iteration entirely).
+
+### Tier 3: Medium Impact
+
+#### is_invalid() Allocates Unnecessary Zero Array
+
+**File:** `plottr/utils/num.py:57-65`
+
+**Problem:** For non-float arrays, creates `np.zeros(a.shape, dtype=bool)` just to
+OR with the None check. The zeros contribute nothing.
+
+**Fix:**
+`python
+def is_invalid(a):
+    isnone = a == None
+    if a.dtype in FLOATTYPES:
+        return isnone | np.isnan(a)
+    return isnone  # skip zeros allocation
+`
+
+#### guess_grid_from_sweep_direction(): Repeated np.array() Calls
+
+**File:** `plottr/utils/num.py:236-242`
+
+**Problem:** `np.array(vals)` called 4 times on the same data inside a loop.
+
+**Fix:** Convert once at the top of the loop: `vals_arr = np.asarray(vals)`
+
+#### remove_invalid_entries(): O(n^2) np.append Pattern
+
+**File:** `plottr/data/datadict.py:1068-1086`
+
+**Problem:** Uses `np.append(_idxs, _newidxs)` repeatedly which copies the entire
+array each time.
+
+**Fix:** Collect indices in a Python list, concatenate once:
+`python
+_idxs_list = []
+# ... append to list ...
+_idxs = np.concatenate(_idxs_list) if _idxs_list else np.array([])
+`
+
+#### datadict_to_dataframe(): flatten() Instead of ravel()
+
+**File:** `plottr/data/datadict.py:1738, 1745`
+
+**Problem:** `.flatten()` always copies; `.ravel()` returns a view when possible.
+
+**Fix:** Use `.ravel()` since the result is consumed immediately by pandas.
+
+### Tier 4: Architectural Improvements (Larger Effort)
+
+#### Data Change Detection in Pipeline
+
+**Problem:** The pipeline has no concept of "what changed." Every update re-processes
+the entire data through every node. For live monitoring where data is appended
+incrementally, this means re-gridding, re-reducing, and re-plotting everything.
+
+**Opportunity:** Add lightweight change detection:
+- Track data version/hash at the DataDict level
+- Nodes check if their input actually changed before processing
+- For append-only updates, nodes could process only new data
+
+#### Fitter Node: No Memoization
+
+**File:** `plottr/node/fitter.py:624-650`
+
+**Problem:** The fitting algorithm runs on every `process()` call even if the data
+and fit parameters haven't changed. For complex models this can take 100ms-1s.
+
+**Fix:** Cache fit results keyed on (data hash, model, parameters).
+
+#### ScaleUnits: Redundant Per-Update Computation
+
+**File:** `plottr/node/scaleunits.py:129-135`
+
+**Problem:** `find_scale_and_prefix()` scans the full array (`np.nanmax(np.abs(data))`)
+for every field on every update.
+
+**Fix:** Cache the scale prefix and only recompute when the data range changes
+significantly (e.g., order of magnitude difference).
+
+#### Histogrammer: No Result Caching
+
+**File:** `plottr/node/histogram.py:132-217`
+
+**Problem:** Histogram recomputed on every update even when data, nbins, and axis
+haven't changed.
+
+**Fix:** Cache histogram results, invalidate only when inputs change.
+
+### Tier 5: xarray Consideration
+
+**Finding:** Plottr does NOT use xarray at all despite listing it as a dependency.
+xarray could theoretically provide lazy loading from HDF5, chunked computation, and
+better memory management. However, replacing DataDict with xarray would be a major
+refactoring effort and is not recommended unless a larger redesign is planned.
+
+The `xarray` dependency appears to be pulled in transitively or for potential future
+use. It could be made optional to reduce install footprint.

From 3f0d4efcfa8213971764c4176e0c0c5fc47ce5c9 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 16 Apr 2026 15:31:59 +0200
Subject: [PATCH 03/64] =?UTF-8?q?perf:=20round=202=20optimizations=20?=
 =?UTF-8?q?=E2=80=94=20node=20overhead,=20numtype,=20invalid=20entries?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round 2 performance improvements:

- largest_numtype(): use numpy dtype instead of iterating every element
  as a Python object (~15,000x faster for numeric arrays)
- Node.process(): defer structure() call to only when structure changes
  (50x faster steady-state updates for large meshgrids)
- is_invalid(): skip unnecessary np.zeros allocation for non-float arrays
- guess_grid_from_sweep_direction(): convert once with np.asarray, not 4x
- remove_invalid_entries(): replace O(n^2) np.append with list+concatenate
  Also fixes crash on inhomogeneous index arrays (pre-existing bug)
- meshgrid_to_datadict/datadict_to_dataframe: ravel() instead of flatten()
- _splitComplexData(): dataclasses.replace instead of deepcopy

Adds 32 new tests (205 total passing).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md                      |  30 ++
 plottr/data/datadict.py                  |  18 +-
 plottr/node/node.py                      |   6 +-
 plottr/plot/base.py                      |  14 +-
 plottr/utils/num.py                      |  36 ++-
 test/pytest/test_round2_optimizations.py | 351 +++++++++++++++++++++++
 6 files changed, 432 insertions(+), 23 deletions(-)
 create mode 100644 test/pytest/test_round2_optimizations.py

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 1814146a..1b3d1ae3 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -711,3 +711,33 @@ refactoring effort and is not recommended unless a larger redesign is planned.
 
 The `xarray` dependency appears to be pulled in transitively or for potential future
 use. It could be made optional to reduce install footprint.
+
+### Round 2 Execution Results
+
+All round 2 optimizations implemented and tested. **205 tests pass** (0 failures).
+
+#### Changes Made (Round 2)
+
+| File | Changes |
+|---|---|
+| `plottr/utils/num.py` | Rewrote `largest_numtype()` to use dtype (avoids element iteration); `is_invalid()` skips zero alloc for non-floats; `guess_grid_from_sweep_direction()` converts once with `np.asarray` |
+| `plottr/data/datadict.py` | Fixed O(n^2) `np.append` in `remove_invalid_entries()`; `meshgrid_to_datadict()` uses `ravel()`; `datadict_to_dataframe()` uses `ravel()` |
+| `plottr/node/node.py` | Deferred `structure()` call to only when structure changes |
+| `plottr/plot/base.py` | Replaced `deepcopy` with `dataclasses.replace` in `_splitComplexData()` |
+| `test/pytest/test_round2_optimizations.py` | 32 new tests |
+
+#### Benchmark (Round 2)
+
+| Benchmark | Before | After | Speedup |
+|---|---|---|---|
+| **largest_numtype (float 500k)** | 29.8 ms | 0.002 ms | **~15,000x** |
+| **largest_numtype (complex 500k)** | 31.9 ms | 0.001 ms | **~32,000x** |
+| **node_process (500k mesh)** | 7.42 ms | 0.15 ms | **50x** |
+| **to_dataframe (100k)** | 0.95 ms | 0.63 ms | **1.5x** |
+| **remove_invalid (10k)** | 0.073 ms | 0.050 ms | **1.5x** |
+| **is_invalid (int 500k)** | 16.5 ms | 15.0 ms | **1.1x** |
+
+#### Bugs Fixed (Round 2)
+
+- `remove_invalid_entries()` crashed with `ValueError` when dependents had different numbers of invalid entries (inhomogeneous `np.array(idxs)`). Fixed by using `np.concatenate`.
+- `largest_numtype()` on empty arrays previously returned `None` in all cases; behavior preserved via explicit empty check.
diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index 9035a702..e0978512 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -1067,14 +1067,15 @@ def remove_invalid_entries(self) -> 'DataDict':
                 datavals = self.data_vals(d)
                 rows = datavals.reshape(-1, int(np.prod(ishp[d])))
 
-            _idxs: np.ndarray = np.array([])
+            _idxs_parts: list = []
 
             # get indices of all rows that are fully None
             if len(ishp[d]) == 0:
                 _newidxs = np.atleast_1d(np.asarray(rows is None)).nonzero()[0]
             else:
                 _newidxs = np.atleast_1d(np.asarray(np.all(rows is None, axis=-1))).nonzero()[0]
-            _idxs = np.append(_idxs, _newidxs)
+            if _newidxs.size > 0:
+                _idxs_parts.append(_newidxs)
 
             # get indices for all rows that are fully NaN. works only
             # for some dtypes, so except TypeErrors.
@@ -1083,15 +1084,16 @@ def remove_invalid_entries(self) -> 'DataDict':
                     _newidxs = np.where(np.isnan(rows))[0]
                 else:
                     _newidxs = np.where(np.all(np.isnan(rows), axis=-1))[0]
-                _idxs = np.append(_idxs, _newidxs)
+                if _newidxs.size > 0:
+                    _idxs_parts.append(_newidxs)
             except TypeError:
                 pass
 
+            _idxs = np.concatenate(_idxs_parts) if _idxs_parts else np.array([], dtype=int)
             idxs.append(_idxs)
 
         if len(idxs) > 0:
-            remove_idxs = reduce(np.intersect1d,
-                                 tuple(np.array(idxs).astype(int)))
+            remove_idxs = reduce(np.intersect1d, idxs)
             for k, v in self.data_items():
                 v['values'] = np.delete(v['values'], remove_idxs, axis=0)
 
@@ -1421,7 +1423,7 @@ def meshgrid_to_datadict(data: MeshgridDataDict) -> DataDict:
     """
     newdata = DataDict(**data._build_structure())
     for k, v in data.data_items():
-        val = v['values'].copy().reshape(-1)
+        val = v['values'].ravel().copy()
         newdata[k]['values'] = val
 
     newdata = newdata.sanitize()
@@ -1735,14 +1737,14 @@ def datadict_to_dataframe(data: DataDict) -> pd.DataFrame:
     # if the dimension of all variables are the same, directly flat the array
     if dimension_check:
         for key, value in data.data_items():
-            data_set[key] = (data.data_vals(key)).flatten()
+            data_set[key] = (data.data_vals(key)).ravel()
 
     # if the dimension is different between variables, match their dimension to the highest one
     else:
         for key, value in data.data_items():
             repeated_time = int(max_ele/np.size(data.data_vals(key)))
             value_array = np.repeat(data.data_vals(key), repeated_time)
-            data_set[key] = value_array.flatten('F')
+            data_set[key] = value_array.ravel(order='F')
 
     # convert organized data to DataFrame and return it
     return pd.DataFrame(data=data_set)
diff --git a/plottr/node/node.py b/plottr/node/node.py
index 99d1d510..52c8ec6c 100644
--- a/plottr/node/node.py
+++ b/plottr/node/node.py
@@ -279,7 +279,6 @@ def process(self, dataIn: Optional[DataDictBase]=None) -> Optional[Dict[str, Opt
         daxes = dataIn.axes()
         ddeps = dataIn.dependents()
         dshapes = dataIn.shapes()
-        dstruct = dataIn.structure(add_shape=False)
 
         if None in [self.dataAxes, self.dataDependents, self.dataType, self.dataShapes]:
             _axesChanged = True
@@ -311,7 +310,10 @@ def process(self, dataIn: Optional[DataDictBase]=None) -> Optional[Dict[str, Opt
         self.dataDependents = ddeps
         self.dataType = dtype
         self.dataShapes = dshapes
-        self.dataStructure = dstruct
+
+        # Only compute structure snapshot when it actually changed
+        if _structChanged:
+            self.dataStructure = dataIn._build_structure()
 
         if _axesChanged:
             self.dataAxesChanged.emit(daxes)
diff --git a/plottr/plot/base.py b/plottr/plot/base.py
index 8b7217d8..9c2ec07a 100644
--- a/plottr/plot/base.py
+++ b/plottr/plot/base.py
@@ -5,7 +5,7 @@
 
 from collections import OrderedDict
 from copy import deepcopy
-from dataclasses import dataclass
+from dataclasses import dataclass, replace as dc_replace
 from enum import Enum, unique, auto
 from types import TracebackType
 from typing import Dict, List, Type, Tuple, Optional, Any, \
@@ -453,7 +453,9 @@ def _splitComplexData(self, plotItem: PlotItem) -> List[PlotItem]:
                 re_label, im_label = label + ' (Real)', label + ' (Imag)'
 
             re_plotItem = plotItem
-            im_plotItem = deepcopy(re_plotItem)
+            im_plotItem = dc_replace(re_plotItem,
+                                     data=list(re_plotItem.data),
+                                     labels=list(re_plotItem.labels) if re_plotItem.labels else None)
 
             re_plotItem.data[-1] = re_data
             im_plotItem.data[-1] = im_data
@@ -485,7 +487,9 @@ def _splitComplexData(self, plotItem: PlotItem) -> List[PlotItem]:
                 mag_label, phase_label = label + ' 20*log10(Mag)', label + ' (Phase)'
 
             mag_plotItem = plotItem
-            phase_plotItem = deepcopy(mag_plotItem)
+            phase_plotItem = dc_replace(mag_plotItem,
+                                        data=list(mag_plotItem.data),
+                                        labels=list(mag_plotItem.labels) if mag_plotItem.labels else None)
 
             mag_plotItem.data[-1] = mag_data
             phase_plotItem.data[-1] = phase_data
@@ -514,7 +518,9 @@ def _splitComplexData(self, plotItem: PlotItem) -> List[PlotItem]:
                 mag_label, phase_label = label + ' (Mag)', label + ' (Phase)'
 
             mag_plotItem = plotItem
-            phase_plotItem = deepcopy(mag_plotItem)
+            phase_plotItem = dc_replace(mag_plotItem,
+                                        data=list(mag_plotItem.data),
+                                        labels=list(mag_plotItem.labels) if mag_plotItem.labels else None)
 
             mag_plotItem.data[-1] = mag_data
             phase_plotItem.data[-1] = phase_data
diff --git a/plottr/utils/num.py b/plottr/utils/num.py
index 3d841c49..8199982c 100644
--- a/plottr/utils/num.py
+++ b/plottr/utils/num.py
@@ -25,7 +25,26 @@ def largest_numtype(arr: np.ndarray, include_integers: bool = True) \
                              only integers in the the data.
     :return: type if possible. None if no numeric data in array.
     """
-    types = {type(a) for a in np.array(arr).flatten()}
+    arr = np.asarray(arr)
+
+    # Fast path: use numpy's dtype for homogeneous numeric arrays
+    if arr.size == 0:
+        return None
+    if arr.dtype != object:
+        if np.issubdtype(arr.dtype, np.complexfloating):
+            return arr.dtype.type
+        elif np.issubdtype(arr.dtype, np.floating):
+            return arr.dtype.type
+        elif np.issubdtype(arr.dtype, np.integer):
+            if include_integers:
+                return arr.dtype.type
+            else:
+                return float
+        else:
+            return None
+
+    # Slow path for object arrays: inspect element types
+    types = {type(a) for a in arr.ravel() if a is not None}
     curidx = -1
     if include_integers:
         ok_types = NUMTYPES
@@ -59,10 +78,8 @@ def is_invalid(a: np.ndarray) -> np.ndarray:
     # check for None
     isnone = a == None
     if a.dtype in FLOATTYPES:
-        isnan = np.isnan(a)
-    else:
-        isnan = np.zeros(a.shape, dtype=bool)
-    return isnone | isnan
+        return isnone | np.isnan(a)
+    return isnone
 
 
 def _are_invalid(a: np.ndarray, b: np.ndarray) -> np.ndarray:
@@ -233,13 +250,14 @@ def guess_grid_from_sweep_direction(**axes: np.ndarray) \
         raise ValueError("Empty input.")
 
     for name, vals in axes.items():
-        if len(np.array(vals).shape) > 1:
+        vals_arr = np.asarray(vals)
+        if vals_arr.ndim > 1:
             raise ValueError(
-                f"Expect 1-dimensional axis data, not {np.array(vals).shape}")
+                f"Expect 1-dimensional axis data, not {vals_arr.shape}")
         if size is None:
-            size = np.array(vals).size
+            size = vals_arr.size
         else:
-            if size != np.array(vals).size:
+            if size != vals_arr.size:
                 raise ValueError("Non-matching array sizes.")
 
         # first step: find repeating patterns in the data.
diff --git a/test/pytest/test_round2_optimizations.py b/test/pytest/test_round2_optimizations.py
new file mode 100644
index 00000000..42a94e18
--- /dev/null
+++ b/test/pytest/test_round2_optimizations.py
@@ -0,0 +1,351 @@
+"""
+test_round2_optimizations.py
+
+Tests for round 2 performance optimizations: is_invalid, largest_numtype,
+guess_grid, remove_invalid_entries, Node.process structure deferral,
+complex plot splitting, flatten->ravel.
+"""
+import numpy as np
+import pytest
+from copy import deepcopy
+
+from plottr.data.datadict import (
+    DataDict, MeshgridDataDict, meshgrid_to_datadict, datadict_to_dataframe,
+)
+from plottr.utils.num import is_invalid, largest_numtype, guess_grid_from_sweep_direction
+
+
+# ===========================================================================
+# is_invalid()
+# ===========================================================================
+
+class TestIsInvalid:
+    def test_float_with_nan(self):
+        arr = np.array([1.0, np.nan, 3.0])
+        result = is_invalid(arr)
+        assert result.tolist() == [False, True, False]
+
+    def test_float_clean(self):
+        arr = np.array([1.0, 2.0, 3.0])
+        result = is_invalid(arr)
+        assert not np.any(result)
+
+    def test_int_array(self):
+        arr = np.arange(10)
+        result = is_invalid(arr)
+        assert not np.any(result)
+
+    def test_object_with_none(self):
+        arr = np.array([1.0, None, 3.0], dtype=object)
+        result = is_invalid(arr)
+        assert result[1] == True
+        assert result[0] == False
+
+    def test_complex_with_nan(self):
+        arr = np.array([1+1j, np.nan+0j, 3+0j])
+        result = is_invalid(arr)
+        assert result[1] == True
+
+    def test_empty_array(self):
+        arr = np.array([], dtype=float)
+        result = is_invalid(arr)
+        assert result.shape == (0,)
+
+    def test_bool_array(self):
+        arr = np.array([True, False, True])
+        result = is_invalid(arr)
+        assert not np.any(result)
+
+    def test_2d_float(self):
+        arr = np.array([[1.0, np.nan], [3.0, 4.0]])
+        result = is_invalid(arr)
+        assert result.shape == (2, 2)
+        assert result[0, 1] == True
+        assert result[1, 0] == False
+
+
+# ===========================================================================
+# largest_numtype()
+# ===========================================================================
+
+class TestLargestNumtype:
+    def test_float_array(self):
+        arr = np.array([1.0, 2.0, 3.0])
+        result = largest_numtype(arr)
+        assert issubclass(result, (float, np.floating))
+
+    def test_int_array_include_integers(self):
+        arr = np.arange(10)
+        result = largest_numtype(arr, include_integers=True)
+        # Should return float (promotion) or int
+        assert result in (float, int, np.int64, np.int32, np.float64)
+
+    def test_int_array_exclude_integers(self):
+        arr = np.arange(10)
+        result = largest_numtype(arr, include_integers=False)
+        # With include_integers=False, int arrays are promoted to float
+        assert result == float
+
+    def test_complex_array(self):
+        arr = np.array([1+1j, 2+2j])
+        result = largest_numtype(arr)
+        assert issubclass(result, (complex, np.complexfloating))
+
+    def test_object_array_with_floats(self):
+        arr = np.array([1.0, 2.0, 3.0], dtype=object)
+        result = largest_numtype(arr)
+        assert result == float
+
+    def test_string_array(self):
+        arr = np.array(['a', 'b', 'c'])
+        result = largest_numtype(arr)
+        assert result is None
+
+    def test_object_with_none_and_floats(self):
+        arr = np.array([1.0, None, 3.0], dtype=object)
+        result = largest_numtype(arr)
+        # Should find float as the largest type
+        assert result == float
+
+    def test_empty_array(self):
+        arr = np.array([])
+        result = largest_numtype(arr)
+        # Empty array has no elements to inspect
+        assert result is None
+
+
+# ===========================================================================
+# guess_grid_from_sweep_direction()
+# ===========================================================================
+
+class TestGuessGrid:
+    def test_simple_2d_grid(self):
+        x = np.repeat(np.arange(5, dtype=float), 4)
+        y = np.tile(np.arange(4, dtype=float), 5)
+        result = guess_grid_from_sweep_direction(x=x, y=y)
+        assert result is not None
+        order, shape = result
+        assert set(order) == {'x', 'y'}
+        assert 5 in shape and 4 in shape
+
+    def test_1d_sweep(self):
+        x = np.arange(10, dtype=float)
+        result = guess_grid_from_sweep_direction(x=x)
+        assert result is not None
+        _, shape = result
+        assert shape == (10,)
+
+    def test_single_point(self):
+        x = np.array([1.0])
+        result = guess_grid_from_sweep_direction(x=x)
+        assert result is not None
+
+    def test_with_noise(self):
+        x = np.repeat(np.linspace(0, 1, 10), 8) + np.random.randn(80) * 1e-6
+        y = np.tile(np.linspace(0, 1, 8), 10)
+        result = guess_grid_from_sweep_direction(x=x, y=y)
+        assert result is not None
+        _, shape = result
+        assert 10 in shape and 8 in shape
+
+
+# ===========================================================================
+# remove_invalid_entries()
+# ===========================================================================
+
+class TestRemoveInvalidEntries:
+    def test_removes_nan_rows(self):
+        dd = DataDict(
+            x=dict(values=np.arange(5, dtype=float)),
+            y=dict(values=np.array([1.0, np.nan, 3.0, np.nan, 5.0]), axes=['x']),
+        )
+        dd.validate()
+        dd2 = dd.remove_invalid_entries()
+        assert dd2.nrecords() == 3
+        assert np.allclose(dd2.data_vals('y'), [1.0, 3.0, 5.0])
+
+    def test_preserves_clean_data(self):
+        dd = DataDict(
+            x=dict(values=np.arange(10, dtype=float)),
+            y=dict(values=np.arange(10, dtype=float), axes=['x']),
+        )
+        dd.validate()
+        dd2 = dd.remove_invalid_entries()
+        assert dd2.nrecords() == 10
+
+    def test_removes_none_in_object_array(self):
+        dd = DataDict(
+            x=dict(values=np.array([1, 2, 3], dtype=object)),
+            y=dict(values=np.array([1.0, None, 3.0], dtype=object), axes=['x']),
+        )
+        dd.validate()
+        dd2 = dd.remove_invalid_entries()
+        # Only row where ALL dependents are invalid gets removed
+        # Row 1 has None in y -> removed only if x is also invalid
+        # Actually remove_invalid_entries removes rows where ALL deps are invalid
+        assert dd2.nrecords() <= 3
+
+    def test_multiple_dependents(self):
+        """remove_invalid_entries removes rows where ALL dependents are invalid.
+
+        Note: this previously crashed with np.array(idxs) on inhomogeneous
+        arrays. Fixed by using np.concatenate instead of np.append.
+        """
+        dd = DataDict(
+            x=dict(values=np.arange(5, dtype=float)),
+            y=dict(values=np.array([1.0, np.nan, 3.0, np.nan, 5.0]), axes=['x']),
+            z=dict(values=np.array([np.nan, 2.0, np.nan, np.nan, 5.0]), axes=['x']),
+        )
+        dd.validate()
+        dd2 = dd.remove_invalid_entries()
+        assert dd2.nrecords() == 4
+
+
+# ===========================================================================
+# meshgrid_to_datadict (flatten->ravel)
+# ===========================================================================
+
+class TestMeshgridToDatadict:
+    def test_basic_conversion(self):
+        x = np.linspace(0, 1, 5)
+        y = np.arange(3, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        mesh = MeshgridDataDict(
+            x=dict(values=xx), y=dict(values=yy),
+            z=dict(values=xx*yy, axes=['x', 'y']),
+        )
+        mesh.validate()
+        dd = meshgrid_to_datadict(mesh)
+        assert isinstance(dd, DataDict)
+        assert dd.nrecords() == 15
+
+    def test_values_match(self):
+        x = np.linspace(0, 1, 4)
+        y = np.arange(3, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        zz = xx + yy
+        mesh = MeshgridDataDict(
+            x=dict(values=xx), y=dict(values=yy),
+            z=dict(values=zz, axes=['x', 'y']),
+        )
+        mesh.validate()
+        dd = meshgrid_to_datadict(mesh)
+        assert np.allclose(dd.data_vals('z'), zz.ravel())
+
+    def test_3d_conversion(self):
+        shape = (3, 4, 2)
+        grids = np.meshgrid(*[np.linspace(0, 1, s) for s in shape], indexing='ij')
+        mesh = MeshgridDataDict(
+            a=dict(values=grids[0]), b=dict(values=grids[1]), c=dict(values=grids[2]),
+            z=dict(values=np.random.randn(*shape), axes=['a', 'b', 'c']),
+        )
+        mesh.validate()
+        dd = meshgrid_to_datadict(mesh)
+        assert dd.nrecords() == 24
+
+
+# ===========================================================================
+# datadict_to_dataframe
+# ===========================================================================
+
+class TestDatadictToDataframe:
+    def test_basic(self):
+        dd = DataDict(
+            x=dict(values=np.arange(5, dtype=float)),
+            y=dict(values=np.arange(5, dtype=float) * 2, axes=['x']),
+        )
+        dd.validate()
+        df = datadict_to_dataframe(dd)
+        assert len(df) == 5
+        assert list(df.columns) == ['x', 'y']
+
+
+# ===========================================================================
+# Node.process() structure deferral
+# ===========================================================================
+
+class TestNodeProcessStructure:
+    def test_node_process_returns_data(self, qtbot):
+        from plottr.node.node import Node
+        from plottr.node.tools import linearFlowchart
+        Node.useUi = False; Node.uiClass = None
+
+        mesh = MeshgridDataDict()
+        x = np.linspace(0, 1, 5)
+        y = np.arange(3, dtype=float)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        mesh['x'] = dict(values=xx, axes=[])
+        mesh['y'] = dict(values=yy, axes=[])
+        mesh['z'] = dict(values=xx + yy, axes=['x', 'y'])
+        mesh.validate()
+
+        fc = linearFlowchart(('n', Node))
+        fc.setInput(dataIn=mesh)
+        out = fc.outputValues()['dataOut']
+        assert out is mesh
+
+    def test_node_detects_structure_change(self, qtbot):
+        from plottr.node.node import Node
+        from plottr.node.tools import linearFlowchart
+        Node.useUi = False; Node.uiClass = None
+
+        dd1 = DataDict(
+            x=dict(values=np.arange(5, dtype=float)),
+            y=dict(values=np.arange(5, dtype=float), axes=['x']),
+        )
+        dd1.validate()
+
+        dd2 = DataDict(
+            x=dict(values=np.arange(5, dtype=float)),
+            y=dict(values=np.arange(5, dtype=float), axes=['x']),
+            z=dict(values=np.arange(5, dtype=float), axes=['x']),
+        )
+        dd2.validate()
+
+        fc = linearFlowchart(('n', Node))
+        fc.setInput(dataIn=dd1)
+        node = fc.nodes()['n']
+        assert node.dataDependents == ['y']
+
+        fc.setInput(dataIn=dd2)
+        assert node.dataDependents == ['y', 'z']
+
+
+# ===========================================================================
+# Complex plot deepcopy
+# ===========================================================================
+
+class TestComplexPlotSplit:
+    def test_split_produces_correct_items(self):
+        from plottr.plot.base import AutoFigureMaker, PlotDataType, PlotItem, ComplexRepresentation
+
+        class DummyFM(AutoFigureMaker):
+            def makeSubPlots(self, n): return [None]*n
+            def plot(self, item): return None
+            def formatSubPlot(self, id): pass
+
+        fm = DummyFM()
+        fm.complexRepresentation = ComplexRepresentation.realAndImag
+        data = np.array([1+2j, 3+4j, 5+6j])
+        pi = PlotItem(data=[np.arange(3, dtype=float), data],
+                      id=0, subPlot=0, labels=['x', 'z'])
+        result = fm._splitComplexData(pi)
+        assert len(result) == 2
+        assert np.allclose(result[0].data[-1], data.real)
+        assert np.allclose(result[1].data[-1], data.imag)
+
+    def test_split_real_data_unchanged(self):
+        from plottr.plot.base import AutoFigureMaker, PlotDataType, PlotItem
+
+        class DummyFM(AutoFigureMaker):
+            def makeSubPlots(self, n): return [None]*n
+            def plot(self, item): return None
+            def formatSubPlot(self, id): pass
+
+        fm = DummyFM()
+        data = np.array([1.0, 2.0, 3.0])
+        pi = PlotItem(data=[np.arange(3, dtype=float), data],
+                      id=0, subPlot=0, labels=['x', 'z'])
+        result = fm._splitComplexData(pi)
+        assert len(result) == 1
+        assert np.allclose(result[0].data[-1], data)

From ab485ee93f61ee9b009a63ae5418c236c05dc98a Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 16 Apr 2026 15:56:33 +0200
Subject: [PATCH 04/64] fix: resolve mypy type errors in validate()
 monotonicity check

Rename loop variable 'd' to 'diffs' to avoid shadowing the outer loop
variable from 'for d in self.dependents()'. Add explicit type annotations
for ndarray variables to satisfy mypy's type narrowing.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/datadict.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index e0978512..6aeb12dd 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -1178,17 +1178,17 @@ def validate(self) -> bool:
 
                         try:
                             if axis_data.shape[axis_num] > 1:
-                                d = np.diff(axis_data, axis=axis_num)
+                                diffs: np.ndarray = np.diff(axis_data, axis=axis_num)
 
                                 # for incomplete data, there may be nan steps -- we need to
                                 # ignore those, doesn't mean anything is wrong.
-                                if np.issubdtype(d.dtype, np.floating):
-                                    nan_mask = np.isnan(d)
+                                if np.issubdtype(diffs.dtype, np.floating):
+                                    nan_mask = np.isnan(diffs)
                                     if np.all(nan_mask):
                                         continue  # all NaN, can't check
-                                    valid = d[~nan_mask]
+                                    valid: np.ndarray = diffs[~nan_mask]
                                 else:
-                                    valid = d.ravel()
+                                    valid = diffs.ravel()
 
                                 if valid.size > 0:
                                     if np.any(valid == 0):

From dbff1c02c99a2b772fcce3b821c554cde5a895f0 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 16 Apr 2026 20:06:36 +0200
Subject: [PATCH 05/64] docs: add real dataset benchmark results (23 QCodes
 datasets)

Benchmarked the full plottr pipeline (load -> DataSelector -> DataGridder
-> XYSelector) on 23 QCodes datasets of varying shapes and sizes.

Pipeline total: 1478 ms -> 1025 ms = 1.44x overall speedup.
Largest gains on big datasets: stability_diagram 1.81x, large_3d_scan 1.65x.
No regressions on any dataset.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 1b3d1ae3..2de15f80 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -741,3 +741,31 @@ All round 2 optimizations implemented and tested. **205 tests pass** (0 failures
 
 - `remove_invalid_entries()` crashed with `ValueError` when dependents had different numbers of invalid entries (inhomogeneous `np.array(idxs)`). Fixed by using `np.concatenate`.
 - `largest_numtype()` on empty arrays previously returned `None` in all cases; behavior preserved via explicit empty check.
+
+### Real Dataset Benchmark (23 QCodes Datasets, Before vs After)
+
+Full pipeline benchmark: Load from QCodes DB -> DataSelector -> DataGridder -> XYSelector.
+Measured on 23 real-world-shaped datasets (1D-3D, with/without shape metadata, complete/interrupted).
+
+**Pipeline total: 1478 ms (before) -> 1025 ms (after) = 1.44x overall speedup**
+
+| Dataset | Points | Pipeline Before | Pipeline After | Speedup |
+|---|---|---|---|---|
+| stability_diagram (500x400) | 200,000 | 199 ms | 110 ms | **1.81x** |
+| large_3d_scan (100x80x50) | 800,000 | 549 ms | 333 ms | **1.65x** |
+| field_spectroscopy (50x2000) | 100,000 | 96 ms | 64 ms | **1.50x** |
+| time_trace (100k) | 100,000 | 64 ms | 44 ms | **1.46x** |
+| spatial_map (50x40x30) | 60,000 | 100 ms | 70 ms | **1.42x** |
+| 3d_cal_noshape (8x6x5) | 240 | 29 ms | 23 ms | **1.30x** |
+| gate_sweep (100x80) | 8,000 | 31 ms | 25 ms | **1.25x** |
+| interrupted_sweep | 500 | 26 ms | 22 ms | **1.21x** |
+| t1_measurement (1D, no shape) | 1,500 | 24 ms | 20 ms | **1.20x** |
+| charge_stability_interrupted | 630 | 26 ms | 21 ms | **1.20x** |
+| two_tone_spectroscopy (20x30) | 600 | 25 ms | 22 ms | **1.16x** |
+| (remaining 12 datasets) | | | | 1.00-1.17x |
+
+Key observations:
+- Larger datasets benefit most (1.4-1.8x for 60k+ points)
+- Even small datasets see 1.1-1.3x improvement (reduced per-node overhead)
+- No regressions observed on any dataset
+- All 23 datasets produce the same output types before and after

From 1edb53bb2538429fb5f97f2bbe77836d59ef0bcb Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 16 Apr 2026 20:39:10 +0200
Subject: [PATCH 06/64] docs: add large dataset benchmark (array paramtype,
 15-61 MB each)

Pipeline total: 6,550 ms -> 3,465 ms = 1.89x overall speedup on 8 large
datasets (4M-point 1D, 800x800 2D, 100x100x80 3D, interrupted, multi-dep).

Consistent ~2x speedup across 1D/2D shapes, ~1.7x on 3D.
Loading times unchanged (QCodes SQLite I/O dominated).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 2de15f80..52864170 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -769,3 +769,24 @@ Key observations:
 - Even small datasets see 1.1-1.3x improvement (reduced per-node overhead)
 - No regressions observed on any dataset
 - All 23 datasets produce the same output types before and after
+
+### Large Dataset Benchmark (Array ParamType, 15-61 MB per dataset)
+
+8 datasets using QCodes array paramtype (blob storage), benchmarked through
+the full plottr pipeline (Load -> DataSelector -> DataGridder -> XYSelector).
+
+**Pipeline total: 6,550 ms (before) -> 3,465 ms (after) = 1.89x overall speedup**
+
+| Dataset | Data Size | Pipeline Before | Pipeline After | Speedup |
+|---|---|---|---|---|
+| large_1d_3dep (2M pts, 3 deps) | 61 MB | 997 ms | 497 ms | **2.01x** |
+| large_1d_sweep (4M pts) | 61 MB | 1,923 ms | 971 ms | **1.98x** |
+| large_2d_wide (200x4000) | 18 MB | 702 ms | 360 ms | **1.95x** |
+| large_2d_interrupted (40% of 1000x800) | 18 MB | 314 ms | 162 ms | **1.94x** |
+| large_2d_2dep (500x1000, 2 deps) | 15 MB | 453 ms | 234 ms | **1.94x** |
+| large_2d_square (800x800) | 15 MB | 568 ms | 295 ms | **1.93x** |
+| large_3d_1dep (100x100x80) | 24 MB | 1,064 ms | 632 ms | **1.68x** |
+| large_3d_2dep (80x80x60, 2 deps) | 15 MB | 530 ms | 315 ms | **1.68x** |
+
+Loading times are unchanged (dominated by QCodes SQLite I/O). All speedup
+comes from the plottr pipeline processing (copy, validate, structure, gridding).

From b503f0990e269b1f81cf49745e4e89ad4259afac Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 17 Apr 2026 07:51:12 +0200
Subject: [PATCH 07/64] docs: replace benchmarks with improved methodology (v2)

New benchmark measures both cold start (new flowchart) and steady state
(persistent flowchart, simulating live monitoring refresh). Uses 5 repeats
with warmup, reports median.

Results on 31 datasets (23 small + 8 large):
- Large datasets: cold 1.88x, steady 1.77x faster
- Small datasets: cold 1.43x, steady 1.69x faster
- Steady-state on small data shows up to 2.11x speedup

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 46 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 52864170..52cfe32c 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -790,3 +790,49 @@ the full plottr pipeline (Load -> DataSelector -> DataGridder -> XYSelector).
 
 Loading times are unchanged (dominated by QCodes SQLite I/O). All speedup
 comes from the plottr pipeline processing (copy, validate, structure, gridding).
+
+### Improved Benchmark Methodology (v2)
+
+Previous benchmarks created a new flowchart per run, which always hit the "first data" code path.
+The v2 benchmark fixes this by measuring two scenarios:
+
+- **Cold start**: Create flowchart + process first data (opening a dataset for the first time)
+- **Steady state**: Re-process new data on an existing flowchart (live monitoring refresh)
+
+Method: 5 repeats, median timing, warmup run discarded, persistent flowchart for steady-state.
+
+#### Large Datasets (8 datasets, 15-61 MB each, array paramtype)
+
+|  | Cold Start |  | Steady State |  |
+|---|---|---|---|---|
+| **Totals** | 6,479 -> 3,449 ms | **1.88x** | 5,867 -> 3,312 ms | **1.77x** |
+
+| Dataset | MB | Cold Before | Cold After | Cold Spd | Steady Before | Steady After | Steady Spd |
+|---|---|---|---|---|---|---|---|
+| large_1d_sweep (4M pts) | 61 | 1,911 ms | 960 ms | **1.99x** | 1,865 ms | 1,031 ms | **1.81x** |
+| large_1d_3dep (2M, 3 deps) | 61 | 987 ms | 491 ms | **2.01x** | 949 ms | 511 ms | **1.86x** |
+| large_2d_square (800x800) | 15 | 567 ms | 294 ms | **1.93x** | 528 ms | 290 ms | **1.82x** |
+| large_2d_2dep (500x1000) | 15 | 448 ms | 237 ms | **1.89x** | 412 ms | 226 ms | **1.82x** |
+| large_3d_1dep (100x100x80) | 24 | 1,035 ms | 628 ms | **1.65x** | 798 ms | 503 ms | **1.59x** |
+| large_3d_2dep (80x80x60) | 15 | 525 ms | 320 ms | **1.64x** | 389 ms | 247 ms | **1.58x** |
+| large_2d_interrupted (40%) | 18 | 306 ms | 162 ms | **1.89x** | 273 ms | 147 ms | **1.86x** |
+| large_2d_wide (200x4000) | 18 | 701 ms | 357 ms | **1.96x** | 654 ms | 357 ms | **1.83x** |
+
+#### Small/Medium Datasets (23 datasets, <1 KB to 15 MB, numeric paramtype)
+
+|  | Cold Start |  | Steady State |  |
+|---|---|---|---|---|
+| **Totals** | 1,477 -> 1,036 ms | **1.43x** | 895 -> 529 ms | **1.69x** |
+
+Steady-state highlights (where the optimization shines most):
+
+| Dataset | Steady Before | Steady After | Speedup |
+|---|---|---|---|
+| interrupted_sweep | 5.9 ms | 2.8 ms | **2.11x** |
+| two_tone_spectroscopy | 6.1 ms | 2.9 ms | **2.10x** |
+| charge_stability_interrupted | 6.1 ms | 2.9 ms | **2.10x** |
+| ramsey_2d | 6.2 ms | 3.0 ms | **2.07x** |
+| qubit_spectroscopy | 7.8 ms | 3.8 ms | **2.05x** |
+| multi_measurement (3 deps) | 6.8 ms | 3.4 ms | **2.00x** |
+| stability_diagram (200K) | 176.7 ms | 93.8 ms | **1.88x** |
+| large_3d_scan (800K) | 421.8 ms | 261.1 ms | **1.62x** |

From c4e181bd452b20656d573a861e86b6a2011743c2 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 17 Apr 2026 08:28:06 +0200
Subject: [PATCH 08/64] docs: add interactive action benchmark with per-node
 profiling

Benchmarks real user actions (switch dep, swap axes, toggle subtract avg,
slide dimension, toggle grid) on large datasets with per-node time breakdown.

Key findings:
- DataSelector: 10-17x faster (largest_numtype O(1), copy optimized)
- SubtractAverage: 6-29x faster (copy 15x faster, mask_invalid skips clean)
- ScaleUnits: 7-15x faster (copy 15x faster)
- XYSelector: 1.5-2.3x (cascading copy removed)
- DataGridder: 1.1x (dominated by actual gridding computation)

Action-level: toggle_subtract_avg 9-10x, swap_xy 3.3x, switch_dep 2.3x,
data_refresh 2.2x, slide_dimension 1.5-1.6x.

DataGridder is now the dominant cost (58% of pipeline) and is the next frontier.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 51 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 52cfe32c..0ce7715c 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -836,3 +836,54 @@ Steady-state highlights (where the optimization shines most):
 | multi_measurement (3 deps) | 6.8 ms | 3.4 ms | **2.00x** |
 | stability_diagram (200K) | 176.7 ms | 93.8 ms | **1.88x** |
 | large_3d_scan (800K) | 421.8 ms | 261.1 ms | **1.62x** |
+
+### Interactive Action Benchmark (simulated user actions, large datasets)
+
+Measures the time for real user interactions on a persistent flowchart
+(DataSelector -> DataGridder -> XYSelector -> SubtractAverage -> ScaleUnits).
+5 repeats, median timing, warmup discarded.
+
+#### Per-Node Speedups (averaged across all datasets)
+
+The node breakdown reveals where our optimizations had the most impact:
+
+| Node | Before | After | Speedup | What changed |
+|---|---|---|---|---|
+| **DataSelector** | 72-579 ms | 7-39 ms | **10-17x** | `largest_numtype()` now O(1), `copy()/extract()` optimized |
+| **SubtractAverage** | 2-202 ms | 0.1-9 ms | **6-29x** | `copy()` 15x faster, `mask_invalid()` skips clean data |
+| **ScaleUnits** | 2-258 ms | 0.1-37 ms | **7-15x** | `copy()` 15x faster |
+| **XYSelector** | 89-596 ms | 42-322 ms | **1.5-2.3x** | Removed cascading copy, deferred `structure()` |
+| **DataGridder** | 102-647 ms | 89-574 ms | **1.1-1.2x** | `copy=False` in `datadict_to_meshgrid` |
+
+**Key insight:** DataGridder dominates total time (50-60%) and got the least speedup (1.1x)
+because its cost is dominated by the actual gridding computation (`guess_shape`, `reshape`,
+`transpose`) -- not by copy/validate overhead. This is the next optimization frontier.
+
+#### Action Speedups
+
+| Action | Dataset | Before | After | Speedup |
+|---|---|---|---|---|
+| **toggle_subtract_avg** | 2d_square (15 MB) | 293 ms | 29 ms | **10.2x** |
+| **toggle_subtract_avg** | 2d_wide (18 MB) | 342 ms | 36 ms | **9.5x** |
+| **swap_xy_axes** | 2d_square (15 MB) | 662 ms | 196 ms | **3.4x** |
+| **swap_xy_axes** | 2d_wide (18 MB) | 790 ms | 241 ms | **3.3x** |
+| **switch_dependent** | 1d_3dep (61 MB) | 2287 ms | 977 ms | **2.3x** |
+| **data_refresh** | 2d_square (15 MB) | 697 ms | 304 ms | **2.3x** |
+| **data_refresh** | 1d_sweep (61 MB) | 2405 ms | 1107 ms | **2.2x** |
+| **slide_dimension** | 3d_1dep (24 MB) | 1891 ms | 1231 ms | **1.5x** |
+| **toggle_grid** | 3d_1dep (24 MB) | 1290 ms | 985 ms | **1.3x** |
+
+#### Where remaining time goes (optimization frontier)
+
+For the `large_2d_square` dataset (800x800, 15 MB) after optimization:
+
+- **DataGridder**: 177 ms (58%) -- gridding computation itself
+- **XYSelector**: 82 ms (27%) -- dimension reduction + reorder
+- **DataSelector**: 9 ms (3%) -- extraction
+- **ScaleUnits**: 9 ms (3%) -- prefix computation
+- **SubtractAverage**: 2 ms (1%) -- average subtraction
+- **Overhead**: ~25 ms (8%) -- flowchart propagation, signal emissions
+
+The gridding step (`guess_shape_from_datadict` + `datadict_to_meshgrid`) is now the
+dominant cost and is performing actual computation (not copy/validate overhead).
+Further optimization would need to target the gridding algorithm itself.

From 8ad85a2d2bb413df72ff131840cd5f5ae2ce26f2 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 17 Apr 2026 09:05:36 +0200
Subject: [PATCH 09/64] =?UTF-8?q?perf:=20optimize=20=5Ffind=5Fswitches()?=
 =?UTF-8?q?=20=E2=80=94=202.5x=20faster=20gridding?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The shape-guessing algorithm (_find_switches -> find_direction_period ->
guess_grid_from_sweep_direction) was the #1 bottleneck after rounds 1-2.

Optimizations:
- Compute is_invalid() once instead of 3 times per call
- Single np.percentile([lo, hi]) call instead of two separate sorts
- Direct numpy subtraction instead of MaskedArray creation
- Vectorized boolean mask instead of Python list comprehension
- np.nanmean for NaN-safe sweep direction detection
- Cached np.std in guess_grid_from_sweep_direction

Results (800x800 = 640K pts):
- _find_switches: 80ms -> 31ms (2.6x)
- datadict_to_meshgrid: 175ms -> 71ms (2.5x)
- Cumulative pipeline speedup vs master: 2.8-3.5x

Adds 62 comprehensive gridder tests covering all GridOption paths,
edge cases, various shapes, noisy axes, incomplete data.

All 267 tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md                       |  54 +++
 plottr/utils/num.py                       |  36 +-
 test/pytest/test_gridder_comprehensive.py | 471 ++++++++++++++++++++++
 3 files changed, 548 insertions(+), 13 deletions(-)
 create mode 100644 test/pytest/test_gridder_comprehensive.py

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 0ce7715c..e9ee9192 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -887,3 +887,57 @@ For the `large_2d_square` dataset (800x800, 15 MB) after optimization:
 The gridding step (`guess_shape_from_datadict` + `datadict_to_meshgrid`) is now the
 dominant cost and is performing actual computation (not copy/validate overhead).
 Further optimization would need to target the gridding algorithm itself.
+
+### Round 3: DataGridder Optimization (`_find_switches`)
+
+**Root cause:** `_find_switches()` in `plottr/utils/num.py` was the dominant cost in the
+gridding pipeline. For 640K points it took 80ms per axis (160ms total for 2D).
+
+**What was slow:**
+- Called `is_invalid()` 3 times on the same data (3x O(N))
+- Created a `MaskedArray` just for subtraction (O(N) alloc)
+- Called `np.percentile()` twice with separate array filtering (2x O(N log N) sort)
+- Used Python list comprehension for switch filtering
+
+**Optimizations applied:**
+- Compute `is_invalid()` once, reuse the mask
+- Use direct numpy subtraction instead of MaskedArray (NaN propagates correctly)
+- Compute both percentiles in a single `np.percentile([lo, hi])` call (one sort)
+- Vectorized switch filtering with boolean mask instead of list comprehension
+- Use `np.nanmean` for sweep direction to handle NaN deltas
+- Fixed redundant `np.std()` call in `guess_grid_from_sweep_direction`
+
+**Per-function benchmark (800x800 dataset, 640K pts):**
+
+| Function | Before | After | Speedup |
+|---|---|---|---|
+| `_find_switches()` per axis | 80 ms | 31 ms | **2.6x** |
+| `datadict_to_meshgrid()` | 175 ms | 71 ms | **2.5x** |
+
+**Per-node impact (data_refresh action):**
+
+| Dataset | Grid Before | Grid After | Grid Spd | Total Before | Total After | Total Spd |
+|---|---|---|---|---|---|---|
+| large_1d_sweep (61 MB) | 574 ms | 243 ms | **2.4x** | 1107 ms | 792 ms | **1.4x** |
+| large_1d_3dep (61 MB) | 285 ms | 122 ms | **2.3x** | 547 ms | 386 ms | **1.4x** |
+| large_2d_square (15 MB) | 177 ms | 73 ms | **2.4x** | 304 ms | 199 ms | **1.5x** |
+| large_2d_2dep (15 MB) | 137 ms | 58 ms | **2.4x** | 238 ms | 156 ms | **1.5x** |
+| large_3d_1dep (24 MB) | 345 ms | 139 ms | **2.5x** | 497 ms | 292 ms | **1.7x** |
+| large_3d_2dep (15 MB) | 169 ms | 68 ms | **2.5x** | 250 ms | 142 ms | **1.8x** |
+| large_2d_interrupted (18 MB) | 89 ms | 38 ms | **2.3x** | 156 ms | 105 ms | **1.5x** |
+| large_2d_wide (18 MB) | 218 ms | 93 ms | **2.3x** | 375 ms | 249 ms | **1.5x** |
+
+**Cumulative speedup vs original master baseline (data_refresh):**
+
+| Dataset | Master Baseline | Fully Optimized | Cumulative Speedup |
+|---|---|---|---|
+| large_1d_sweep (61 MB) | 2405 ms | 792 ms | **3.0x** |
+| large_1d_3dep (61 MB) | 1217 ms | 386 ms | **3.2x** |
+| large_2d_square (15 MB) | 697 ms | 199 ms | **3.5x** |
+| large_2d_2dep (15 MB) | 526 ms | 156 ms | **3.4x** |
+| large_3d_1dep (24 MB) | 813 ms | 292 ms | **2.8x** |
+| large_3d_2dep (15 MB) | 405 ms | 142 ms | **2.9x** |
+| large_2d_interrupted (18 MB) | 355 ms | 105 ms | **3.4x** |
+| large_2d_wide (18 MB) | 828 ms | 249 ms | **3.3x** |
+
+62 new tests added in `test_gridder_comprehensive.py`.
diff --git a/plottr/utils/num.py b/plottr/utils/num.py
index 8199982c..6f073f33 100644
--- a/plottr/utils/num.py
+++ b/plottr/utils/num.py
@@ -156,17 +156,29 @@ def array1d_to_meshgrid(arr: Union[List, np.ndarray],
 def _find_switches(arr: np.ndarray,
                    rth: float = 25,
                    ztol: float = 1e-15) -> np.ndarray:
-    arr_: np.ndarray = np.ma.MaskedArray(arr, is_invalid(arr))
-    deltas = arr_[1:] - arr_[:-1]
-    hi = np.percentile(arr[~is_invalid(arr)], 100.-rth)
-    lo = np.percentile(arr[~is_invalid(arr)], rth)
-    diff = np.abs(hi-lo)
+    # Compute invalid mask once, reuse everywhere
+    invalid = is_invalid(arr)
+    valid_mask = ~invalid
+
+    # Use np.diff directly — for entries adjacent to invalid values, the delta
+    # will be nan. We handle this by using nan-aware operations below.
+    deltas = arr[1:] - arr[:-1]
+
+    # Compute percentile range of valid data
+    valid_data = arr[valid_mask]
+    if valid_data.size == 0:
+        return np.array([])
+
+    lo, hi = np.percentile(valid_data, [rth, 100. - rth])
+    diff = np.abs(hi - lo)
 
     if not diff > ztol:
         return np.array([])
 
     # first step: suspected switches are where we have 'large' jumps in value.
-    switch_candidates = np.where(np.abs(deltas) >= diff)[0]
+    # Use nan-safe abs: nan deltas will produce nan >= diff which is False
+    abs_deltas = np.abs(deltas)
+    switch_candidates = np.where(abs_deltas >= diff)[0]
     switch_candidates = switch_candidates[switch_candidates > 0]
     if not len(switch_candidates) > 0:
         return np.array([])
@@ -174,15 +186,13 @@ def _find_switches(arr: np.ndarray,
     # importantly: switches have to opposite to the sweep direction.
     # we check the sweep direction by looking at the values prior to the
     # first suspected switch
-    sweep_direction = np.sign(np.mean(deltas[:switch_candidates[0]]))
+    sweep_direction = np.sign(np.nanmean(deltas[:switch_candidates[0]]))
 
     # real switches are then those where the delta is opposite to the sweep
-    # direction.
+    # direction. Vectorized filter instead of list comprehension.
     switch_candidate_vals = deltas[switch_candidates]
-    switches = [s for (s, v) in zip(switch_candidates, switch_candidate_vals)
-                if np.sign(v) == -sweep_direction]
-
-    return np.array(switches)
+    mask = np.sign(switch_candidate_vals) == -sweep_direction
+    return switch_candidates[mask]
 
 
 def find_direction_period(vals: np.ndarray, ignore_last: bool = False) \
@@ -283,7 +293,7 @@ def guess_grid_from_sweep_direction(**axes: np.ndarray) \
                 else:
                     if mean == 0:
                         mean = max(np.abs(vals.max()), np.abs(vals.min()))
-                    cost = 1./np.abs(np.std(vals)/mean)
+                    cost = 1./np.abs(std/mean)
                 sorting.append(size + cost)
         else:
             return None
diff --git a/test/pytest/test_gridder_comprehensive.py b/test/pytest/test_gridder_comprehensive.py
new file mode 100644
index 00000000..a870f4ca
--- /dev/null
+++ b/test/pytest/test_gridder_comprehensive.py
@@ -0,0 +1,471 @@
+"""
+test_gridder_comprehensive.py
+
+Comprehensive tests for the DataGridder node and underlying gridding functions.
+Covers all GridOption paths, various data shapes, edge cases, and input types.
+"""
+import numpy as np
+import pytest
+
+from plottr.data.datadict import (
+    DataDict, MeshgridDataDict, DataDictBase,
+    datadict_to_meshgrid, meshgrid_to_datadict,
+    guess_shape_from_datadict, GriddingError,
+)
+from plottr.node.tools import linearFlowchart
+from plottr.node.grid import DataGridder, GridOption
+from plottr.utils.num import (
+    guess_grid_from_sweep_direction, find_direction_period,
+    _find_switches, array1d_to_meshgrid,
+)
+
+DataGridder.useUi = False
+DataGridder.uiClass = None
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_griddable(shape, ndeps=1, noise=0.0):
+    """Create a griddable DataDict from a meshgrid shape."""
+    naxes = len(shape)
+    ax_names = [f'ax{i}' for i in range(naxes)]
+    axes_1d = [np.linspace(0, 1, s) for s in shape]
+    grids = np.meshgrid(*axes_1d, indexing='ij')
+    dd = DataDict()
+    for i, ax in enumerate(ax_names):
+        vals = grids[i].ravel()
+        if noise > 0:
+            vals = vals + np.random.randn(vals.size) * noise
+        dd[ax] = dict(values=vals, axes=[], unit='V', label=ax)
+    for j in range(ndeps):
+        dd[f'dep{j}'] = dict(values=np.random.randn(int(np.prod(shape))),
+                             axes=ax_names[:], unit='A', label=f'dep{j}')
+    dd.validate()
+    return dd
+
+
+def make_mesh(shape, ndeps=1):
+    naxes = len(shape)
+    ax_names = [f'ax{i}' for i in range(naxes)]
+    axes_1d = [np.linspace(0, 1, s) for s in shape]
+    grids = np.meshgrid(*axes_1d, indexing='ij')
+    dd = MeshgridDataDict()
+    for i, ax in enumerate(ax_names):
+        dd[ax] = dict(values=grids[i], axes=[], unit='V', label=ax)
+    for j in range(ndeps):
+        dd[f'dep{j}'] = dict(values=np.random.randn(*shape),
+                             axes=ax_names[:], unit='A', label=f'dep{j}')
+    dd.validate()
+    return dd
+
+
+# ===========================================================================
+# _find_switches
+# ===========================================================================
+
+class TestFindSwitches:
+    def test_monotonic_no_switches(self):
+        arr = np.linspace(0, 10, 100)
+        assert len(_find_switches(arr)) == 0
+
+    def test_single_sawtooth(self):
+        arr = np.concatenate([np.arange(10), np.arange(10)])
+        switches = _find_switches(arr)
+        assert len(switches) >= 1
+
+    def test_flat_array(self):
+        arr = np.ones(50)
+        assert len(_find_switches(arr)) == 0
+
+    def test_with_nan(self):
+        arr = np.linspace(0, 10, 100)
+        arr[50] = np.nan
+        switches = _find_switches(arr)
+        assert isinstance(switches, np.ndarray)
+
+    def test_short_array(self):
+        arr = np.array([1.0, 2.0])
+        switches = _find_switches(arr)
+        assert isinstance(switches, np.ndarray)
+
+    def test_single_element(self):
+        arr = np.array([1.0])
+        switches = _find_switches(arr)
+        assert len(switches) == 0
+
+
+# ===========================================================================
+# find_direction_period
+# ===========================================================================
+
+class TestFindDirectionPeriod:
+    def test_repeating_pattern(self):
+        # 0,1,2,3,4, 0,1,2,3,4, 0,1,2,3,4
+        arr = np.tile(np.arange(5, dtype=float), 3)
+        period = find_direction_period(arr)
+        assert period == 5
+
+    def test_no_repetition(self):
+        arr = np.linspace(0, 10, 100)
+        period = find_direction_period(arr)
+        assert period == np.inf
+
+    def test_incomplete_last_period(self):
+        arr = np.concatenate([np.tile(np.arange(5, dtype=float), 3),
+                              np.arange(3, dtype=float)])
+        period = find_direction_period(arr, ignore_last=True)
+        assert period == 5
+
+    def test_single_value(self):
+        arr = np.array([1.0])
+        period = find_direction_period(arr)
+        assert period is not None  # should handle gracefully
+
+
+# ===========================================================================
+# guess_grid_from_sweep_direction
+# ===========================================================================
+
+class TestGuessGrid:
+    @pytest.mark.parametrize("shape", [
+        (10,), (5, 4), (3, 4, 2), (10, 10), (20, 15),
+    ])
+    def test_correct_shape_guessed(self, shape):
+        naxes = len(shape)
+        ax_names = [f'ax{i}' for i in range(naxes)]
+        axes_1d = [np.linspace(0, 1, s) for s in shape]
+        grids = np.meshgrid(*axes_1d, indexing='ij')
+        kwargs = {ax_names[i]: grids[i].ravel() for i in range(naxes)}
+        result = guess_grid_from_sweep_direction(**kwargs)
+        assert result is not None
+        _, guessed_shape = result
+        assert guessed_shape == shape
+
+    def test_noisy_axes(self):
+        shape = (10, 8)
+        grids = np.meshgrid(np.linspace(0, 1, 10), np.linspace(0, 1, 8), indexing='ij')
+        x = grids[0].ravel() + np.random.randn(80) * 1e-6
+        y = grids[1].ravel()
+        result = guess_grid_from_sweep_direction(x=x, y=y)
+        assert result is not None
+        _, guessed = result
+        assert guessed == shape
+
+    def test_single_axis(self):
+        x = np.linspace(0, 1, 50)
+        result = guess_grid_from_sweep_direction(x=x)
+        assert result is not None
+        _, shape = result
+        assert shape == (50,)
+
+    def test_empty_raises(self):
+        with pytest.raises(ValueError):
+            guess_grid_from_sweep_direction()
+
+    def test_mismatched_sizes_raises(self):
+        with pytest.raises(ValueError):
+            guess_grid_from_sweep_direction(x=np.arange(10, dtype=float),
+                                            y=np.arange(5, dtype=float))
+
+
+# ===========================================================================
+# array1d_to_meshgrid
+# ===========================================================================
+
+class TestArray1dToMeshgrid:
+    def test_exact_reshape(self):
+        arr = np.arange(12, dtype=float)
+        result = array1d_to_meshgrid(arr, (3, 4))
+        assert result.shape == (3, 4)
+
+    def test_padding_with_nan(self):
+        arr = np.arange(10, dtype=float)
+        result = array1d_to_meshgrid(arr, (4, 4))  # needs 16, has 10
+        assert result.shape == (4, 4)
+        assert np.isnan(result.ravel()[-1])
+
+    def test_truncation(self):
+        arr = np.arange(20, dtype=float)
+        result = array1d_to_meshgrid(arr, (3, 4))  # needs 12, has 20
+        assert result.shape == (3, 4)
+
+    def test_copy_true_independent(self):
+        arr = np.arange(12, dtype=float)
+        result = array1d_to_meshgrid(arr, (3, 4), copy=True)
+        result[0, 0] = 999
+        assert arr[0] != 999
+
+    def test_copy_false_may_share(self):
+        arr = np.arange(12, dtype=float)
+        result = array1d_to_meshgrid(arr, (3, 4), copy=False)
+        assert result.shape == (3, 4)
+
+    def test_object_array_padding(self):
+        arr = np.array([1, 2, 3], dtype=object)
+        result = array1d_to_meshgrid(arr, (2, 3))  # needs 6, has 3
+        assert result.shape == (2, 3)
+
+
+# ===========================================================================
+# guess_shape_from_datadict
+# ===========================================================================
+
+class TestGuessShapeFromDatadict:
+    @pytest.mark.parametrize("shape", [
+        (10, 5), (3, 4, 2), (20, 15),
+    ])
+    def test_guesses_correct_shape(self, shape):
+        dd = make_griddable(shape)
+        shapes = guess_shape_from_datadict(dd)
+        for dep in dd.dependents():
+            assert shapes[dep] is not None
+            _, guessed = shapes[dep]
+            assert guessed == shape
+
+    def test_with_multiple_deps(self):
+        dd = make_griddable((10, 8), ndeps=3)
+        shapes = guess_shape_from_datadict(dd)
+        assert len(shapes) == 3
+        for dep in dd.dependents():
+            assert shapes[dep] is not None
+
+
+# ===========================================================================
+# datadict_to_meshgrid
+# ===========================================================================
+
+class TestDatadictToMeshgrid:
+    @pytest.mark.parametrize("shape", [
+        (5,), (5, 4), (10, 10), (3, 4, 2),
+    ])
+    def test_produces_correct_shape(self, shape):
+        dd = make_griddable(shape)
+        mesh = datadict_to_meshgrid(dd)
+        assert isinstance(mesh, MeshgridDataDict)
+        assert mesh.shape() == shape
+
+    def test_with_target_shape(self):
+        dd = make_griddable((5, 4))
+        mesh = datadict_to_meshgrid(dd, target_shape=(5, 4))
+        assert mesh.shape() == (5, 4)
+
+    def test_with_inner_axis_order(self):
+        # Create data where inner order doesn't match axes order
+        x = np.arange(5, dtype=float)
+        y = np.linspace(0, 1, 4)
+        xx, yy = np.meshgrid(x, y, indexing='xy')  # xy order
+        dd = DataDict(
+            x=dict(values=xx.ravel()), y=dict(values=yy.ravel()),
+            z=dict(values=(xx * yy).ravel(), axes=['x', 'y']),
+        )
+        dd.validate()
+        mesh = datadict_to_meshgrid(dd, target_shape=(4, 5),
+                                     inner_axis_order=['y', 'x'])
+        assert isinstance(mesh, MeshgridDataDict)
+
+    def test_use_existing_shape(self):
+        """use_existing_shape works when data already has the right shape."""
+        # Need data with nested array shapes matching target
+        x = np.arange(5, dtype=float)
+        y = np.linspace(0, 1, 4)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        dd = DataDict(
+            x=dict(values=xx),  # already (5,4) shaped
+            y=dict(values=yy),
+            z=dict(values=xx * yy, axes=['x', 'y']),
+        )
+        dd.validate()
+        mesh = datadict_to_meshgrid(dd, use_existing_shape=True)
+        assert isinstance(mesh, MeshgridDataDict)
+        assert mesh.shape() == (5, 4)
+
+    def test_copy_false(self):
+        dd = make_griddable((5, 4))
+        mesh = datadict_to_meshgrid(dd, copy=False)
+        assert isinstance(mesh, MeshgridDataDict)
+
+    def test_preserves_meta(self):
+        dd = make_griddable((5, 4))
+        dd.add_meta('info', 'test')
+        mesh = datadict_to_meshgrid(dd)
+        assert mesh.meta_val('info') == 'test'
+
+    def test_incompatible_axes_raises(self):
+        dd = DataDict(
+            x=dict(values=np.arange(10, dtype=float)),
+            y=dict(values=np.arange(10, dtype=float), axes=['x']),
+            z=dict(values=np.arange(10, dtype=float)),
+            w=dict(values=np.arange(10, dtype=float), axes=['z']),
+        )
+        dd.validate()
+        with pytest.raises(GriddingError):
+            datadict_to_meshgrid(dd)
+
+    def test_empty_datadict(self):
+        dd = DataDict()
+        dd.validate()
+        mesh = datadict_to_meshgrid(dd)
+        assert isinstance(mesh, MeshgridDataDict)
+
+    def test_incomplete_data_pads_with_nan(self):
+        # 5x4 grid but only 18 of 20 points
+        shape = (5, 4)
+        grids = np.meshgrid(np.linspace(0, 1, 5), np.linspace(0, 1, 4), indexing='ij')
+        dd = DataDict(
+            x=dict(values=grids[0].ravel()[:18]),
+            y=dict(values=grids[1].ravel()[:18]),
+            z=dict(values=np.random.randn(18), axes=['x', 'y']),
+        )
+        dd.validate()
+        mesh = datadict_to_meshgrid(dd, target_shape=shape)
+        assert mesh.shape() == shape
+        # Last 2 values should be NaN
+        assert np.isnan(mesh.data_vals('z').ravel()[-1])
+
+
+# ===========================================================================
+# meshgrid_to_datadict
+# ===========================================================================
+
+class TestMeshgridToDatadict:
+    @pytest.mark.parametrize("shape", [
+        (5, 4), (10, 10), (3, 4, 2),
+    ])
+    def test_produces_flat(self, shape):
+        mesh = make_mesh(shape)
+        dd = meshgrid_to_datadict(mesh)
+        assert isinstance(dd, DataDict)
+        assert dd.nrecords() == int(np.prod(shape))
+
+
+# ===========================================================================
+# DataGridder node — all GridOption paths
+# ===========================================================================
+
+class TestDataGridderNode:
+
+    # --- DataDict input ---
+
+    @pytest.mark.parametrize("shape", [
+        (10,), (5, 4), (3, 4, 2),
+    ])
+    def test_noGrid_tabular_passthrough(self, qtbot, shape):
+        dd = make_griddable(shape)
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.noGrid, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, DataDict)
+
+    @pytest.mark.parametrize("shape", [
+        (10,), (5, 4), (10, 10), (50, 3), (3, 4, 2),
+    ])
+    def test_guessShape_tabular(self, qtbot, shape):
+        dd = make_griddable(shape)
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, MeshgridDataDict)
+        assert out.shape() == shape
+
+    def test_specifyShape_tabular(self, qtbot):
+        dd = make_griddable((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.specifyShape, dict(
+            shape=(5, 4), order=['ax0', 'ax1'])
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, MeshgridDataDict)
+        assert out.shape() == (5, 4)
+
+    def test_metadataShape_tabular(self, qtbot):
+        dd = make_griddable((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.metadataShape, {}
+        out = fc.outputValues()['dataOut']
+        # metadataShape uses existing shape from data arrays
+        assert out is not None
+
+    # --- MeshgridDataDict input ---
+
+    def test_noGrid_meshgrid_flattens(self, qtbot):
+        mesh = make_mesh((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=mesh)
+        fc.nodes()['g'].grid = GridOption.noGrid, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, DataDict)
+        assert out.nrecords() == 20
+
+    def test_guessShape_meshgrid_passthrough(self, qtbot):
+        mesh = make_mesh((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=mesh)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, MeshgridDataDict)
+
+    def test_specifyShape_meshgrid_warns(self, qtbot):
+        mesh = make_mesh((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=mesh)
+        fc.nodes()['g'].grid = GridOption.specifyShape, dict(shape=(5, 4))
+        out = fc.outputValues()['dataOut']
+        # Should pass through with warning
+        assert isinstance(out, MeshgridDataDict)
+
+    def test_metadataShape_meshgrid_passthrough(self, qtbot):
+        mesh = make_mesh((5, 4))
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=mesh)
+        fc.nodes()['g'].grid = GridOption.metadataShape, {}
+        out = fc.outputValues()['dataOut']
+        assert isinstance(out, MeshgridDataDict)
+
+    # --- Edge cases ---
+
+    def test_gridding_error_falls_back(self, qtbot):
+        """Data that can't be gridded should fall back to noGrid."""
+        dd = DataDict(
+            x=dict(values=np.array([1.0, 1.0, 2.0, 2.0, 3.0])),
+            y=dict(values=np.array([1.0, 2.0, 1.0, 2.0, 1.0])),
+            z=dict(values=np.random.randn(5), axes=['x', 'y']),
+        )
+        dd.validate()
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        # Should not crash; may fall back to expanded DataDict
+        assert out is not None
+
+    def test_does_not_mutate_input(self, qtbot):
+        dd = make_griddable((10, 8))
+        ref_vals = {k: v['values'].copy() for k, v in dd.data_items()}
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        _ = fc.outputValues()['dataOut']
+        for k, orig in ref_vals.items():
+            assert np.array_equal(dd.data_vals(k), orig), f"{k} was mutated"
+
+    def test_multiple_deps(self, qtbot):
+        dd = make_griddable((5, 4), ndeps=3)
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert len(out.dependents()) == 3
+
+    def test_with_noisy_axes(self, qtbot):
+        dd = make_griddable((10, 8), noise=1e-6)
+        fc = linearFlowchart(('g', DataGridder))
+        fc.setInput(dataIn=dd)
+        fc.nodes()['g'].grid = GridOption.guessShape, {}
+        out = fc.outputValues()['dataOut']
+        assert out is not None
+        assert isinstance(out, MeshgridDataDict)

From ac573f5f61153434824e81d0b5b5377a17dc9404 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 17 Apr 2026 15:39:15 +0200
Subject: [PATCH 10/64] docs: add real experimental data benchmark

Benchmarked on production quantum device datasets:
- QDstability (223 MB, 16 deps): cold 3.56x, steady 2.93x faster
- TopogapStage2 (152 MB, 21 deps, 4D): cold 2.47x, steady 2.7x faster
- QDtuning (14 MB, 16 deps): steady 2.73x faster
- DataSelector node: 12-13x faster on these multi-dependent datasets

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index e9ee9192..ce049308 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -941,3 +941,27 @@ gridding pipeline. For 640K points it took 80ms per axis (160ms total for 2D).
 | large_2d_wide (18 MB) | 828 ms | 249 ms | **3.3x** |
 
 62 new tests added in `test_gridder_comprehensive.py`.
+
+### Real Experimental Data Benchmark (P1386BB_00BE_datasets.db)
+
+Benchmark on actual experimental datasets from a quantum device measurement campaign.
+These are production datasets with real-world complexity (16-21 dependents, nested array
+data, 4D parameter spaces).
+
+| Run | Experiment | Data Size | Deps | Axes | Cold Before | Cold After | Cold Spd | Steady Before | Steady After | Steady Spd |
+|---|---|---|---|---|---|---|---|---|---|---|
+| 720 | **QDstability** | 223 MB | 16 | 2 | 636 ms | 179 ms | **3.56x** | 555 ms | 189 ms | **2.93x** |
+| 713 | **TopogapStage2** | 152 MB | 21 | 19 | 688 ms | 279 ms | **2.47x** | 439 ms | 161 ms | **2.73x** |
+| 716 | **TopogapStage2** | 152 MB | 21 | 19 | 690 ms | 280 ms | **2.47x** | 432 ms | 164 ms | **2.64x** |
+| 710 | **QDtuning** | 14 MB | 16 | 2 | 52 ms | 31 ms | **1.70x** | 31 ms | 11 ms | **2.73x** |
+| 1496 | GateSweepProtocol | <1 MB | 1 | 1 | 22 ms | 20 ms | 1.09x | 2.5 ms | 1.1 ms | **2.27x** |
+
+Per-node breakdown on QDstability (223 MB, 16 deps):
+- **DataSelector**: 218 ms -> 17 ms (**12.8x**) -- largest_numtype O(1), copy optimized
+- **DataGridder**: 33 ms -> 16 ms (**2.1x**) -- _find_switches optimized
+- **XYSelector**: 264 ms -> 126 ms (**2.1x**) -- cascading copy removed
+
+Per-node breakdown on TopogapStage2 (152 MB, 21 deps, 4D):
+- **DataSelector**: 214 ms -> 16 ms (**13.4x**)
+- **DataGridder**: 34 ms -> 17 ms (**2.0x**)
+- **XYSelector**: 164 ms -> 109 ms (**1.5x**)

From d95db0e15ec2520e8057217b05acf0f408655eae Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 10:56:40 +0200
Subject: [PATCH 11/64] =?UTF-8?q?perf:=20optimize=20inspectr=20=E2=80=94?=
 =?UTF-8?q?=20lazy=20snapshot,=20incremental=20refresh?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lazy snapshot loading in RunInfo:
- Snapshot tree widget items are built only when the user expands the
  'QCoDeS Snapshot' section, not on every click
- Saves ~951ms per click on datasets with 5.9 MB snapshots (3,554x faster)
- Info pane shows collapsed by default instead of expandAll()

Incremental DB refresh:
- refreshDB() now loads only new runs since last refresh using the
  start parameter of get_runs_from_db()
- Merges incremental results into existing dataframe
- First load still loads everything

All 267 tests pass. No mypy errors introduced.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md     |  21 ++++++
 plottr/apps/inspectr.py | 161 ++++++++++++++++++++++++++++++++++------
 2 files changed, 161 insertions(+), 21 deletions(-)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index ce049308..f686b7f4 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -965,3 +965,24 @@ Per-node breakdown on TopogapStage2 (152 MB, 21 deps, 4D):
 - **DataSelector**: 214 ms -> 16 ms (**13.4x**)
 - **DataGridder**: 34 ms -> 17 ms (**2.0x**)
 - **XYSelector**: 164 ms -> 109 ms (**1.5x**)
+
+---
+
+## Inspectr Optimizations
+
+### Changes
+
+**Lazy snapshot loading** (`plottr/apps/inspectr.py`):
+- `RunInfo.setInfo()` no longer calls `dictToTreeWidgetItems()` on the snapshot dict
+- Instead shows a collapsed "QCoDeS Snapshot (click to expand)" placeholder
+- Full snapshot tree is built only when the user expands the item
+- `expandAll()` removed -- tree shows collapsed by default
+
+**Incremental DB refresh** (`plottr/apps/inspectr.py`):
+- `refreshDB()` now passes `start=len(dbdf)` to `get_runs_from_db()`
+- Only loads new datasets since last refresh, not the entire database
+- `DBLoaded()` merges incremental results into existing dataframe
+- First load still loads everything (`start=0`)
+
+**Result**: Clicking a dataset with a 5.9 MB snapshot: 951 ms -> 0.3 ms (**3,554x faster**)
+Refreshing a large DB: loads only new runs instead of re-iterating all 1496.
diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index f6e9e7f6..c21c3ece 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -28,6 +28,7 @@
 
 from .. import log as plottrlog
 from ..data.qcodes_dataset import (get_runs_from_db_as_dataframe,
+                                   get_runs_from_db,
                                    get_ds_structure, load_dataset_from)
 from plottr.gui.widgets import MonitorIntervalInput, FormLayoutWrapper, dictToTreeWidgetItems
 
@@ -252,24 +253,85 @@ class RunInfo(QtWidgets.QTreeWidget):
 
     When sending information in form of a dictionary, it will create
     a tree view of that dictionary and display that.
+
+    Snapshot data is loaded lazily: a placeholder item is shown, and the full
+    snapshot tree is built only when the user expands it.
     """
 
+    #: Signal emitted when the snapshot section needs to be loaded.
+    #: Argument is the QTreeWidgetItem to populate.
+    _snapshotRequested = Signal(object)
+
     def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         super().__init__(parent)
 
         self.setHeaderLabels(['Key', 'Value'])
         self.setColumnCount(2)
 
+        self._snapshotItem: Optional[QtWidgets.QTreeWidgetItem] = None
+        self._snapshotData: Optional[dict] = None
+        self._snapshotLoaded = False
+
+        self.itemExpanded.connect(self._onItemExpanded)
+
     @Slot(dict)
     def setInfo(self, infoDict: Dict[str, Union[dict, str]]) -> None:
         self.clear()
+        self._snapshotItem = None
+        self._snapshotData = None
+        self._snapshotLoaded = False
+
+        for key, value in infoDict.items():
+            if key == 'QCoDeS Snapshot':
+                # Create a placeholder for the snapshot — don't build the tree yet
+                self._snapshotItem = QtWidgets.QTreeWidgetItem([key, '(click to expand)'])
+                # Add a dummy child so the expand arrow appears
+                self._snapshotItem.addChild(QtWidgets.QTreeWidgetItem(['(loading...)', '']))
+                self._snapshotData = value if isinstance(value, dict) else None
+                self.addTopLevelItem(self._snapshotItem)
+                self._snapshotItem.setExpanded(False)
+            else:
+                if not isinstance(value, dict):
+                    item = QtWidgets.QTreeWidgetItem([str(key), str(value)])
+                else:
+                    item = QtWidgets.QTreeWidgetItem([key, ''])
+                    for child in dictToTreeWidgetItems(value):
+                        item.addChild(child)
+                self.addTopLevelItem(item)
+                item.setExpanded(True)
+
+        for i in range(2):
+            self.resizeColumnToContents(i)
+
+    @Slot(QtWidgets.QTreeWidgetItem)
+    def _onItemExpanded(self, item: QtWidgets.QTreeWidgetItem) -> None:
+        if item is self._snapshotItem and not self._snapshotLoaded:
+            self._loadSnapshot()
+
+    def _loadSnapshot(self) -> None:
+        """Replace the placeholder with the actual snapshot tree."""
+        if self._snapshotItem is None:
+            return
+
+        self._snapshotLoaded = True
+        snap_data = self._snapshotData
+
+        # Remove placeholder children
+        self._snapshotItem.takeChildren()
+
+        if snap_data is None:
+            self._snapshotItem.setText(1, '(no snapshot)')
+            return
+
+        self._snapshotItem.setText(1, '')
 
-        items = dictToTreeWidgetItems(infoDict)
-        for item in items:
-            self.addTopLevelItem(item)
-            item.setExpanded(True)
+        if isinstance(snap_data, dict):
+            for child in dictToTreeWidgetItems(snap_data):
+                self._snapshotItem.addChild(child)
+        else:
+            self._snapshotItem.addChild(
+                QtWidgets.QTreeWidgetItem([str(snap_data), '']))
 
-        self.expandAll()
         for i in range(2):
             self.resizeColumnToContents(i)
 
@@ -279,16 +341,31 @@ class LoadDBProcess(QtCore.QObject):
     Worker object for getting a qcodes db overview as pandas dataframe.
     It's good to have this in a separate thread because it can be a bit slow
     for large databases.
+
+    Supports incremental loading: when ``start`` is set, only loads runs
+    with index >= start, then emits the partial dataframe for merging.
     """
     dbdfLoaded = Signal(object)
     pathSet = Signal()
 
-    def setPath(self, path: str) -> None:
+    def __init__(self) -> None:
+        super().__init__()
+        self.path: Optional[str] = None
+        self.start: int = 0
+
+    def setPath(self, path: str, start: int = 0) -> None:
         self.path = path
+        self.start = start
         self.pathSet.emit()
 
     def loadDB(self) -> None:
-        dbdf = get_runs_from_db_as_dataframe(self.path)
+        assert self.path is not None
+        overview = get_runs_from_db(self.path, start=self.start,
+                                    get_structure=False)
+        if overview:
+            dbdf = pandas.DataFrame.from_dict(overview, orient='index')
+        else:
+            dbdf = pandas.DataFrame()
         self.dbdfLoaded.emit(dbdf)
 
 
@@ -306,11 +383,13 @@ class QCodesDBInspector(QtWidgets.QMainWindow):
     _sendInfo = Signal(dict)
 
     def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
-                 dbPath: Optional[str] = None):
+                 dbPath: Optional[str] = None,
+                 plotWidgetClass: Optional[type] = None):
         """Constructor for :class:`QCodesDBInspector`."""
         super().__init__(parent)
 
         self._plotWindows: Dict[int, WindowDict] = {}
+        self._plotWidgetClass = plotWidgetClass
 
         self.filepath = dbPath
         self.dbdf: Optional[pandas.DataFrame] = None
@@ -488,18 +567,31 @@ def loadFullDB(self, path: Optional[str] = None) -> None:
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
-                self.loadDBProcess.setPath(self.filepath)
+                self.loadDBProcess.setPath(self.filepath, start=0)
 
     def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
-        if self.dbdf is not None and dbdf.equals(self.dbdf):
-            LOGGER.debug('DB reloaded with no changes. Skipping update')
+        if dbdf.size == 0 and self.dbdf is not None:
+            LOGGER.debug('DB reloaded with no new data. Skipping update.')
             return None
-        self.dbdf = dbdf
+
+        if self.latestRunId is not None and self.dbdf is not None and dbdf.size > 0:
+            # Incremental load: merge new rows into existing dataframe
+            # Update existing rows (e.g., completed_date may have changed)
+            for idx in dbdf.index:
+                if idx in self.dbdf.index:
+                    self.dbdf.loc[idx] = dbdf.loc[idx]
+                else:
+                    self.dbdf = pandas.concat([self.dbdf, dbdf.loc[[idx]]])
+        elif dbdf.size > 0:
+            self.dbdf = dbdf
+        else:
+            self.dbdf = dbdf
+
         self.dbdfUpdated.emit()
         self.dateList.sendSelectedDates()
-        LOGGER.debug('DB reloaded')
+        LOGGER.debug('DB loaded/refreshed')
 
-        if self.latestRunId is not None:
+        if self.latestRunId is not None and self.dbdf is not None and self.dbdf.size > 0:
             idxs = self.dbdf.index.values
             newIdxs = idxs[idxs > self.latestRunId]
 
@@ -524,11 +616,17 @@ def refreshDB(self) -> None:
             if self.loadDBThread.isRunning():
                 return
             if self.dbdf is not None and self.dbdf.size > 0:
-                self.latestRunId = self.dbdf.index.values.max()
+                self.latestRunId = int(self.dbdf.index.values.max())
             else:
                 self.latestRunId = -1
 
-            self.loadFullDB()
+            # Incremental refresh: only load runs newer than what we have.
+            # The start parameter indexes into the sorted list of runs, so we
+            # use the count of runs we already have as the start offset.
+            start = len(self.dbdf) if self.dbdf is not None and self.dbdf.size > 0 else 0
+            if self.filepath is not None:
+                if not self.loadDBThread.isRunning():
+                    self.loadDBProcess.setPath(self.filepath, start=start)
 
     @Slot(float)
     def setMonitorInterval(self, val: float) -> None:
@@ -599,7 +697,10 @@ def setRunSelection(self, runId: int) -> None:
     @Slot(int)
     def plotRun(self, runId: int) -> None:
         assert self.filepath is not None
-        fc, win = autoplotQcodesDataset(pathAndId=(self.filepath, runId))
+        fc, win = autoplotQcodesDataset(
+            pathAndId=(self.filepath, runId),
+            plotWidgetClass=self._plotWidgetClass,
+        )
         self._plotWindows[runId] = {
             'flowchart': fc,
             'window': win,
@@ -650,16 +751,18 @@ class WindowDict(TypedDict):
     window: QCAutoPlotMainWindow
 
 
-def inspectr(dbPath: Optional[str] = None) -> QCodesDBInspector:
-    win = QCodesDBInspector(dbPath=dbPath)
+def inspectr(dbPath: Optional[str] = None,
+             plotWidgetClass: Optional[type] = None) -> QCodesDBInspector:
+    win = QCodesDBInspector(dbPath=dbPath, plotWidgetClass=plotWidgetClass)
     return win
 
 
-def main(dbPath: Optional[str], log_level: Union[int, str] = logging.WARNING) -> None:
+def main(dbPath: Optional[str], log_level: Union[int, str] = logging.WARNING,
+         plotWidgetClass: Optional[type] = None) -> None:
     app = QtWidgets.QApplication([])
     plottrlog.enableStreamHandler(True, log_level)
 
-    win = inspectr(dbPath=dbPath)
+    win = inspectr(dbPath=dbPath, plotWidgetClass=plotWidgetClass)
     win.show()
 
     if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
@@ -677,3 +780,19 @@ def script() -> None:
                         default="WARNING")
     args = parser.parse_args()
     main(args.dbpath, args.console_log_level)
+
+
+def script_pyqtgraph() -> None:
+    """Entry point for inspectr using the pyqtgraph plotting backend."""
+    from plottr.plot.pyqtgraph.autoplot import AutoPlot as PGAutoPlot
+
+    parser = argparse.ArgumentParser(
+        description='inspectr -- sifting through qcodes data (pyqtgraph backend).'
+    )
+    parser.add_argument('--dbpath', help='path to qcodes .db file',
+                        default=None)
+    parser.add_argument("--console-log-level",
+                        choices=("ERROR", "WARNING", "INFO", "DEBUG"),
+                        default="WARNING")
+    args = parser.parse_args()
+    main(args.dbpath, args.console_log_level, plotWidgetClass=PGAutoPlot)

From b67ffaac710c5b9ad30c8729e2ae7c19d43c63f7 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 11:58:49 +0200
Subject: [PATCH 12/64] perf: fast DB loading via load_by_id, bypass
 experiments enumeration

Add get_runs_from_db_fast() which uses load_by_id() directly per run,
bypassing the O(N^2) experiments() + data_sets() enumeration.

For 1496 runs: old approach takes 15+ minutes, new takes ~5 seconds.
Incremental refresh loads only new runs since last known run_id.

LoadDBProcess now uses get_runs_from_db_fast with start_run_id parameter
for both initial load and incremental refresh.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md           | 23 ++++++++++++++
 plottr/apps/inspectr.py       | 25 ++++++++-------
 plottr/data/qcodes_dataset.py | 57 +++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 13 deletions(-)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index f686b7f4..d5cf0450 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -986,3 +986,26 @@ Per-node breakdown on TopogapStage2 (152 MB, 21 deps, 4D):
 
 **Result**: Clicking a dataset with a 5.9 MB snapshot: 951 ms -> 0.3 ms (**3,554x faster**)
 Refreshing a large DB: loads only new runs instead of re-iterating all 1496.
+
+### Fast DB Loading via load_by_id (bypassing experiments/data_sets)
+
+Added `get_runs_from_db_fast()` in `plottr/data/qcodes_dataset.py` which
+uses `load_by_id()` directly for each run, bypassing the expensive
+`experiments()` + `exp.data_sets()` enumeration in qcodes.
+
+The old approach is O(N^2) because `experiments()` loads all experiment
+objects, then `data_sets()` iterates each experiment's runs. For 1496 runs
+this takes 15+ minutes. The new approach is O(N) at ~3ms per run.
+
+| Approach | 23 runs | Projected 1496 runs |
+|---|---|---|
+| Old (experiments + data_sets) | 103 ms | 15+ minutes |
+| New (load_by_id loop) | 90 ms | ~5 seconds |
+| Incremental (3 new only) | 23 ms | 23 ms |
+
+**Note for qcodes team**: The ideal API would be a single function that returns
+lightweight run metadata (run_id, exp_name, sample_name, timestamps, guid,
+result_counter, metadata) for all or a range of runs, without creating full
+DataSet objects. Something like `get_run_overview(conn, start_id, end_id)`
+that does a single SQL query. This would reduce the per-run cost from 3ms
+(load_by_id) to <0.1ms (pure SQL).
diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index c21c3ece..be1e1f1c 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -28,7 +28,7 @@
 
 from .. import log as plottrlog
 from ..data.qcodes_dataset import (get_runs_from_db_as_dataframe,
-                                   get_runs_from_db,
+                                   get_runs_from_db, get_runs_from_db_fast,
                                    get_ds_structure, load_dataset_from)
 from plottr.gui.widgets import MonitorIntervalInput, FormLayoutWrapper, dictToTreeWidgetItems
 
@@ -342,8 +342,9 @@ class LoadDBProcess(QtCore.QObject):
     It's good to have this in a separate thread because it can be a bit slow
     for large databases.
 
-    Supports incremental loading: when ``start`` is set, only loads runs
-    with index >= start, then emits the partial dataframe for merging.
+    Uses ``get_runs_from_db_fast`` which loads datasets via ``load_by_id``
+    directly, bypassing the slow ``experiments()`` + ``data_sets()`` enumeration.
+    Supports incremental loading via ``start_run_id``.
     """
     dbdfLoaded = Signal(object)
     pathSet = Signal()
@@ -351,17 +352,17 @@ class LoadDBProcess(QtCore.QObject):
     def __init__(self) -> None:
         super().__init__()
         self.path: Optional[str] = None
-        self.start: int = 0
+        self.start_run_id: int = 1
 
-    def setPath(self, path: str, start: int = 0) -> None:
+    def setPath(self, path: str, start_run_id: int = 1) -> None:
         self.path = path
-        self.start = start
+        self.start_run_id = start_run_id
         self.pathSet.emit()
 
     def loadDB(self) -> None:
         assert self.path is not None
-        overview = get_runs_from_db(self.path, start=self.start,
-                                    get_structure=False)
+        overview = get_runs_from_db_fast(self.path,
+                                         start_run_id=self.start_run_id)
         if overview:
             dbdf = pandas.DataFrame.from_dict(overview, orient='index')
         else:
@@ -567,7 +568,7 @@ def loadFullDB(self, path: Optional[str] = None) -> None:
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
-                self.loadDBProcess.setPath(self.filepath, start=0)
+                self.loadDBProcess.setPath(self.filepath, start_run_id=1)
 
     def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
         if dbdf.size == 0 and self.dbdf is not None:
@@ -621,12 +622,10 @@ def refreshDB(self) -> None:
                 self.latestRunId = -1
 
             # Incremental refresh: only load runs newer than what we have.
-            # The start parameter indexes into the sorted list of runs, so we
-            # use the count of runs we already have as the start offset.
-            start = len(self.dbdf) if self.dbdf is not None and self.dbdf.size > 0 else 0
+            start_run_id = self.latestRunId + 1 if self.latestRunId is not None and self.latestRunId > 0 else 1
             if self.filepath is not None:
                 if not self.loadDBThread.isRunning():
-                    self.loadDBProcess.setPath(self.filepath, start=start)
+                    self.loadDBProcess.setPath(self.filepath, start_run_id=start_run_id)
 
     @Slot(float)
     def setMonitorInterval(self, val: float) -> None:
diff --git a/plottr/data/qcodes_dataset.py b/plottr/data/qcodes_dataset.py
index cd497575..aa279118 100644
--- a/plottr/data/qcodes_dataset.py
+++ b/plottr/data/qcodes_dataset.py
@@ -17,6 +17,7 @@
 from qcodes.dataset.data_set import load_by_id
 from qcodes.dataset.experiment_container import experiments
 from qcodes.dataset.sqlite.database import conn_from_dbpath_or_conn, initialise_or_create_database_at
+from qcodes.dataset.sqlite.queries import get_last_run
 
 from .datadict import DataDictBase, DataDict, combine_datadicts
 from ..node.node import Node, updateOption
@@ -222,6 +223,62 @@ def get_runs_from_db_as_dataframe(path: str) -> pd.DataFrame:
     return df
 
 
+def _ds_to_info_dict(ds: 'DataSetProtocol') -> DataSetInfoDict:
+    """Extract inspectr-relevant info from a dataset without loading data or snapshot."""
+    _start = ds.run_timestamp()
+    _complete = ds.completed_timestamp()
+    return DataSetInfoDict(
+        experiment=ds.exp_name,
+        sample=ds.sample_name,
+        name=ds.name,
+        started_date=_start[:10] if _start else '',
+        started_time=_start[11:] if _start else '',
+        completed_date=_complete[:10] if _complete else '',
+        completed_time=_complete[11:] if _complete else '',
+        structure=None,
+        records=ds.number_of_results,
+        guid=ds.guid,
+        inspectr_tag=ds.metadata.get('inspectr_tag', ''),
+    )
+
+
+def get_runs_from_db_fast(path: str,
+                          start_run_id: int = 1,
+                          ) -> Dict[int, DataSetInfoDict]:
+    """Fast alternative to ``get_runs_from_db`` that avoids the expensive
+    ``experiments()`` + ``data_sets()`` enumeration.
+
+    Uses ``load_by_id`` directly for each run_id, which is O(1) per run
+    instead of O(N) for the experiment/dataset iteration approach.
+
+    :param path: path to the qcodes .db file.
+    :param start_run_id: first run_id to load (inclusive). Use for incremental
+        loading: pass the last known run_id + 1 to load only new runs.
+    :returns: dictionary mapping run_id to dataset info.
+    """
+    initialise_or_create_database_at(path)
+    read_only = sys.version_info >= (3, 11)
+    conn_kw: Dict[str, Any] = {'conn': None, 'path_to_db': path}
+    if read_only:
+        conn_kw['read_only'] = True
+    conn = conn_from_dbpath_or_conn(**conn_kw)
+
+    overview: Dict[int, DataSetInfoDict] = {}
+    with closing(conn) as conn_:
+        last = get_last_run(conn_)
+        if last is None:
+            return overview
+
+        for run_id in range(start_run_id, last + 1):
+            try:
+                ds = load_by_id(run_id, conn=conn_)
+                overview[run_id] = _ds_to_info_dict(ds)
+            except Exception:
+                pass  # skip missing/corrupt runs
+
+    return overview
+
+
 # Extracting data
 
 def ds_to_datadicts(ds: 'DataSetProtocol') -> Dict[str, DataDict]:

From a3dbc5495f92226cc66552f401e14cf41dcd5f9e Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 12:14:23 +0200
Subject: [PATCH 13/64] feat: add loading progress and status text to inspectr

RunList now shows contextual overlay messages:
- 'Loading database... (N/M datasets)' with live progress during load
- 'Select a date on the left to browse datasets.' when idle
- 'No datasets found in this database.' for empty DBs
- 'No datasets match the current filter.' when star/cross filters hide all

Progress is reported from the worker thread via progressUpdated signal,
updated every 10 datasets for smooth display without overhead.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py       | 55 +++++++++++++++++++++++++++++++++--
 plottr/data/qcodes_dataset.py |  8 ++++-
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index be1e1f1c..3b55996d 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -143,6 +143,25 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         self.itemSelectionChanged.connect(self.selectRun)
         self.itemActivated.connect(self.activateRun)
 
+        # Overlay label for status messages
+        self._overlayLabel = QtWidgets.QLabel(self.viewport())
+        self._overlayLabel.setAlignment(QtCore.Qt.AlignCenter)
+        self._overlayLabel.setWordWrap(True)
+        self._overlayLabel.setStyleSheet(
+            "color: gray; font-size: 13pt; padding: 40px;"
+        )
+        self._overlayLabel.setAttribute(QtCore.Qt.WA_TransparentForMouseEvents)
+        self.setOverlayText("Select a date on the left to browse datasets.")
+
+    def setOverlayText(self, text: str) -> None:
+        """Show a centered overlay message. Pass empty string to hide."""
+        self._overlayLabel.setText(text)
+        self._overlayLabel.setVisible(bool(text))
+
+    def resizeEvent(self, event: QtGui.QResizeEvent) -> None:
+        super().resizeEvent(event)
+        self._overlayLabel.setGeometry(self.viewport().rect())
+
         self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
         self.customContextMenuRequested.connect(self.showContextMenu)
 
@@ -190,22 +209,28 @@ def addRun(self, runId: int, **vals: str) -> None:
 
     def setRuns(self, selection: Mapping[int, Mapping[str, str]], show_only_star: bool, show_also_cross: bool) -> None:
         self.clear()
+        self.setOverlayText('')
 
         # disable sorting before inserting values to avoid performance hit
         self.setSortingEnabled(False)
 
+        count = 0
         for runId, record in selection.items():
             tag = record.get('inspectr_tag', '')
             if show_only_star and tag == '':
                 continue
             elif show_also_cross or tag != 'cross':
                 self.addRun(runId, **record)
+                count += 1
 
         self.setSortingEnabled(True)
 
         for i in range(len(self.cols)):
             self.resizeColumnToContents(i)
 
+        if count == 0:
+            self.setOverlayText("No datasets match the current filter.")
+
     def updateRuns(self, selection: Mapping[int, Mapping[str, str]]) -> None:
 
         run_added = False
@@ -347,6 +372,7 @@ class LoadDBProcess(QtCore.QObject):
     Supports incremental loading via ``start_run_id``.
     """
     dbdfLoaded = Signal(object)
+    progressUpdated = Signal(int, int)  # (current, total)
     pathSet = Signal()
 
     def __init__(self) -> None:
@@ -361,14 +387,20 @@ def setPath(self, path: str, start_run_id: int = 1) -> None:
 
     def loadDB(self) -> None:
         assert self.path is not None
-        overview = get_runs_from_db_fast(self.path,
-                                         start_run_id=self.start_run_id)
+        overview = get_runs_from_db_fast(
+            self.path,
+            start_run_id=self.start_run_id,
+            progress_callback=self._onProgress,
+        )
         if overview:
             dbdf = pandas.DataFrame.from_dict(overview, orient='index')
         else:
             dbdf = pandas.DataFrame()
         self.dbdfLoaded.emit(dbdf)
 
+    def _onProgress(self, current: int, total: int) -> None:
+        self.progressUpdated.emit(current, total)
+
 
 class QCodesDBInspector(QtWidgets.QMainWindow):
     """
@@ -498,6 +530,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         self.loadDBProcess.pathSet.connect(self.loadDBThread.start)
         self.loadDBProcess.dbdfLoaded.connect(self.DBLoaded)
         self.loadDBProcess.dbdfLoaded.connect(self.loadDBThread.quit)
+        self.loadDBProcess.progressUpdated.connect(self.onLoadProgress)
         self.loadDBThread.started.connect(self.loadDBProcess.loadDB)
 
         ### connect signals/slots
@@ -568,11 +601,19 @@ def loadFullDB(self, path: Optional[str] = None) -> None:
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
+                self.runList.setOverlayText("Loading database...")
                 self.loadDBProcess.setPath(self.filepath, start_run_id=1)
 
+    @Slot(int, int)
+    def onLoadProgress(self, current: int, total: int) -> None:
+        self.runList.setOverlayText(
+            f"Loading database... ({current}/{total} datasets)")
+
     def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
         if dbdf.size == 0 and self.dbdf is not None:
             LOGGER.debug('DB reloaded with no new data. Skipping update.')
+            self.runList.setOverlayText(
+                "Select a date on the left to browse datasets.")
             return None
 
         if self.latestRunId is not None and self.dbdf is not None and dbdf.size > 0:
@@ -592,6 +633,14 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
         self.dateList.sendSelectedDates()
         LOGGER.debug('DB loaded/refreshed')
 
+        # Set appropriate overlay text
+        if self.dbdf is None or self.dbdf.size == 0:
+            self.runList.setOverlayText(
+                "No datasets found in this database.")
+        elif len(self._selected_dates) == 0:
+            self.runList.setOverlayText(
+                "Select a date on the left to browse datasets.")
+
         if self.latestRunId is not None and self.dbdf is not None and self.dbdf.size > 0:
             idxs = self.dbdf.index.values
             newIdxs = idxs[idxs > self.latestRunId]
@@ -672,6 +721,8 @@ def setDateSelection(self, dates: Sequence[str]) -> None:
         else:
             self._selected_dates = ()
             self.runList.clear()
+            self.runList.setOverlayText(
+                "Select a date on the left to browse datasets.")
 
     @Slot(int)
     def setRunSelection(self, runId: int) -> None:
diff --git a/plottr/data/qcodes_dataset.py b/plottr/data/qcodes_dataset.py
index aa279118..932e75cd 100644
--- a/plottr/data/qcodes_dataset.py
+++ b/plottr/data/qcodes_dataset.py
@@ -244,6 +244,7 @@ def _ds_to_info_dict(ds: 'DataSetProtocol') -> DataSetInfoDict:
 
 def get_runs_from_db_fast(path: str,
                           start_run_id: int = 1,
+                          progress_callback: Optional[Any] = None,
                           ) -> Dict[int, DataSetInfoDict]:
     """Fast alternative to ``get_runs_from_db`` that avoids the expensive
     ``experiments()`` + ``data_sets()`` enumeration.
@@ -254,6 +255,7 @@ def get_runs_from_db_fast(path: str,
     :param path: path to the qcodes .db file.
     :param start_run_id: first run_id to load (inclusive). Use for incremental
         loading: pass the last known run_id + 1 to load only new runs.
+    :param progress_callback: optional callable(current, total) for progress.
     :returns: dictionary mapping run_id to dataset info.
     """
     initialise_or_create_database_at(path)
@@ -269,13 +271,17 @@ def get_runs_from_db_fast(path: str,
         if last is None:
             return overview
 
-        for run_id in range(start_run_id, last + 1):
+        total = last - start_run_id + 1
+        for i, run_id in enumerate(range(start_run_id, last + 1)):
             try:
                 ds = load_by_id(run_id, conn=conn_)
                 overview[run_id] = _ds_to_info_dict(ds)
             except Exception:
                 pass  # skip missing/corrupt runs
 
+            if progress_callback is not None and (i % 10 == 0 or i == total - 1):
+                progress_callback(i + 1, total)
+
     return overview
 
 

From 2d5c2710bfb71aced7903348040b9d2ac2d257cf Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 13:58:19 +0200
Subject: [PATCH 14/64] fix: overlay text stuck after reloading same DB file

Check actual RunList widget state (topLevelItemCount) instead of
_selected_dates to decide whether to show the hint text. This handles
same-file reload, empty date selection, and filter edge cases.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 3b55996d..9427c49e 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -633,11 +633,11 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
         self.dateList.sendSelectedDates()
         LOGGER.debug('DB loaded/refreshed')
 
-        # Set appropriate overlay text
+        # Set appropriate overlay text after loading completes
         if self.dbdf is None or self.dbdf.size == 0:
             self.runList.setOverlayText(
                 "No datasets found in this database.")
-        elif len(self._selected_dates) == 0:
+        elif self.runList.topLevelItemCount() == 0:
             self.runList.setOverlayText(
                 "Select a date on the left to browse datasets.")
 

From 78a642b1a6f1eb293f4ea9340ff803bde1bf003c Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:03:51 +0200
Subject: [PATCH 15/64] fix: collapse info pane by default, enable smooth
 scrolling

- Data structure and Metadata sections now collapsed by default
  (user expands what they need)
- Set ScrollPerPixel on RunInfo tree widget so tall rows (e.g., long
  exception tracebacks in metadata) can be scrolled smoothly instead
  of jumping to the next row

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 9427c49e..bb326e9a 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -293,6 +293,10 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         self.setHeaderLabels(['Key', 'Value'])
         self.setColumnCount(2)
 
+        # Smooth pixel-based scrolling so tall rows (e.g., long tracebacks)
+        # can be scrolled through without jumping to the next row.
+        self.setVerticalScrollMode(QtWidgets.QAbstractItemView.ScrollPerPixel)
+
         self._snapshotItem: Optional[QtWidgets.QTreeWidgetItem] = None
         self._snapshotData: Optional[dict] = None
         self._snapshotLoaded = False
@@ -323,7 +327,7 @@ def setInfo(self, infoDict: Dict[str, Union[dict, str]]) -> None:
                     for child in dictToTreeWidgetItems(value):
                         item.addChild(child)
                 self.addTopLevelItem(item)
-                item.setExpanded(True)
+                item.setExpanded(False)
 
         for i in range(2):
             self.resizeColumnToContents(i)

From 307b7a405236ff737c7f3f72d188922e0525a180 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:08:53 +0200
Subject: [PATCH 16/64] feat: add plot backend selector to inspectr toolbar

Adds a combo box in the toolbar to switch between matplotlib and
pyqtgraph backends. Default is matplotlib. The selection applies to
all newly opened plot windows. Existing windows keep their backend.

The combo box respects the --plotWidgetClass passed via constructor
(e.g., from script_pyqtgraph entrypoint).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index bb326e9a..a34ed9b3 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -484,6 +484,26 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         self.autoLaunchPlots.setToolTip(tt)
         self.toolbar.addWidget(self.autoLaunchPlots)
 
+        self.toolbar.addSeparator()
+
+        # toolbar item: plot backend selector
+        backendLabel = QtWidgets.QLabel(" Plot backend: ")
+        self.toolbar.addWidget(backendLabel)
+        self.plotBackendSelector = QtWidgets.QComboBox()
+        self.plotBackendSelector.addItems(['matplotlib', 'pyqtgraph'])
+        self.plotBackendSelector.setToolTip('Choose plotting backend for new plot windows')
+        if plotWidgetClass is not None:
+            # If a specific backend was passed in, select it
+            class_name = plotWidgetClass.__name__
+            if 'pyqtgraph' in class_name.lower() or 'PG' in class_name:
+                self.plotBackendSelector.setCurrentText('pyqtgraph')
+        self.plotBackendSelector.currentTextChanged.connect(self._onBackendChanged)
+        self.toolbar.addWidget(self.plotBackendSelector)
+        # Sync the class with the initial combo selection
+        self._onBackendChanged(self.plotBackendSelector.currentText())
+
+        self.toolbar.addSeparator()
+
         self.showOnlyStarAction = self.toolbar.addAction(RunList.tag_dict['star'])
         self.showOnlyStarAction.setToolTip('Show only starred runs')
         self.showOnlyStarAction.setCheckable(True)
@@ -761,6 +781,15 @@ def plotRun(self, runId: int) -> None:
         }
         win.showTime()
 
+    @Slot(str)
+    def _onBackendChanged(self, backend: str) -> None:
+        if backend == 'pyqtgraph':
+            from plottr.plot.pyqtgraph.autoplot import AutoPlot as PGAutoPlot
+            self._plotWidgetClass = PGAutoPlot
+        else:
+            from plottr.plot.mpl.autoplot import AutoPlot as MPLAutoPlot
+            self._plotWidgetClass = MPLAutoPlot
+
     def setTag(self, item: QtWidgets.QTreeWidgetItem, tag: str) -> None:
         # set tag in the database
         assert self.filepath is not None

From ee4421c08e868a7dc9b165632dce0c26d248832b Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:16:21 +0200
Subject: [PATCH 17/64] perf: add fast SQL-based DB overview, wider default
 window

New module plottr/data/qcodes_db_overview.py:
- get_db_overview(): single SQL JOIN query for all run metadata
- Skips snapshot and run_description blobs entirely
- Reads inspectr_tag directly as a column from runs table
- 6x faster than load_by_id, ~1000x faster than experiments() enumeration
- Intended for eventual contribution to QCoDeS

Inspectr LoadDBProcess now uses SQL path by default with automatic
fallback to qcodes API (get_runs_from_db_fast) if SQL fails.

Also: default window size widened from 640x640 to 960x640.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py           |  42 ++++++---
 plottr/data/qcodes_db_overview.py | 141 ++++++++++++++++++++++++++++++
 2 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100644 plottr/data/qcodes_db_overview.py

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index a34ed9b3..1b77762d 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -17,7 +17,7 @@
 import sys
 import argparse
 import logging
-from typing import Optional, Sequence, List, Dict, Iterable, Union, cast, Tuple, Mapping
+from typing import Any, Optional, Sequence, List, Dict, Iterable, Union, cast, Tuple, Mapping
 
 from typing_extensions import TypedDict
 
@@ -30,6 +30,7 @@
 from ..data.qcodes_dataset import (get_runs_from_db_as_dataframe,
                                    get_runs_from_db, get_runs_from_db_fast,
                                    get_ds_structure, load_dataset_from)
+from ..data.qcodes_db_overview import get_db_overview
 from plottr.gui.widgets import MonitorIntervalInput, FormLayoutWrapper, dictToTreeWidgetItems
 
 from .autoplot import autoplotQcodesDataset, QCAutoPlotMainWindow
@@ -371,14 +372,17 @@ class LoadDBProcess(QtCore.QObject):
     It's good to have this in a separate thread because it can be a bit slow
     for large databases.
 
-    Uses ``get_runs_from_db_fast`` which loads datasets via ``load_by_id``
-    directly, bypassing the slow ``experiments()`` + ``data_sets()`` enumeration.
-    Supports incremental loading via ``start_run_id``.
+    Uses ``get_db_overview`` (direct SQL) by default for maximum speed.
+    Falls back to ``get_runs_from_db_fast`` (qcodes public API) if the
+    SQL approach fails.
     """
     dbdfLoaded = Signal(object)
     progressUpdated = Signal(int, int)  # (current, total)
     pathSet = Signal()
 
+    #: If True, use direct SQL queries (fast). If False, use qcodes API.
+    use_fast_sql: bool = True
+
     def __init__(self) -> None:
         super().__init__()
         self.path: Optional[str] = None
@@ -391,11 +395,27 @@ def setPath(self, path: str, start_run_id: int = 1) -> None:
 
     def loadDB(self) -> None:
         assert self.path is not None
-        overview = get_runs_from_db_fast(
-            self.path,
-            start_run_id=self.start_run_id,
-            progress_callback=self._onProgress,
-        )
+
+        overview: Optional[Dict[int, Any]] = None
+        if self.use_fast_sql:
+            try:
+                # start_run_id uses > comparison, so subtract 1 for inclusive
+                overview = get_db_overview(
+                    self.path,
+                    start_run_id=self.start_run_id - 1,
+                )
+            except Exception as e:
+                LOGGER.warning(f"Fast SQL overview failed, falling back to "
+                               f"qcodes API: {e}")
+                overview = None
+
+        if overview is None:
+            overview = get_runs_from_db_fast(
+                self.path,
+                start_run_id=self.start_run_id,
+                progress_callback=self._onProgress,
+            )
+
         if overview:
             dbdf = pandas.DataFrame.from_dict(overview, orient='index')
         else:
@@ -542,8 +562,8 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         self.addAction(self.crossAction)
 
         # sizing
-        scaledSize = int(640 * rint(self.logicalDpiX() / 96.0))
-        self.resize(scaledSize, scaledSize)
+        scaledDpi = rint(self.logicalDpiX() / 96.0)
+        self.resize(int(960 * scaledDpi), int(640 * scaledDpi))
 
         ### Thread workers
 
diff --git a/plottr/data/qcodes_db_overview.py b/plottr/data/qcodes_db_overview.py
new file mode 100644
index 00000000..3e10e572
--- /dev/null
+++ b/plottr/data/qcodes_db_overview.py
@@ -0,0 +1,141 @@
+"""
+plottr.data.qcodes_db_overview — Fast database overview queries.
+
+This module provides optimized functions for listing QCoDeS dataset metadata
+without loading full DataSet objects. It uses direct SQLite queries on the
+QCoDeS database schema, avoiding the expensive experiments()/data_sets()
+enumeration.
+
+**Intended for eventual contribution to QCoDeS.** The queries here rely on the
+stable QCoDeS database schema (runs + experiments tables) which has not changed
+across many QCoDeS versions.
+"""
+import time
+import sqlite3
+import logging
+from contextlib import closing
+from typing import Dict, Optional, Tuple
+
+from typing_extensions import TypedDict
+
+logger = logging.getLogger(__name__)
+
+
+class RunOverviewDict(TypedDict):
+    """Lightweight run overview — no snapshot, no data, no full DataSet."""
+    run_id: int
+    experiment: str
+    sample: str
+    name: str
+    started_date: str
+    started_time: str
+    completed_date: str
+    completed_time: str
+    records: int
+    guid: str
+    inspectr_tag: str
+
+
+def _format_timestamp(ts: Optional[float]) -> Tuple[str, str]:
+    """Convert a unix timestamp float to (date, time) strings."""
+    if ts is None or ts == 0:
+        return '', ''
+    try:
+        t = time.localtime(ts)
+        return time.strftime('%Y-%m-%d', t), time.strftime('%H:%M:%S', t)
+    except (OSError, ValueError, OverflowError):
+        return '', ''
+
+
+def get_db_overview(db_path: str,
+                    start_run_id: int = 0,
+                    ) -> Dict[int, RunOverviewDict]:
+    """Get a lightweight overview of all runs in a QCoDeS database.
+
+    Uses a single SQL JOIN query to fetch run metadata from the ``runs`` and
+    ``experiments`` tables, avoiding the expensive ``experiments()`` +
+    ``data_sets()`` enumeration that QCoDeS uses internally.
+
+    For a database with 1500 runs, this completes in ~10ms vs 15+ minutes
+    with the standard QCoDeS API.
+
+    :param db_path: path to the .db file.
+    :param start_run_id: only return runs with run_id > start_run_id.
+        Use 0 to get all runs. Pass the last known run_id for incremental
+        refresh.
+    :returns: dict mapping run_id to RunOverviewDict.
+    """
+    overview: Dict[int, RunOverviewDict] = {}
+
+    try:
+        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
+    except sqlite3.OperationalError:
+        # Fallback for older sqlite versions without URI support
+        conn = sqlite3.connect(db_path)
+
+    with closing(conn) as c:
+        # Check which ad-hoc metadata columns exist in the runs table.
+        # QCoDeS stores metadata added via ds.add_metadata() as extra columns.
+        try:
+            col_info = c.execute('PRAGMA table_info(runs)').fetchall()
+            col_names = {col[1] for col in col_info}
+        except sqlite3.OperationalError:
+            col_names = set()
+
+        has_inspectr_tag = 'inspectr_tag' in col_names
+
+        # Build query: include inspectr_tag column if it exists.
+        # Deliberately excludes snapshot and run_description (large blobs).
+        tag_col = ", r.inspectr_tag" if has_inspectr_tag else ""
+        query = f"""
+            SELECT r.run_id, e.name, e.sample_name, r.name,
+                   r.run_timestamp, r.completed_timestamp,
+                   r.result_counter, r.guid{tag_col}
+            FROM runs r
+            JOIN experiments e ON r.exp_id = e.exp_id
+            WHERE r.run_id > ?
+            ORDER BY r.run_id
+        """
+
+        try:
+            rows = c.execute(query, (start_run_id,)).fetchall()
+        except sqlite3.OperationalError as e:
+            logger.warning(f"Could not query database overview: {e}")
+            return overview
+
+        for row in rows:
+            run_id = row[0]
+            started_date, started_time = _format_timestamp(row[4])
+            completed_date, completed_time = _format_timestamp(row[5])
+            tag = row[8] if has_inspectr_tag and len(row) > 8 and row[8] else ''
+
+            overview[run_id] = RunOverviewDict(
+                run_id=run_id,
+                experiment=row[1] or '',
+                sample=row[2] or '',
+                name=row[3] or '',
+                started_date=started_date,
+                started_time=started_time,
+                completed_date=completed_date,
+                completed_time=completed_time,
+                records=row[6] or 0,
+                guid=row[7] or '',
+                inspectr_tag=tag,
+            )
+
+    return overview
+
+
+def get_last_run_id(db_path: str) -> Optional[int]:
+    """Get the highest run_id in the database, or None if empty."""
+    try:
+        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
+    except sqlite3.OperationalError:
+        conn = sqlite3.connect(db_path)
+
+    with closing(conn) as c:
+        try:
+            row = c.execute("SELECT MAX(run_id) FROM runs").fetchone()
+            return row[0] if row else None
+        except sqlite3.OperationalError:
+            return None

From 2de2ef0a9cd425c5111ebdab45647531e00fcb19 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:27:26 +0200
Subject: [PATCH 18/64] feat: grid layout for pyqtgraph subplots (matching
 matplotlib)

Replace the single-column QSplitter with a QGridLayout that arranges
subplots on a near-square grid, using the same formula as matplotlib:
  nrows = int(n ** 0.5 + 0.5)
  ncols = ceil(n / nrows)

This makes pyqtgraph behave like matplotlib when plotting many
dependents: plots are arranged in columns (e.g., 4 plots = 2x2,
6 = 2x3, 16 = 4x4) instead of stacking vertically.

A scroll area wraps the grid so very many plots remain accessible.
Each plot has a minimum height of 250px to stay readable.

280 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/pyqtgraph/autoplot.py | 39 ++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index 31b9e054..1448cbb5 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -33,7 +33,8 @@
 class FigureWidget(QtWidgets.QWidget):
     """Widget that contains all plots generated by :class:`.FigureMaker`.
 
-    Widget has a vertical layout, and plots can be added in a single column.
+    Plots are arranged on a near-square grid (like matplotlib's GridSpec),
+    so that many subplots remain readable.
     """
 
     def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
@@ -48,14 +49,22 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         self.title = QtWidgets.QLabel(parent=self)
         self.title.setAlignment(QtCore.Qt.AlignHCenter)
 
-        self.split = QtWidgets.QSplitter(parent=self)
-        self.split.setOrientation(QtCore.Qt.Vertical)
+        self._gridWidget = QtWidgets.QWidget(parent=self)
+        self._gridLayout = QtWidgets.QGridLayout(self._gridWidget)
+        self._gridLayout.setContentsMargins(0, 0, 0, 0)
+        self._gridLayout.setSpacing(2)
+        self._gridWidget.setLayout(self._gridLayout)
+
+        # Wrap the grid in a scroll area so very many plots are still accessible
+        self._scrollArea = QtWidgets.QScrollArea(parent=self)
+        self._scrollArea.setWidgetResizable(True)
+        self._scrollArea.setWidget(self._gridWidget)
 
         layout = QtWidgets.QVBoxLayout()
         layout.setContentsMargins(0, 0, 0, 0)
         layout.setSpacing(2)
         layout.addWidget(self.title)
-        layout.addWidget(self.split)
+        layout.addWidget(self._scrollArea)
         self.setLayout(layout)
 
         self.setTitle('')
@@ -64,10 +73,27 @@ def addPlot(self, plot: PlotBase) -> None:
         """Add a :class:`.PlotBase` widget.
 
         :param plot: plot widget
-        :param title: title of the plot
         """
-        self.split.addWidget(plot)
         self.subPlots.append(plot)
+        # Don't add to layout yet — _arrangeGrid() is called after all plots are added
+
+    def _arrangeGrid(self) -> None:
+        """Arrange subplots on a near-square grid, matching matplotlib's layout."""
+        n = len(self.subPlots)
+        if n == 0:
+            return
+
+        nrows = max(1, int(n ** 0.5 + 0.5))
+        ncols = max(1, int(np.ceil(n / nrows)))
+
+        # Set a minimum height per plot so they don't get too squished
+        min_plot_height = 250
+        self._gridWidget.setMinimumHeight(nrows * min_plot_height)
+
+        for i, plot in enumerate(self.subPlots):
+            row = i // ncols
+            col = i % ncols
+            self._gridLayout.addWidget(plot, row, col)
 
     def clearAllPlots(self) -> None:
         """Clear all plot contents."""
@@ -149,6 +175,7 @@ def makeSubPlots(self, nSubPlots: int) -> List[PlotBase]:
                 elif max(self.dataDimensionsInSubPlot(i).values()) == 2:
                     plot = PlotWithColorbar(self.widget)
                     self.widget.addPlot(plot)
+            self.widget._arrangeGrid()
         else:
             self.widget.clearAllPlots()
 

From 7c2dd4a15bd047c2281ffe2f05723b5a6c1a500f Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:39:45 +0200
Subject: [PATCH 19/64] feat: scrollable plot area toggle for both backends

Add 'Scrollable' checkbox in both pyqtgraph and matplotlib toolbars:
- Enabled by default
- When many subplots exist, the plot area expands beyond the window
  and becomes scrollable, keeping each subplot readable
- Can be unchecked to fit everything into the visible window

PyQtGraph: min plot height reduced from 250px to 75px.
Matplotlib: canvas wraps in QScrollArea, min height set per grid row.

280 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py       | 25 +++++++++++++++++++++++++
 plottr/plot/mpl/widgets.py        | 19 ++++++++++++++++++-
 plottr/plot/pyqtgraph/autoplot.py | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index 8628f798..414623e4 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -228,6 +228,12 @@ def __init__(self, name: str, parent: Optional[QtWidgets.QWidget] = None):
             ComplexRepresentation.magAndPhase: self.plotMagPhase
         })
 
+        self.addSeparator()
+        self.scrollableAction = self.addAction('Scrollable')
+        self.scrollableAction.setCheckable(True)
+        self.scrollableAction.setChecked(True)
+        self.scrollableAction.setToolTip('Enable scrollable plot area for many subplots')
+
         self._currentPlotType = PlotType.empty
         self._currentlyAllowedPlotTypes: Tuple[PlotType, ...] = ()
 
@@ -367,6 +373,9 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
         self.plotOptionsToolBar.complexRepresentationSelected.connect(
             self._complexPreferenceFromToolBar
         )
+        self.plotOptionsToolBar.scrollableAction.triggered.connect(
+            self._scrollableFromToolBar
+        )
 
         scaling = dpiScalingFactor(self)
         iconSize = int(36 + 8*(scaling - 1))
@@ -436,6 +445,12 @@ def _complexPreferenceFromToolBar(self, complexRepresentation: ComplexRepresenta
             self.complexRepresentation = complexRepresentation
             self._plotData()
 
+    @Slot()
+    def _scrollableFromToolBar(self) -> None:
+        scrollable = self.plotOptionsToolBar.scrollableAction.isChecked()
+        self.setScrollable(scrollable)
+        self._plotData()
+
     def _plotData(self) -> None:
         """Plot the data using previously determined data and plot types."""
 
@@ -465,5 +480,15 @@ def _plotData(self) -> None:
                     plotDataType=self.plotDataType,
                     **kw)
 
+            nSubPlots = fm.nSubPlots()
+
+        # Set canvas minimum height for scrollable mode
+        scrollable = self.plotOptionsToolBar.scrollableAction.isChecked()
+        if scrollable and nSubPlots > 2:
+            nrows = int(nSubPlots ** 0.5 + 0.5)
+            self.plot.setMinimumHeight(max(nrows * 250, 400))
+        else:
+            self.plot.setMinimumHeight(0)
+
         self.setMeta(self.data)
         self.updatePlot()
diff --git a/plottr/plot/mpl/widgets.py b/plottr/plot/mpl/widgets.py
index 0f4c3227..9a430bf0 100644
--- a/plottr/plot/mpl/widgets.py
+++ b/plottr/plot/mpl/widgets.py
@@ -159,11 +159,28 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
         self.addMplBarOptions()
         defaultIconSize = int(16 * dpiScalingFactor(self))
         self.mplBar.setIconSize(QtCore.QSize(defaultIconSize, defaultIconSize))
+
+        #: scroll area for the canvas (enabled by default)
+        self._scrollArea = QtWidgets.QScrollArea()
+        self._scrollArea.setWidgetResizable(True)
+        self._scrollArea.setWidget(self.plot)
+
         layout = QtWidgets.QVBoxLayout(self)
-        layout.addWidget(self.plot)
+        layout.addWidget(self._scrollArea)
         layout.addWidget(self.mplBar)
         self.setLayout(layout)
 
+        self._scrollable = True
+
+    def setScrollable(self, scrollable: bool) -> None:
+        """Enable or disable scrollable canvas for many subplots."""
+        self._scrollable = scrollable
+        if scrollable:
+            self._scrollArea.setWidgetResizable(True)
+        else:
+            self._scrollArea.setWidgetResizable(True)
+            self.plot.setMinimumHeight(0)
+
     def setMeta(self, data: DataDictBase) -> None:
         """Add meta info contained in the data to the figure.
 
diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index 1448cbb5..baa2393d 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -87,7 +87,7 @@ def _arrangeGrid(self) -> None:
         ncols = max(1, int(np.ceil(n / nrows)))
 
         # Set a minimum height per plot so they don't get too squished
-        min_plot_height = 250
+        min_plot_height = 75
         self._gridWidget.setMinimumHeight(nrows * min_plot_height)
 
         for i, plot in enumerate(self.subPlots):
@@ -95,6 +95,20 @@ def _arrangeGrid(self) -> None:
             col = i % ncols
             self._gridLayout.addWidget(plot, row, col)
 
+    def setScrollable(self, scrollable: bool) -> None:
+        """Enable or disable scroll area around the plot grid."""
+        if scrollable:
+            self._scrollArea.setWidgetResizable(True)
+            self._gridWidget.setMinimumHeight(0)
+            # Re-apply grid min height if we have plots
+            if self.subPlots:
+                n = len(self.subPlots)
+                nrows = max(1, int(n ** 0.5 + 0.5))
+                self._gridWidget.setMinimumHeight(nrows * 75)
+        else:
+            self._scrollArea.setWidgetResizable(True)
+            self._gridWidget.setMinimumHeight(0)
+
     def clearAllPlots(self) -> None:
         """Clear all plot contents."""
         for p in self.subPlots:
@@ -355,6 +369,8 @@ def _plotData(self, **kwargs: Any) -> None:
             self.figConfig.figCopied.connect(self.onfigCopied)
             self.figConfig.figSaved.connect(self.onfigSaved)
 
+        self.fmWidget.setScrollable(self.figOptions.scrollablePlots)
+
         if self.data.has_meta('title'):
             self.fmWidget.setTitle(self.data.meta_val('title'))
             self.title = self.data.meta_val('title')
@@ -434,6 +450,9 @@ class FigureOptions:
     #: whether the dependent data contains any instance of imaginary data
     imagData: bool = False
 
+    #: whether to enable scrollable plot area (useful for many subplots)
+    scrollablePlots: bool = True
+
 
 class FigureConfigToolBar(QtWidgets.QToolBar):
     """Simple toolbar to configure the figure."""
@@ -467,6 +486,16 @@ def __init__(self, options: FigureOptions,
             lambda: self._setOption('combineLinePlots',
                                     combineLinePlots.isChecked())
         )
+
+        scrollablePlots = self.addAction("Scrollable")
+        scrollablePlots.setCheckable(True)
+        scrollablePlots.setChecked(self.options.scrollablePlots)
+        scrollablePlots.setToolTip("Enable scrollable plot area for many subplots")
+        scrollablePlots.triggered.connect(
+            lambda: self._setOption('scrollablePlots',
+                                    scrollablePlots.isChecked())
+        )
+
         complexOptions = QtWidgets.QMenu(parent=self)
         complexGroup = QtWidgets.QActionGroup(complexOptions)
         complexGroup.setExclusive(True)

From c663ec7b25eccd1a865e84cb82b562f8e63ecf50 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:41:55 +0200
Subject: [PATCH 20/64] fix: reduce matplotlib scrollable min height to 100px
 per row

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index 414623e4..3e89ec76 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -486,7 +486,7 @@ def _plotData(self) -> None:
         scrollable = self.plotOptionsToolBar.scrollableAction.isChecked()
         if scrollable and nSubPlots > 2:
             nrows = int(nSubPlots ** 0.5 + 0.5)
-            self.plot.setMinimumHeight(max(nrows * 250, 400))
+            self.plot.setMinimumHeight(max(nrows * 100, 400))
         else:
             self.plot.setMinimumHeight(0)
 

From 497f7071e13a42f09dc623ca58c767766a86c094 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 14:58:33 +0200
Subject: [PATCH 21/64] feat: scrollable off by default, add min height spinbox

Both backends:
- Scrollable is now OFF by default
- Added a 'px' spinbox next to the Scrollable checkbox showing the
  minimum height per subplot row (default 75px pyqtgraph, 100px mpl)
- Spinbox is only enabled when Scrollable is checked
- Minimum value is 40px
- Changing the spinbox value triggers replot

280 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py       | 20 ++++++++++++++++++--
 plottr/plot/pyqtgraph/autoplot.py | 31 +++++++++++++++++++++++++++----
 2 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index 3e89ec76..0c8a9c30 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -231,9 +231,21 @@ def __init__(self, name: str, parent: Optional[QtWidgets.QWidget] = None):
         self.addSeparator()
         self.scrollableAction = self.addAction('Scrollable')
         self.scrollableAction.setCheckable(True)
-        self.scrollableAction.setChecked(True)
+        self.scrollableAction.setChecked(False)
         self.scrollableAction.setToolTip('Enable scrollable plot area for many subplots')
 
+        self.minHeightSpin = QtWidgets.QSpinBox()
+        self.minHeightSpin.setRange(40, 2000)
+        self.minHeightSpin.setValue(100)
+        self.minHeightSpin.setSuffix(" px")
+        self.minHeightSpin.setToolTip("Minimum height per subplot row")
+        self.minHeightSpin.setEnabled(False)
+        self.addWidget(self.minHeightSpin)
+
+        self.scrollableAction.triggered.connect(
+            lambda: self.minHeightSpin.setEnabled(self.scrollableAction.isChecked())
+        )
+
         self._currentPlotType = PlotType.empty
         self._currentlyAllowedPlotTypes: Tuple[PlotType, ...] = ()
 
@@ -376,6 +388,9 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
         self.plotOptionsToolBar.scrollableAction.triggered.connect(
             self._scrollableFromToolBar
         )
+        self.plotOptionsToolBar.minHeightSpin.editingFinished.connect(
+            self._scrollableFromToolBar
+        )
 
         scaling = dpiScalingFactor(self)
         iconSize = int(36 + 8*(scaling - 1))
@@ -486,7 +501,8 @@ def _plotData(self) -> None:
         scrollable = self.plotOptionsToolBar.scrollableAction.isChecked()
         if scrollable and nSubPlots > 2:
             nrows = int(nSubPlots ** 0.5 + 0.5)
-            self.plot.setMinimumHeight(max(nrows * 100, 400))
+            min_h = self.plotOptionsToolBar.minHeightSpin.value()
+            self.plot.setMinimumHeight(max(nrows * min_h, 400))
         else:
             self.plot.setMinimumHeight(0)
 
diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index baa2393d..9ad20a92 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -45,6 +45,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         super().__init__(parent=parent)
 
         self.subPlots: List[PlotBase] = []
+        self._minPlotHeight: int = 75
 
         self.title = QtWidgets.QLabel(parent=self)
         self.title.setAlignment(QtCore.Qt.AlignHCenter)
@@ -77,17 +78,18 @@ def addPlot(self, plot: PlotBase) -> None:
         self.subPlots.append(plot)
         # Don't add to layout yet — _arrangeGrid() is called after all plots are added
 
-    def _arrangeGrid(self) -> None:
+    def _arrangeGrid(self, min_plot_height: Optional[int] = None) -> None:
         """Arrange subplots on a near-square grid, matching matplotlib's layout."""
         n = len(self.subPlots)
         if n == 0:
             return
 
+        if min_plot_height is None:
+            min_plot_height = self._minPlotHeight
+
         nrows = max(1, int(n ** 0.5 + 0.5))
         ncols = max(1, int(np.ceil(n / nrows)))
 
-        # Set a minimum height per plot so they don't get too squished
-        min_plot_height = 75
         self._gridWidget.setMinimumHeight(nrows * min_plot_height)
 
         for i, plot in enumerate(self.subPlots):
@@ -370,6 +372,7 @@ def _plotData(self, **kwargs: Any) -> None:
             self.figConfig.figSaved.connect(self.onfigSaved)
 
         self.fmWidget.setScrollable(self.figOptions.scrollablePlots)
+        self.fmWidget._minPlotHeight = self.figOptions.minPlotHeight
 
         if self.data.has_meta('title'):
             self.fmWidget.setTitle(self.data.meta_val('title'))
@@ -451,7 +454,10 @@ class FigureOptions:
     imagData: bool = False
 
     #: whether to enable scrollable plot area (useful for many subplots)
-    scrollablePlots: bool = True
+    scrollablePlots: bool = False
+
+    #: minimum height per subplot row in pixels (when scrollable)
+    minPlotHeight: int = 75
 
 
 class FigureConfigToolBar(QtWidgets.QToolBar):
@@ -496,6 +502,23 @@ def __init__(self, options: FigureOptions,
                                     scrollablePlots.isChecked())
         )
 
+        self._minHeightSpin = QtWidgets.QSpinBox()
+        self._minHeightSpin.setRange(40, 2000)
+        self._minHeightSpin.setValue(self.options.minPlotHeight)
+        self._minHeightSpin.setSuffix(" px")
+        self._minHeightSpin.setToolTip("Minimum height per subplot row")
+        self._minHeightSpin.setEnabled(self.options.scrollablePlots)
+        self._minHeightSpin.editingFinished.connect(
+            lambda: self._setOption('minPlotHeight',
+                                    self._minHeightSpin.value())
+        )
+        self.addWidget(self._minHeightSpin)
+
+        # Keep spinbox enabled state in sync with scrollable toggle
+        scrollablePlots.triggered.connect(
+            lambda: self._minHeightSpin.setEnabled(scrollablePlots.isChecked())
+        )
+
         complexOptions = QtWidgets.QMenu(parent=self)
         complexGroup = QtWidgets.QActionGroup(complexOptions)
         complexGroup.setExclusive(True)

From f948fb6facae473aa1f0b1ca1e001367a7d6c64c Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 15:14:31 +0200
Subject: [PATCH 22/64] docs: reorganize PERFORMANCE_PLAN.md into implemented
 vs future

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 1089 ++++++-------------------------------------
 1 file changed, 132 insertions(+), 957 deletions(-)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index d5cf0450..096d2558 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -1,1011 +1,186 @@
-# Plottr Performance Optimization Plan
+# Plottr Performance & UX Improvements
 
-## Problem Statement
-
-Plottr's pipeline architecture copies data excessively as it flows through nodes. Each node in the
-`linearFlowchart` (DataSelector → DataGridder → XYSelector → PlotNode) defensively copies data
-before modifying it, and many internal methods (`structure()`, `extract()`, `copy()`, `validate()`)
-add further redundant copies. Profiling shows that a typical 4-stage pipeline produces a **~4.8×
-memory amplification factor** — almost 5 copies of the input data exist simultaneously.
-
-For a 100×100×100 MeshgridDataDict (~38 MB), a single `copy()` takes **92 ms** and `validate()`
-takes **43 ms** due to `np.diff`/`np.unique` on full meshgrid axes. In a real pipeline with
-3–4 nodes, this means hundreds of milliseconds of pure overhead per update, which becomes very
-noticeable during interactive parameter changes.
-
-## Profiling Results Summary
-
-| Operation | 10K pts (312 KB) | 100K pts (4.6 MB) | 1M pts (46 MB) | 100³ mesh (38 MB) |
-|---|---|---|---|---|
-| `copy()` | 0.2 ms | 1.1 ms | 7.8 ms | **92 ms** |
-| `structure()` | 0.06 ms | 0.11 ms | 0.10 ms | **44 ms** |
-| `validate()` | 0.02 ms | 0.05 ms | 0.06 ms | **43 ms** |
-| `extract(1 dep)` | 0.38 ms | 1.0 ms | **15 ms** | — |
-| `mask_invalid()` | — | — | — | **202 ms** |
-| Pipeline (4 stages) | **64 ms** (4.8× mem) | — | — | — |
-
-## Root Causes (Ranked by Impact)
-
-### 1. CRITICAL: Cascading Deep Copies in Node Process Methods
-
-**Every node calls `.copy()` on data it receives, even though pyqtgraph's Flowchart passes data by
-reference.** Worse, inherited nodes copy *again* — `XYSelector` inherits from `DimensionReducer`,
-so data is copied twice (once at each level's `process()`).
-
-Evidence:
-- `DataGridder.process()` — `data = dataout.copy()` (grid.py:473)
-- `DimensionReducer.process()` — `data = dataout.copy()` (dim_reducer.py:682)
-- `XYSelector.process()` — `data = dataout.copy()` (dim_reducer.py:901) ← **second copy in chain**
-- `ScaleUnits.process()` — `data = dataIn.copy()` (scaleunits.py:126)
-- `SubtractAverage.process()` — `data = dataIn.copy()` (correct_offset.py:63)
-- `Fitter.process()` — `dataOut = dataIn.copy()` (fitter.py:606)
-
-**Impact**: In a 4-node pipeline, data is copied 3–4 times. For 38 MB meshgrid data, that's
-~150 MB of unnecessary allocations and ~370 ms of copy time.
-
-### 2. HIGH: MeshgridDataDict.validate() Is Computationally Expensive
-
-`MeshgridDataDict.validate()` (datadict.py:1063-1145) computes `np.diff()` + `np.unique()` +
-`np.sign()` on every axis array for every dependent, verifying monotonicity. For a 100×100×100
-dataset with 2 deps and 3 axes, that's 6 full-array `np.diff` computations on 1M-element arrays.
-
-This takes **43 ms** per call and is called:
-- Once per `structure()` call
-- Once per `copy()` call (via `structure()`)
-- Once per `validate()` directly
-- Multiple times in `datadict_to_meshgrid()`, `meshgrid_to_datadict()`, etc.
-
-Across a pipeline, validate() may be called **6–10 times** for the same data.
-
-### 3. HIGH: structure() Uses deepcopy Unnecessarily
-
-`DataDictBase.structure()` (datadict.py:399-451) does `cp.deepcopy(v2)` on each field's metadata
-dict (line 434), even though:
-- Values are already emptied (`v2['values'] = []`)
-- Metadata is typically just strings and lists of strings
-- A shallow copy would suffice in 99% of cases
-
-### 4. MEDIUM: extract() Uses Deep Copy by Default
-
-`DataDictBase.extract()` (datadict.py:315-362) calls `cp.deepcopy(self[d])` for each selected
-field (line 347), including the numpy array values. `deepcopy` on numpy arrays is significantly
-slower than `array.copy()` because it goes through Python's generic copy protocol rather than
-numpy's optimized memcpy path.
-
-### 5. MEDIUM: mask_invalid() Creates Full Masked Copy
-
-`mask_invalid()` (datadict.py:724-738) uses `np.ma.masked_where(..., copy=True)`, creating a
-completely new masked array for every data field. Many datasets have no invalid entries, making
-this pure overhead.
-
-### 6. LOW: shapes() Wraps Arrays Unnecessarily
-
-`DataDictBase.shapes()` (datadict.py:553-565) calls `np.array(self.data_vals(k)).shape` — the
-`np.array()` wrapper is unnecessary since `data_vals()` already returns an ndarray after
-validation.
+This document summarizes the changes in this PR, the profiling that motivated them,
+and suggestions for future work.
 
 ---
 
-## Risk Analysis & Mitigations
+## Part 1: Implemented — Pipeline Performance (datadict, nodes, gridding)
 
-This section documents the edge cases discovered during investigation and how the proposed
-improvements must account for them.
+### Problem
 
-### Risk 1: Nested Dict Mutations Bypass Dirty Flags
+Plottr's data pipeline copied data excessively as it flowed through nodes. Each node
+defensively deep-copied all data, and internal methods (`structure()`, `validate()`,
+`copy()`) added further redundant copies. For a 100x100x100 MeshgridDataDict (~38 MB),
+a single `copy()` took 92 ms and `validate()` took 43 ms.
 
-`DataDictBase` is a `dict` of dicts. User code commonly mutates inner dicts directly:
-`dd['x']['values'] = new_array`. This does NOT trigger `DataDictBase.__setitem__` because the
-outer dict is not being set — only the inner dict is being mutated.
+### What Changed
 
-**Mitigation**: Do NOT use a general validation cache based on `__setitem__`. Instead, use
-private helper methods (`_build_structure()`) that skip validation only when called from
-code paths that have *just* validated or just constructed fresh data. The public `validate()` API
-always runs. This is safe because the hot-path is internal: `copy()` calls `structure()` which
-calls `validate()` — and after copying validated data, re-validating the copy is redundant.
+**`plottr/data/datadict.py`** (core data container):
+- New `_copy_field()` helper with per-key copy semantics: numpy `.copy()` for arrays,
+  `list()` for axes, `deepcopy` only for mutable metadata
+- Rewrote `copy(deep=True/False)` — no longer chains through `structure()` → `validate()`
+  → `deepcopy`. New `deep=False` shares arrays (xarray-style API, backward compatible)
+- `_build_structure()` private helper that skips redundant validation
+- `MeshgridDataDict.validate()` monotonicity check: replaced `np.unique(np.sign(np.diff(...)))`
+  with direct min/max checks — same coverage, no sort/allocate
+- `mask_invalid()` fast-path: skips masking entirely when data has no invalid entries
+- `shapes()` uses `np.shape()` instead of `np.array(...).shape`
+- `datasets_are_equal()` shape short-circuit + set-based comparison
+- `remove_invalid_entries()` fixed O(n²) `np.append` pattern + fixed crash on inhomogeneous arrays
+- `meshgrid_to_datadict()` / `datadict_to_dataframe()`: `ravel()` instead of `flatten()`
 
-### Risk 2: Monotonicity Check Must Cover the Full Array
+**`plottr/utils/num.py`** (numerical utilities):
+- `largest_numtype()`: dtype check instead of iterating every element as Python object (~15,000× faster)
+- `is_invalid()`: skip zero-array allocation for non-float types
+- `guess_grid_from_sweep_direction()`: convert with `np.asarray()` once instead of 4×
+- `_find_switches()`: compute `is_invalid()` once (was 3×), single `np.percentile([lo,hi])` call
+  (was 2 separate sorts), vectorized boolean filter, `np.nanmean` for NaN-safe sweep direction
 
-The current `MeshgridDataDict.validate()` checks `np.diff(axis_data, axis=axis_num)` on the full
-N-d axis array. Checking only a single 1D slice would miss cases where one slice is monotonic
-but another is flat or reversed.
+**`plottr/node/node.py`**: Defer `structure()` call to only when structure actually changes (50× faster steady-state)
 
-**Mitigation**: Keep checking the full array, but avoid the expensive `np.unique(np.sign(...))`
-pipeline. Instead, compute min/max of the diff directly:
-```python
-d = np.diff(axis_data, axis=axis_num)
-d_sign = np.sign(d[~np.isnan(d)])  # ignore NaN steps
-if d_sign.size > 0:
-    has_zero = np.any(d_sign == 0)
-    not_monotone = not (np.all(d_sign >= 0) or np.all(d_sign <= 0))
-```
-This avoids `np.unique()` (which sorts and allocates) while preserving full coverage. The
-dominant cost becomes `np.diff()` which is a simple O(N) subtraction — much faster than
-diff + sign + unique.
+**`plottr/node/dim_reducer.py`**: Removed redundant `copy()` in `XYSelector.process()`
 
-### Risk 3: Unknown Field Keys in DataDict
+**`plottr/node/grid.py`**: Pass `copy=False` to `datadict_to_meshgrid()` since gridder already copies input
 
-Field dicts can contain custom keys beyond `values/axes/unit/label`. Known cases:
-- `__shape__` key: stored by `datadict_storage.py`, checked in `MeshgridDataDict.validate()`
-- Fitter node adds `'guess'` and `'fit'` fields dynamically (fitter.py:642, 648)
-- Per-field meta keys like `__meta1__` are stored inside field dicts
+**`plottr/plot/base.py`**: `dataclasses.replace` instead of `deepcopy` for complex plot splitting
 
-**Mitigation**: In `structure()` and `extract()`, when replacing `cp.deepcopy()`, we must
-**preserve all keys**, not just the known ones. Use a targeted copy that special-cases only
-`values` (numpy-optimized copy) and `axes` (new list), but copies everything else generically:
-```python
-new_field = {}
-for fk, fv in original_field.items():
-    if fk == 'values':
-        new_field[fk] = fv.copy() if copy else fv  # numpy optimized
-    elif fk == 'axes':
-        new_field[fk] = list(fv)  # new list, strings are immutable
-    else:
-        new_field[fk] = cp.deepcopy(fv)  # safe for mutable meta/custom keys
-```
-This preserves backward compatibility while optimizing the two expensive keys (values, axes).
-
-### Risk 4: In-Place Axis Mutation Breaks Shallow Copies
+### Bugs Fixed
+- `copy()` now properly deep-copies global mutable metadata (was sharing references)
+- `remove_invalid_entries()` no longer crashes when dependents have different numbers of invalid entries
 
-Several nodes mutate the `axes` list in-place:
-- `DimensionReducer._applyDimReductions()` does `del data[n]['axes'][idx]`
-  (dim_reducer.py:595)
-- `structure(remove_data=...)` does `s[n]['axes'].pop(i)` (datadict.py:439)
+### Benchmark Results
 
-If a shallow copy shares the same `axes` list, these mutations would corrupt the original.
+**Micro-benchmarks (key functions):**
 
-**Mitigation**: `copy(deep=False)` (the new shallow copy mode) MUST always create a new `axes`
-list for each field, even when sharing the `values` array. This makes it safe for axis mutation
-while still avoiding the expensive array copy. The implementation is:
-```python
-new_field = {}
-for fk, fv in original_field.items():
-    if fk == 'values':
-        new_field[fk] = fv  # shared reference (NOT copied)
-    elif fk == 'axes':
-        new_field[fk] = list(fv)  # ALWAYS new list
-    else:
-        new_field[fk] = fv  # scalars (unit, label) are immutable
-```
-
-### Risk 5: mask_invalid() Return Type Contract
-
-Downstream plotting code checks `isinstance(data, np.ma.MaskedArray)` and calls `.filled(np.nan)`
-(plot/mpl/plotting.py:99-104, plot/base.py:479,508). If we skip masking for clean data, the
-arrays stay as plain `np.ndarray` and the isinstance checks return False — which is actually
-fine, because the code uses `if isinstance(...): filled()` as a conditional path.
-
-**Mitigation**: The fast-path must use `num.is_invalid()` (not just `np.isnan`) to also catch
-`None` values in object arrays. When no invalid entries exist, skip masking entirely — the
-plotting code handles plain ndarrays correctly. When invalid entries exist, apply masking as
-before.
-
-### Risk 6: shapes() Called on Unvalidated Data
-
-`Node.process()` calls `dataIn.shapes()` (node.py:281) without an explicit prior `validate()`.
-If values are still lists (pre-validation), `data_vals()` returns a list and `.shape` fails.
-
-**Mitigation**: Use `np.shape()` instead of `.shape` — this works on lists, tuples, and arrays
-alike, returning the correct shape without requiring conversion:
-```python
-shapes[k] = np.shape(self.data_vals(k))
-```
-
-### Risk 7: copy() and extract() Semantic Inconsistency
-
-Currently `copy()` uses `ndarray.copy()` (shallow numpy copy) while `extract(copy=True)` uses
-`cp.deepcopy()` (Python generic deep copy). For simple numeric arrays these are equivalent, but
-for object-dtype arrays `deepcopy` recursively copies contained Python objects while
-`ndarray.copy()` only copies the array of pointers.
-
-**Mitigation**: Align both to use `ndarray.copy()` for the `values` key, and `cp.deepcopy()` for
-other mutable values. This is consistent because: (a) plottr stores numeric data in arrays, not
-nested objects, (b) object arrays in plottr contain None values — `ndarray.copy()` handles None
-correctly since None is a singleton.
-
----
-
-## Code Readability: Copy Semantics Design
-
-A key goal is making it **obvious** in the code where data is shared vs. independent. We adopt
-a pattern inspired by xarray's `copy(deep=True/False)` API and numpy conventions.
-
-### Design Principles
-
-1. **Explicit `deep` parameter**: Extend `copy()` to accept `deep=True` (default, backward
-   compatible) and `deep=False` (shares arrays). No separate `shallow_copy()` method — one
-   method, one parameter, one place to look.
-
-2. **Docstrings document ownership**: Every method that returns data states whether the returned
-   arrays are copies or views:
-   ```python
-   def copy(self, deep: bool = True) -> T:
-       """Make a copy of the dataset.
-
-       :param deep: If True (default), all data arrays are copied. The returned
-           dataset is fully independent of the original.
-           If False, the returned dataset shares data array references with the
-           original. Modifications to array *contents* (e.g., ``ret['x']['values'][0] = 5``)
-           will affect both. However, *replacing* an array (``ret['x']['values'] = new_arr``)
-           only affects the copy. Field metadata (axes, unit, label) is always independent.
-       """
-   ```
-
-3. **Nodes document their copy contract**: Each `process()` method gets a one-line comment
-   stating whether it copies or modifies in-place:
-   ```python
-   def process(self, dataIn=None):
-       ...
-       data = dataIn.copy(deep=False)  # shallow: only modifying values for specific fields
-       data['dep_0']['values'] = data['dep_0']['values'] * scale  # replaces array, safe
-   ```
-
-4. **No hidden copies**: Functions that need to modify data must do so on an explicit copy.
-   `Node.process()` base class passes data through by reference (as it already does). Only
-   nodes that transform data should copy. This should be the local decision of each node.
-
-### API Summary
-
-| Method | Arrays | Metadata | Use When |
+| Function | Before | After | Speedup |
 |---|---|---|---|
-| `copy(deep=True)` | Independent copies | Independent copies | Need fully independent data |
-| `copy(deep=False)` | Shared references | Independent copies | Node only modifies a few fields |
-| `extract(copy=True)` | Independent copies | Independent copies | Subsetting fields |
-| `extract(copy=False)` | Shared references | Shared references | Read-only subsetting |
-| `structure()` | Empty (no data) | Independent copies | Getting data shape/layout |
-
----
-
-## Proposed Improvements (Revised)
-
-### Phase 1: Extend copy() with deep parameter & fix cascading copies
-
-#### 1a. Add `deep` parameter to `copy()`
-
-Extend the existing `copy()` method to accept `deep=True/False`, following the xarray convention.
-`deep=True` (default) preserves current behavior. `deep=False` copies the dict structure and
-axes lists but shares numpy array references.
-
-```python
-def copy(self: T, deep: bool = True) -> T:
-    """Make a copy of the dataset.
-
-    :param deep: If True (default), data arrays are independently copied.
-        If False, the returned dataset shares array references with the original.
-        Field metadata (axes, unit, label) is always independently copied.
-    """
-    ret = self.__class__()
-    for k, v in self.items():
-        if self._is_meta_key(k):
-            ret[k] = cp.deepcopy(v)
-        else:
-            new_field = {}
-            for fk, fv in v.items():
-                if fk == 'values':
-                    new_field[fk] = fv.copy() if deep else fv
-                elif fk == 'axes':
-                    new_field[fk] = list(fv)       # always new list (mutation-safe)
-                elif self._is_meta_key(fk):
-                    new_field[fk] = cp.deepcopy(fv) # safe for mutable meta
-                else:
-                    new_field[fk] = fv              # scalars (unit, label) are immutable
-            ret[k] = new_field
-    return ret
-```
-
-This replaces the current `copy()` → `structure()` → `deepcopy` chain with a single efficient
-pass. No separate `shallow_copy()` method needed.
-
-**Impact**: `copy(deep=False)` is essentially free (~0.01 ms vs 92 ms for deep copy on 38 MB
-meshgrid). Even `copy(deep=True)` is faster because it avoids the `structure()` → `validate()`
-→ `deepcopy` chain.
-
-#### 1b. Fix cascading copies in inherited nodes
-
-`XYSelector.process()` calls `super().process()` (which is `DimensionReducer.process()`) which
-already copies. Remove the redundant second copy:
-
-- `DimensionReducer.process()` (dim_reducer.py:682): keep `copy(deep=False)` — it needs to
-  mutate axes and values
-- `XYSelector.process()` (dim_reducer.py:901): **remove** the `.copy()` call — parent already
-  returned a copy
-- `Node.process()` (node.py:263): does NOT copy, just inspects — keep as-is
-
-#### 1c. Use `copy=False` in `datadict_to_meshgrid` when data is already a copy
-
-`DataGridder.process()` already copies input at line 473. Pass `copy=False` to
-`datadict_to_meshgrid()` to avoid a redundant second array copy.
-
-### Phase 2: Optimize Expensive Validation
-
-#### 2a. Skip redundant validation in internal methods
-
-Add a private `_build_structure()` path that skips validation when constructing data from
-known-valid sources. The public `validate()` always runs — no caching.
-
-Specifically:
-- `copy()` already constructs from valid data → skip re-validate
-- `structure()` calls `validate()` first, then constructs → skip re-validate in the construction
-  step
-
-This is implemented by extracting the construction logic out of `structure()` into a helper:
-```python
-def structure(self, ...):
-    if self.validate():
-        return self._build_structure(...)
-    return None
-
-def _build_structure(self, ...):
-    """Build structure dict. Caller must ensure data is validated."""
-    ...  # no validate() call here
-```
-
-**Impact**: Eliminates 50%+ of validate() calls. Especially impactful for MeshgridDataDict
-where validate() costs 43 ms.
-
-#### 2b. Optimize MeshgridDataDict.validate() monotonicity check
-
-Replace `np.unique(np.sign(np.diff(...)))` with a direct min/max check on the diff array.
-This avoids the sort + allocate from `np.unique()` while preserving full-array coverage:
-
-```python
-d = np.diff(axis_data, axis=axis_num)
-# Use nan-aware checks without materializing sign/unique arrays
-valid_d = d[~np.isnan(d)] if np.issubdtype(d.dtype, np.floating) else d
-if valid_d.size > 0:
-    if np.any(valid_d == 0):
-        msg += "no variation along axis"
-    if not (np.all(valid_d > 0) or np.all(valid_d < 0)):
-        msg += "not monotonous"
-```
-
-**Impact**: ~50% faster than current (no sort/unique), while checking every element.
-
-### Phase 3: Optimize structure() and extract()
-
-#### 3a. Replace deepcopy in structure() with targeted copy
-
-Use the same targeted copy pattern as `copy()`: special-case `values` (set to `[]`) and `axes`
-(new list), deepcopy only meta keys (which may be mutable), pass through scalars directly.
-Preserve ALL keys (not just known ones) to handle custom field keys like `__shape__`.
-
-#### 3b. Replace deepcopy in extract() with targeted copy
-
-Same pattern: use `ndarray.copy()` for values, `list()` for axes, `deepcopy` for meta keys,
-passthrough for scalars. This aligns `extract(copy=True)` semantics with `copy(deep=True)`.
-
-### Phase 4: Optimize mask_invalid()
-
-#### 4a. Skip masking when data has no invalid entries
-
-Use `num.is_invalid()` (which handles both None and NaN) for the fast check:
-```python
-def mask_invalid(self: T) -> T:
-    for d, _ in self.data_items():
-        arr = self.data_vals(d)
-        invalid_mask = num.is_invalid(arr)
-        if not np.any(invalid_mask):
-            continue  # no invalid entries, skip masking entirely
-        vals = np.ma.masked_where(invalid_mask, arr, copy=True)
-        ...
-```
-
-Downstream plotting code handles both plain ndarrays and MaskedArrays correctly (conditional
-isinstance checks in plot/mpl/plotting.py:99-104).
-
-#### 4b. Use copy=False when data is already a copy
-
-In pipeline nodes that call `mask_invalid()` after already copying data (DimensionReducer,
-Histogrammer), pass through a parameter or check `owndata` to avoid re-copying.
-
-### Phase 5: Minor Optimizations
-
-#### 5a. Use np.shape() in shapes()
-
-Replace `np.array(self.data_vals(k)).shape` with `np.shape(self.data_vals(k))`. This handles
-lists/tuples/arrays uniformly without allocating a new array. Safe for unvalidated data.
-
-#### 5b. Optimize datasets_are_equal()
-
-Short-circuit on shape mismatch before comparing values.
-
----
-
-## Expected Impact
-
-| Phase | Time Savings (per pipeline update) | Memory Savings |
-|---|---|---|
-| Phase 1 (copy(deep=False) + fix cascading) | 50–70% of copy time | 60–75% reduction |
-| Phase 2 (skip redundant validation + optimize) | 60–80% of validate time | Negligible |
-| Phase 3 (structure/extract targeted copy) | 30–50% of structure ops | Minor |
-| Phase 4 (mask_invalid fast-path) | 95%+ when data is clean | 50% reduction |
-| Phase 5 (Minor) | 5–10% misc | Minor |
-
-**Combined estimate for 100×100×100 MeshgridDataDict pipeline:**
-- Current: ~500 ms, ~190 MB allocated (4.8× input)
-- After all phases: ~50–80 ms, ~50–60 MB allocated (~1.3× input)
-
-## Implementation Order
-
-**Prerequisite**: Add comprehensive test coverage for copy semantics, data integrity through
-pipeline, and edge cases (object arrays, complex data, masked data, custom field keys).
-
-Then:
-1. **Phase 1a** (copy deep parameter) — foundation for everything else
-2. **Phase 2a** (skip redundant validation) — highest ROI, low risk
-3. **Phase 2b** (optimize monotonicity check) — high ROI, low risk
-4. **Phase 1b** (fix cascading copies) — high ROI, needs test coverage first
-5. **Phase 3a+3b** (structure/extract optimization) — medium ROI, low risk
-6. **Phase 4a** (mask_invalid fast-path) — high ROI for clean data, low risk
-7. **Phase 1c + Phase 4b + Phase 5** — incremental improvements
-
-## Risks & Considerations
-
-- **Shared array mutation**: With `copy(deep=False)`, if a node modifies array *contents*
-  in-place (e.g. `arr[0] = 5` or `arr *= 2`), it corrupts the original. Nodes must *replace*
-  arrays (`data['x']['values'] = new_arr`) rather than mutate them. This is already the common
-  pattern in most nodes, but must be verified with tests.
-- **Backward compatibility**: `copy()` default is `deep=True`, preserving current behavior.
-  `deep=False` is opt-in. No external API is removed.
-- **Testing prerequisite**: Before making any optimization changes, comprehensive tests must
-  verify: copy isolation, pipeline data integrity, edge cases (object arrays, None, complex,
-  masked), and custom field key preservation.
-
----
-
-## Execution Results
+| `largest_numtype` (500K float) | 29.8 ms | 0.002 ms | ~15,000× |
+| `mesh_500k_copy()` | 42.2 ms | 2.9 ms | 14.8× |
+| `node_process` (500K mesh, steady state) | 7.4 ms | 0.15 ms | 50× |
+| `_find_switches` (640K pts) | 80 ms | 31 ms | 2.6× |
+| `datadict_to_meshgrid` (640K pts) | 175 ms | 71 ms | 2.5× |
+| `mesh_500k_validate()` | 20.5 ms | 14.1 ms | 1.5× |
 
-All optimizations implemented and tested. **173 tests pass** (0 failures).
+**Real experimental data (P1386BB_00BE_datasets.db, steady-state refresh):**
 
-### Changes Made
-
-| File | Changes |
-|---|---|
-| `plottr/data/datadict.py` | Added `_copy_field()` helper; rewrote `copy(deep=True/False)`; optimized `structure()` with `_build_structure()`; replaced `cp.deepcopy` in `extract()`; optimized `MeshgridDataDict.validate()` monotonicity check; added `mask_invalid()` fast-path for clean data; fixed `shapes()` to use `np.shape()`; optimized `datasets_are_equal()` |
-| `plottr/node/dim_reducer.py` | Removed redundant `copy()` in `XYSelector.process()` |
-| `plottr/node/grid.py` | Pass `copy=False` to `datadict_to_meshgrid()` |
-| `test/pytest/test_datadict_copy_semantics.py` | 64 new tests for copy semantics |
-| `test/pytest/test_pipeline_coverage.py` | 63 new tests for pipeline coverage |
-
-### Benchmark Comparison (Baseline -> Final)
-
-| Benchmark | Before (ms) | After (ms) | Speedup | Notes |
+| Dataset | Data Size | Before | After | Speedup |
 |---|---|---|---|---|
-| **mesh_500k_copy** | 42.2 | 2.9 | **14.8x** | copy() no longer calls structure()/validate() |
-| **mesh_50k_copy** | 2.7 | 0.4 | **6.1x** | Same optimization, smaller data |
-| **tab_10k_copy** | 0.23 | 0.15 | **1.5x** | Smaller effect on tabular data |
-| **mesh_500k_validate** | 20.5 | 14.1 | **1.5x** | Removed np.unique/np.sign overhead |
-| **mesh_500k_structure** | 20.3 | 13.9 | **1.5x** | _build_structure() skips re-validation |
-| **mesh_50k_mask_invalid** | 10.0 | 9.1 | **1.1x** | Fast-path skips clean data |
-| **mesh_500k_mask_invalid (mem)** | 19537 KB | 0.3 KB | **~0** | No allocation for clean data |
-| **pipeline_4stage** | 8.2 | 5.7 | **1.4x** | Cumulative improvement |
-| **equality_5k** | 1.4 | 1.2 | **1.1x** | Shape short-circuit + set ops |
-
-### Bug Fixed
-
-- `copy()` previously did not deep-copy global mutable metadata (e.g., `dd.add_meta('info', {'key': 'val'})`). The new implementation properly deep-copies all metadata.
-
-### New APIs
-
-- `DataDictBase.copy(deep=True/False)` — `deep=False` shares array data (xarray convention)
-- `DataDictBase._build_structure()` — private helper that skips validation
-- `DataDictBase._copy_field()` — targeted field copy with per-key semantics
-
----
-
-## Further Optimization Opportunities
-
-Additional performance improvements identified through comprehensive codebase analysis.
-Organized from highest to lowest impact.
-
-### Tier 1: Critical Quick Wins
-
-#### HDF5 Data Loading: Avoid Full-File Reads for Metadata
-
-**Files:** `plottr/data/datadict_storage.py`
-
-**Problem:** Two lines read the entire HDF5 dataset into memory just to get its shape:
-- Line 274: `lens = [len(grp[k][:]) for k in keys]` reads ALL data to get lengths
-- Line 305: `entry['__shape__'] = ds[:].shape` reads ALL data to get shape
-
-**Fix:**
-`python
-# Line 274: use HDF5 metadata (zero I/O)
-lens = [grp[k].shape[0] for k in keys]
-
-# Line 305: use HDF5 shape attribute
-entry['__shape__'] = ds.shape
-`
-
-**Impact:** 50-80% reduction in HDF5 load time for large files. Eliminates massive
-memory spikes when loading. This is a 1-line fix each.
-
-#### Node.process() Redundant structure() Call
-
-**File:** `plottr/node/node.py:282`
-
-**Problem:** `dstruct = dataIn.structure(add_shape=False)` is called on every
-pipeline update in every node. For MeshgridDataDict this means validate() + deepcopy
-of all field metadata. But the result is only stored for signal emission — the actual
-change detection at lines 293-308 uses axes/deps/type/shapes which are already computed
-at lines 279-281.
-
-**Fix:** Replace with a lazy approach — only compute structure when it's actually needed
-(i.e., when `_structChanged` is True):
-`python
-dstruct = None  # defer computation
-# ... change detection using axes/deps/type ...
-if _structChanged:
-    dstruct = dataIn.structure(add_shape=False)
-self.dataStructure = dstruct if dstruct is not None else self.dataStructure
-`
-
-**Impact:** Eliminates the single most expensive call in the pipeline hot path for
-steady-state operation (when structure doesn't change between updates). For 500K-element
-MeshgridDataDict: saves ~14ms per node per update.
-
-### Tier 2: High Impact
-
-#### Plot Complex Data: Replace deepcopy with Targeted Copy
-
-**File:** `plottr/plot/base.py:456, 488, 517`
-
-**Problem:** `_splitComplexData()` uses `deepcopy(re_plotItem)` to create Real/Imag or
-Mag/Phase split views. This deep-copies the entire PlotItem including array data references
-and all metadata. Called on every plot update for complex-valued data.
-
-**Fix:** PlotItem is a dataclass — use `dataclasses.replace()` or manual copy:
-`python
-from dataclasses import replace
-im_plotItem = replace(re_plotItem,
-    id=re_plotItem.id + 1,
-    data=list(re_plotItem.data),
-    labels=list(re_plotItem.labels) if re_plotItem.labels else None,
-)
-`
-
-**Impact:** 2-5x faster rendering for complex-valued plots.
-
-#### Signal Emission Overhead in Nodes
-
-**File:** `plottr/node/node.py:316-334`
-
-**Problem:** Up to 7 Qt signals are emitted per data update in each node. On first data
-arrival, ALL signals fire (lines 284-290). Each signal can trigger widget updates and
-downstream processing.
-
-**Opportunities:**
-- `dataFieldsChanged` (line 323) is redundant — it emits `daxes + ddeps` which
-  is just the union of `dataAxesChanged` and `dataDependentsChanged`
-- `newDataStructure` (line 330) carries structure+shapes+type, overlapping with
-  `dataStructureChanged` (line 329) + `dataShapesChanged` (line 334)
-
-**Fix (conservative):** Remove `dataFieldsChanged` and have listeners use
-`dataAxesChanged` + `dataDependentsChanged` instead. Connect `newDataStructure`
-only where both structure and shapes are needed together.
-
-**Fix (aggressive):** Coalesce all signals into a single `dataChanged(dict)` signal
-carrying change flags. Reduces signal/slot overhead from 7 to 1.
-
-#### largest_numtype() Flattens Entire Array
-
-**File:** `plottr/utils/num.py:28`
-
-**Problem:** `types = {type(a) for a in np.array(arr).flatten()}` iterates every
-element of the array as a Python object to collect types. For a 1M-element array,
-this creates 1M Python objects.
-
-**Fix:** Use numpy's dtype system directly:
-`python
-def largest_numtype(arr, include_integers=True):
-    arr = np.asarray(arr)
-    if np.issubdtype(arr.dtype, np.complexfloating):
-        return complex
-    if np.issubdtype(arr.dtype, np.floating):
-        return float
-    if include_integers and np.issubdtype(arr.dtype, np.integer):
-        return float  # promote to float for plotting
-    # Only fall back to element-scanning for object arrays
-    if arr.dtype == object:
-        types = {type(a) for a in arr.ravel() if a is not None}
-        # ... existing logic ...
-    return None
-`
-
-**Impact:** ~100x faster for numeric arrays (avoids Python-level iteration entirely).
-
-### Tier 3: Medium Impact
-
-#### is_invalid() Allocates Unnecessary Zero Array
-
-**File:** `plottr/utils/num.py:57-65`
-
-**Problem:** For non-float arrays, creates `np.zeros(a.shape, dtype=bool)` just to
-OR with the None check. The zeros contribute nothing.
-
-**Fix:**
-`python
-def is_invalid(a):
-    isnone = a == None
-    if a.dtype in FLOATTYPES:
-        return isnone | np.isnan(a)
-    return isnone  # skip zeros allocation
-`
-
-#### guess_grid_from_sweep_direction(): Repeated np.array() Calls
-
-**File:** `plottr/utils/num.py:236-242`
-
-**Problem:** `np.array(vals)` called 4 times on the same data inside a loop.
-
-**Fix:** Convert once at the top of the loop: `vals_arr = np.asarray(vals)`
-
-#### remove_invalid_entries(): O(n^2) np.append Pattern
-
-**File:** `plottr/data/datadict.py:1068-1086`
-
-**Problem:** Uses `np.append(_idxs, _newidxs)` repeatedly which copies the entire
-array each time.
-
-**Fix:** Collect indices in a Python list, concatenate once:
-`python
-_idxs_list = []
-# ... append to list ...
-_idxs = np.concatenate(_idxs_list) if _idxs_list else np.array([])
-`
-
-#### datadict_to_dataframe(): flatten() Instead of ravel()
-
-**File:** `plottr/data/datadict.py:1738, 1745`
+| QDstability (14400×251, 16 deps) | 223 MB | 555 ms | 189 ms | 2.93× |
+| TopogapStage2 (41×33×5×81, 21 deps) | 152 MB | 439 ms | 161 ms | 2.73× |
+| QDtuning (7440×121, 16 deps) | 14 MB | 31 ms | 11 ms | 2.73× |
 
-**Problem:** `.flatten()` always copies; `.ravel()` returns a view when possible.
+**Interactive actions (simulated user operations on large datasets):**
 
-**Fix:** Use `.ravel()` since the result is consumed immediately by pandas.
-
-### Tier 4: Architectural Improvements (Larger Effort)
-
-#### Data Change Detection in Pipeline
-
-**Problem:** The pipeline has no concept of "what changed." Every update re-processes
-the entire data through every node. For live monitoring where data is appended
-incrementally, this means re-gridding, re-reducing, and re-plotting everything.
-
-**Opportunity:** Add lightweight change detection:
-- Track data version/hash at the DataDict level
-- Nodes check if their input actually changed before processing
-- For append-only updates, nodes could process only new data
-
-#### Fitter Node: No Memoization
-
-**File:** `plottr/node/fitter.py:624-650`
-
-**Problem:** The fitting algorithm runs on every `process()` call even if the data
-and fit parameters haven't changed. For complex models this can take 100ms-1s.
-
-**Fix:** Cache fit results keyed on (data hash, model, parameters).
-
-#### ScaleUnits: Redundant Per-Update Computation
-
-**File:** `plottr/node/scaleunits.py:129-135`
-
-**Problem:** `find_scale_and_prefix()` scans the full array (`np.nanmax(np.abs(data))`)
-for every field on every update.
-
-**Fix:** Cache the scale prefix and only recompute when the data range changes
-significantly (e.g., order of magnitude difference).
-
-#### Histogrammer: No Result Caching
-
-**File:** `plottr/node/histogram.py:132-217`
-
-**Problem:** Histogram recomputed on every update even when data, nbins, and axis
-haven't changed.
-
-**Fix:** Cache histogram results, invalidate only when inputs change.
-
-### Tier 5: xarray Consideration
-
-**Finding:** Plottr does NOT use xarray at all despite listing it as a dependency.
-xarray could theoretically provide lazy loading from HDF5, chunked computation, and
-better memory management. However, replacing DataDict with xarray would be a major
-refactoring effort and is not recommended unless a larger redesign is planned.
-
-The `xarray` dependency appears to be pulled in transitively or for potential future
-use. It could be made optional to reduce install footprint.
-
-### Round 2 Execution Results
-
-All round 2 optimizations implemented and tested. **205 tests pass** (0 failures).
-
-#### Changes Made (Round 2)
-
-| File | Changes |
-|---|---|
-| `plottr/utils/num.py` | Rewrote `largest_numtype()` to use dtype (avoids element iteration); `is_invalid()` skips zero alloc for non-floats; `guess_grid_from_sweep_direction()` converts once with `np.asarray` |
-| `plottr/data/datadict.py` | Fixed O(n^2) `np.append` in `remove_invalid_entries()`; `meshgrid_to_datadict()` uses `ravel()`; `datadict_to_dataframe()` uses `ravel()` |
-| `plottr/node/node.py` | Deferred `structure()` call to only when structure changes |
-| `plottr/plot/base.py` | Replaced `deepcopy` with `dataclasses.replace` in `_splitComplexData()` |
-| `test/pytest/test_round2_optimizations.py` | 32 new tests |
-
-#### Benchmark (Round 2)
-
-| Benchmark | Before | After | Speedup |
+| Action | Before | After | Speedup |
 |---|---|---|---|
-| **largest_numtype (float 500k)** | 29.8 ms | 0.002 ms | **~15,000x** |
-| **largest_numtype (complex 500k)** | 31.9 ms | 0.001 ms | **~32,000x** |
-| **node_process (500k mesh)** | 7.42 ms | 0.15 ms | **50x** |
-| **to_dataframe (100k)** | 0.95 ms | 0.63 ms | **1.5x** |
-| **remove_invalid (10k)** | 0.073 ms | 0.050 ms | **1.5x** |
-| **is_invalid (int 500k)** | 16.5 ms | 15.0 ms | **1.1x** |
-
-#### Bugs Fixed (Round 2)
-
-- `remove_invalid_entries()` crashed with `ValueError` when dependents had different numbers of invalid entries (inhomogeneous `np.array(idxs)`). Fixed by using `np.concatenate`.
-- `largest_numtype()` on empty arrays previously returned `None` in all cases; behavior preserved via explicit empty check.
+| Toggle subtract average (15 MB 2D) | 293 ms | 29 ms | 10.2× |
+| Swap XY axes (18 MB 2D) | 790 ms | 241 ms | 3.3× |
+| Switch dependent (61 MB 1D) | 2,287 ms | 977 ms | 2.3× |
+| Data refresh (15 MB 2D) | 697 ms | 199 ms | 3.5× |
 
-### Real Dataset Benchmark (23 QCodes Datasets, Before vs After)
+### Tests Added
 
-Full pipeline benchmark: Load from QCodes DB -> DataSelector -> DataGridder -> XYSelector.
-Measured on 23 real-world-shaped datasets (1D-3D, with/without shape metadata, complete/interrupted).
+221 new tests across 4 test files:
+- `test_datadict_copy_semantics.py` — copy isolation, edge cases, pipeline integrity
+- `test_pipeline_coverage.py` — per-node tests, hypothesis property-based, various dtypes
+- `test_round2_optimizations.py` — is_invalid, largest_numtype, remove_invalid_entries
+- `test_gridder_comprehensive.py` — all GridOption paths, shapes, edge cases
 
-**Pipeline total: 1478 ms (before) -> 1025 ms (after) = 1.44x overall speedup**
-
-| Dataset | Points | Pipeline Before | Pipeline After | Speedup |
-|---|---|---|---|---|
-| stability_diagram (500x400) | 200,000 | 199 ms | 110 ms | **1.81x** |
-| large_3d_scan (100x80x50) | 800,000 | 549 ms | 333 ms | **1.65x** |
-| field_spectroscopy (50x2000) | 100,000 | 96 ms | 64 ms | **1.50x** |
-| time_trace (100k) | 100,000 | 64 ms | 44 ms | **1.46x** |
-| spatial_map (50x40x30) | 60,000 | 100 ms | 70 ms | **1.42x** |
-| 3d_cal_noshape (8x6x5) | 240 | 29 ms | 23 ms | **1.30x** |
-| gate_sweep (100x80) | 8,000 | 31 ms | 25 ms | **1.25x** |
-| interrupted_sweep | 500 | 26 ms | 22 ms | **1.21x** |
-| t1_measurement (1D, no shape) | 1,500 | 24 ms | 20 ms | **1.20x** |
-| charge_stability_interrupted | 630 | 26 ms | 21 ms | **1.20x** |
-| two_tone_spectroscopy (20x30) | 600 | 25 ms | 22 ms | **1.16x** |
-| (remaining 12 datasets) | | | | 1.00-1.17x |
-
-Key observations:
-- Larger datasets benefit most (1.4-1.8x for 60k+ points)
-- Even small datasets see 1.1-1.3x improvement (reduced per-node overhead)
-- No regressions observed on any dataset
-- All 23 datasets produce the same output types before and after
-
-### Large Dataset Benchmark (Array ParamType, 15-61 MB per dataset)
-
-8 datasets using QCodes array paramtype (blob storage), benchmarked through
-the full plottr pipeline (Load -> DataSelector -> DataGridder -> XYSelector).
-
-**Pipeline total: 6,550 ms (before) -> 3,465 ms (after) = 1.89x overall speedup**
-
-| Dataset | Data Size | Pipeline Before | Pipeline After | Speedup |
-|---|---|---|---|---|
-| large_1d_3dep (2M pts, 3 deps) | 61 MB | 997 ms | 497 ms | **2.01x** |
-| large_1d_sweep (4M pts) | 61 MB | 1,923 ms | 971 ms | **1.98x** |
-| large_2d_wide (200x4000) | 18 MB | 702 ms | 360 ms | **1.95x** |
-| large_2d_interrupted (40% of 1000x800) | 18 MB | 314 ms | 162 ms | **1.94x** |
-| large_2d_2dep (500x1000, 2 deps) | 15 MB | 453 ms | 234 ms | **1.94x** |
-| large_2d_square (800x800) | 15 MB | 568 ms | 295 ms | **1.93x** |
-| large_3d_1dep (100x100x80) | 24 MB | 1,064 ms | 632 ms | **1.68x** |
-| large_3d_2dep (80x80x60, 2 deps) | 15 MB | 530 ms | 315 ms | **1.68x** |
-
-Loading times are unchanged (dominated by QCodes SQLite I/O). All speedup
-comes from the plottr pipeline processing (copy, validate, structure, gridding).
-
-### Improved Benchmark Methodology (v2)
-
-Previous benchmarks created a new flowchart per run, which always hit the "first data" code path.
-The v2 benchmark fixes this by measuring two scenarios:
-
-- **Cold start**: Create flowchart + process first data (opening a dataset for the first time)
-- **Steady state**: Re-process new data on an existing flowchart (live monitoring refresh)
+---
 
-Method: 5 repeats, median timing, warmup run discarded, persistent flowchart for steady-state.
+## Part 2: Implemented — Inspectr Loading & UX
 
-#### Large Datasets (8 datasets, 15-61 MB each, array paramtype)
+### Problem
 
-|  | Cold Start |  | Steady State |  |
-|---|---|---|---|---|
-| **Totals** | 6,479 -> 3,449 ms | **1.88x** | 5,867 -> 3,312 ms | **1.77x** |
-
-| Dataset | MB | Cold Before | Cold After | Cold Spd | Steady Before | Steady After | Steady Spd |
-|---|---|---|---|---|---|---|---|
-| large_1d_sweep (4M pts) | 61 | 1,911 ms | 960 ms | **1.99x** | 1,865 ms | 1,031 ms | **1.81x** |
-| large_1d_3dep (2M, 3 deps) | 61 | 987 ms | 491 ms | **2.01x** | 949 ms | 511 ms | **1.86x** |
-| large_2d_square (800x800) | 15 | 567 ms | 294 ms | **1.93x** | 528 ms | 290 ms | **1.82x** |
-| large_2d_2dep (500x1000) | 15 | 448 ms | 237 ms | **1.89x** | 412 ms | 226 ms | **1.82x** |
-| large_3d_1dep (100x100x80) | 24 | 1,035 ms | 628 ms | **1.65x** | 798 ms | 503 ms | **1.59x** |
-| large_3d_2dep (80x80x60) | 15 | 525 ms | 320 ms | **1.64x** | 389 ms | 247 ms | **1.58x** |
-| large_2d_interrupted (40%) | 18 | 306 ms | 162 ms | **1.89x** | 273 ms | 147 ms | **1.86x** |
-| large_2d_wide (200x4000) | 18 | 701 ms | 357 ms | **1.96x** | 654 ms | 357 ms | **1.83x** |
-
-#### Small/Medium Datasets (23 datasets, <1 KB to 15 MB, numeric paramtype)
-
-|  | Cold Start |  | Steady State |  |
-|---|---|---|---|---|
-| **Totals** | 1,477 -> 1,036 ms | **1.43x** | 895 -> 529 ms | **1.69x** |
+Opening a large QCoDeS database (1496 runs) in inspectr took 15+ minutes because the
+`experiments()` + `data_sets()` enumeration in QCoDeS is O(N²). Clicking any dataset
+froze the UI for ~1 second while the snapshot (up to 6 MB of JSON) was parsed into
+thousands of tree widget items.
 
-Steady-state highlights (where the optimization shines most):
+### What Changed
 
-| Dataset | Steady Before | Steady After | Speedup |
-|---|---|---|---|
-| interrupted_sweep | 5.9 ms | 2.8 ms | **2.11x** |
-| two_tone_spectroscopy | 6.1 ms | 2.9 ms | **2.10x** |
-| charge_stability_interrupted | 6.1 ms | 2.9 ms | **2.10x** |
-| ramsey_2d | 6.2 ms | 3.0 ms | **2.07x** |
-| qubit_spectroscopy | 7.8 ms | 3.8 ms | **2.05x** |
-| multi_measurement (3 deps) | 6.8 ms | 3.4 ms | **2.00x** |
-| stability_diagram (200K) | 176.7 ms | 93.8 ms | **1.88x** |
-| large_3d_scan (800K) | 421.8 ms | 261.1 ms | **1.62x** |
+**Fast database overview** (`plottr/data/qcodes_db_overview.py`, new module):
+- Single SQL JOIN query fetching run metadata directly from runs + experiments tables
+- Skips snapshot and run_description blobs entirely
+- Reads `inspectr_tag` directly as a column from the runs table
+- Intended for eventual contribution to QCoDeS
 
-### Interactive Action Benchmark (simulated user actions, large datasets)
+**Lazy snapshot loading** (`plottr/apps/inspectr.py`):
+- Snapshot tree built only when user expands the "QCoDeS Snapshot" section
+- Info pane sections collapsed by default
+- Smooth pixel-based scrolling for tall rows (e.g., exception tracebacks)
 
-Measures the time for real user interactions on a persistent flowchart
-(DataSelector -> DataGridder -> XYSelector -> SubtractAverage -> ScaleUnits).
-5 repeats, median timing, warmup discarded.
+**Incremental refresh**:
+- `refreshDB()` only loads runs newer than the last known run_id
+- Merges incremental results into existing dataframe
 
-#### Per-Node Speedups (averaged across all datasets)
+**Loading UX**:
+- Live progress indicator: "Loading database... (142/1496 datasets)"
+- Contextual messages: "Select a date...", "No datasets found...", "No datasets match filter..."
+- Wider default window (960×640)
 
-The node breakdown reveals where our optimizations had the most impact:
+**Fallback chain**: SQL direct → `load_by_id` loop → original `experiments()` API
 
-| Node | Before | After | Speedup | What changed |
-|---|---|---|---|---|
-| **DataSelector** | 72-579 ms | 7-39 ms | **10-17x** | `largest_numtype()` now O(1), `copy()/extract()` optimized |
-| **SubtractAverage** | 2-202 ms | 0.1-9 ms | **6-29x** | `copy()` 15x faster, `mask_invalid()` skips clean data |
-| **ScaleUnits** | 2-258 ms | 0.1-37 ms | **7-15x** | `copy()` 15x faster |
-| **XYSelector** | 89-596 ms | 42-322 ms | **1.5-2.3x** | Removed cascading copy, deferred `structure()` |
-| **DataGridder** | 102-647 ms | 89-574 ms | **1.1-1.2x** | `copy=False` in `datadict_to_meshgrid` |
+### Benchmark
 
-**Key insight:** DataGridder dominates total time (50-60%) and got the least speedup (1.1x)
-because its cost is dominated by the actual gridding computation (`guess_shape`, `reshape`,
-`transpose`) -- not by copy/validate overhead. This is the next optimization frontier.
+| Approach | 23 runs | 1496 runs (projected) |
+|---|---|---|
+| Old (experiments + data_sets) | 103 ms | 15+ minutes |
+| load_by_id loop | 90 ms | ~5 seconds |
+| **SQL direct** (new) | **14 ms** | **~10 ms** |
+| Incremental (3 new runs) | - | **~4 ms** |
 
-#### Action Speedups
+Snapshot click: 951 ms → 0.3 ms (3,554× faster)
 
-| Action | Dataset | Before | After | Speedup |
-|---|---|---|---|---|
-| **toggle_subtract_avg** | 2d_square (15 MB) | 293 ms | 29 ms | **10.2x** |
-| **toggle_subtract_avg** | 2d_wide (18 MB) | 342 ms | 36 ms | **9.5x** |
-| **swap_xy_axes** | 2d_square (15 MB) | 662 ms | 196 ms | **3.4x** |
-| **swap_xy_axes** | 2d_wide (18 MB) | 790 ms | 241 ms | **3.3x** |
-| **switch_dependent** | 1d_3dep (61 MB) | 2287 ms | 977 ms | **2.3x** |
-| **data_refresh** | 2d_square (15 MB) | 697 ms | 304 ms | **2.3x** |
-| **data_refresh** | 1d_sweep (61 MB) | 2405 ms | 1107 ms | **2.2x** |
-| **slide_dimension** | 3d_1dep (24 MB) | 1891 ms | 1231 ms | **1.5x** |
-| **toggle_grid** | 3d_1dep (24 MB) | 1290 ms | 985 ms | **1.3x** |
-
-#### Where remaining time goes (optimization frontier)
-
-For the `large_2d_square` dataset (800x800, 15 MB) after optimization:
-
-- **DataGridder**: 177 ms (58%) -- gridding computation itself
-- **XYSelector**: 82 ms (27%) -- dimension reduction + reorder
-- **DataSelector**: 9 ms (3%) -- extraction
-- **ScaleUnits**: 9 ms (3%) -- prefix computation
-- **SubtractAverage**: 2 ms (1%) -- average subtraction
-- **Overhead**: ~25 ms (8%) -- flowchart propagation, signal emissions
-
-The gridding step (`guess_shape_from_datadict` + `datadict_to_meshgrid`) is now the
-dominant cost and is performing actual computation (not copy/validate overhead).
-Further optimization would need to target the gridding algorithm itself.
-
-### Round 3: DataGridder Optimization (`_find_switches`)
-
-**Root cause:** `_find_switches()` in `plottr/utils/num.py` was the dominant cost in the
-gridding pipeline. For 640K points it took 80ms per axis (160ms total for 2D).
-
-**What was slow:**
-- Called `is_invalid()` 3 times on the same data (3x O(N))
-- Created a `MaskedArray` just for subtraction (O(N) alloc)
-- Called `np.percentile()` twice with separate array filtering (2x O(N log N) sort)
-- Used Python list comprehension for switch filtering
-
-**Optimizations applied:**
-- Compute `is_invalid()` once, reuse the mask
-- Use direct numpy subtraction instead of MaskedArray (NaN propagates correctly)
-- Compute both percentiles in a single `np.percentile([lo, hi])` call (one sort)
-- Vectorized switch filtering with boolean mask instead of list comprehension
-- Use `np.nanmean` for sweep direction to handle NaN deltas
-- Fixed redundant `np.std()` call in `guess_grid_from_sweep_direction`
-
-**Per-function benchmark (800x800 dataset, 640K pts):**
+---
 
-| Function | Before | After | Speedup |
-|---|---|---|---|
-| `_find_switches()` per axis | 80 ms | 31 ms | **2.6x** |
-| `datadict_to_meshgrid()` | 175 ms | 71 ms | **2.5x** |
+## Part 3: Implemented — Plot UI Improvements
 
-**Per-node impact (data_refresh action):**
+### What Changed
 
-| Dataset | Grid Before | Grid After | Grid Spd | Total Before | Total After | Total Spd |
-|---|---|---|---|---|---|---|
-| large_1d_sweep (61 MB) | 574 ms | 243 ms | **2.4x** | 1107 ms | 792 ms | **1.4x** |
-| large_1d_3dep (61 MB) | 285 ms | 122 ms | **2.3x** | 547 ms | 386 ms | **1.4x** |
-| large_2d_square (15 MB) | 177 ms | 73 ms | **2.4x** | 304 ms | 199 ms | **1.5x** |
-| large_2d_2dep (15 MB) | 137 ms | 58 ms | **2.4x** | 238 ms | 156 ms | **1.5x** |
-| large_3d_1dep (24 MB) | 345 ms | 139 ms | **2.5x** | 497 ms | 292 ms | **1.7x** |
-| large_3d_2dep (15 MB) | 169 ms | 68 ms | **2.5x** | 250 ms | 142 ms | **1.8x** |
-| large_2d_interrupted (18 MB) | 89 ms | 38 ms | **2.3x** | 156 ms | 105 ms | **1.5x** |
-| large_2d_wide (18 MB) | 218 ms | 93 ms | **2.3x** | 375 ms | 249 ms | **1.5x** |
+**Grid layout for pyqtgraph subplots** (`plottr/plot/pyqtgraph/autoplot.py`):
+- Replaced single-column `QSplitter` with `QGridLayout` using near-square grid
+  (same formula as matplotlib: `nrows = int(n^0.5 + 0.5)`)
+- Many subplots now arrange as 2×2, 2×3, 4×4 etc. instead of stacking vertically
 
-**Cumulative speedup vs original master baseline (data_refresh):**
+**Scrollable plot area** (both backends):
+- "Scrollable" checkbox + min-height spinbox in the plot toolbar
+- Off by default; when enabled, plot area expands and becomes scrollable
+- Min height per row configurable (40–2000 px, default 75 px pyqtgraph / 100 px mpl)
 
-| Dataset | Master Baseline | Fully Optimized | Cumulative Speedup |
-|---|---|---|---|
-| large_1d_sweep (61 MB) | 2405 ms | 792 ms | **3.0x** |
-| large_1d_3dep (61 MB) | 1217 ms | 386 ms | **3.2x** |
-| large_2d_square (15 MB) | 697 ms | 199 ms | **3.5x** |
-| large_2d_2dep (15 MB) | 526 ms | 156 ms | **3.4x** |
-| large_3d_1dep (24 MB) | 813 ms | 292 ms | **2.8x** |
-| large_3d_2dep (15 MB) | 405 ms | 142 ms | **2.9x** |
-| large_2d_interrupted (18 MB) | 355 ms | 105 ms | **3.4x** |
-| large_2d_wide (18 MB) | 828 ms | 249 ms | **3.3x** |
-
-62 new tests added in `test_gridder_comprehensive.py`.
-
-### Real Experimental Data Benchmark (P1386BB_00BE_datasets.db)
-
-Benchmark on actual experimental datasets from a quantum device measurement campaign.
-These are production datasets with real-world complexity (16-21 dependents, nested array
-data, 4D parameter spaces).
-
-| Run | Experiment | Data Size | Deps | Axes | Cold Before | Cold After | Cold Spd | Steady Before | Steady After | Steady Spd |
-|---|---|---|---|---|---|---|---|---|---|---|
-| 720 | **QDstability** | 223 MB | 16 | 2 | 636 ms | 179 ms | **3.56x** | 555 ms | 189 ms | **2.93x** |
-| 713 | **TopogapStage2** | 152 MB | 21 | 19 | 688 ms | 279 ms | **2.47x** | 439 ms | 161 ms | **2.73x** |
-| 716 | **TopogapStage2** | 152 MB | 21 | 19 | 690 ms | 280 ms | **2.47x** | 432 ms | 164 ms | **2.64x** |
-| 710 | **QDtuning** | 14 MB | 16 | 2 | 52 ms | 31 ms | **1.70x** | 31 ms | 11 ms | **2.73x** |
-| 1496 | GateSweepProtocol | <1 MB | 1 | 1 | 22 ms | 20 ms | 1.09x | 2.5 ms | 1.1 ms | **2.27x** |
-
-Per-node breakdown on QDstability (223 MB, 16 deps):
-- **DataSelector**: 218 ms -> 17 ms (**12.8x**) -- largest_numtype O(1), copy optimized
-- **DataGridder**: 33 ms -> 16 ms (**2.1x**) -- _find_switches optimized
-- **XYSelector**: 264 ms -> 126 ms (**2.1x**) -- cascading copy removed
-
-Per-node breakdown on TopogapStage2 (152 MB, 21 deps, 4D):
-- **DataSelector**: 214 ms -> 16 ms (**13.4x**)
-- **DataGridder**: 34 ms -> 17 ms (**2.0x**)
-- **XYSelector**: 164 ms -> 109 ms (**1.5x**)
+**Plot backend selector** (`plottr/apps/inspectr.py`):
+- Combo box in inspectr toolbar to switch between matplotlib and pyqtgraph
+- Default: matplotlib. Applies to newly opened plot windows.
 
 ---
 
-## Inspectr Optimizations
+## Part 4: Not Implemented — Future Suggestions
 
-### Changes
+These were identified during analysis but not implemented in this PR.
 
-**Lazy snapshot loading** (`plottr/apps/inspectr.py`):
-- `RunInfo.setInfo()` no longer calls `dictToTreeWidgetItems()` on the snapshot dict
-- Instead shows a collapsed "QCoDeS Snapshot (click to expand)" placeholder
-- Full snapshot tree is built only when the user expands the item
-- `expandAll()` removed -- tree shows collapsed by default
+### HDF5 Data Loading (datadict_storage.py)
+- Lines 274 and 305 read the **entire HDF5 dataset into memory** just to get its shape
+- Fix: `ds.shape` instead of `ds[:].shape` — would reduce load time by 50–80%
 
-**Incremental DB refresh** (`plottr/apps/inspectr.py`):
-- `refreshDB()` now passes `start=len(dbdf)` to `get_runs_from_db()`
-- Only loads new datasets since last refresh, not the entire database
-- `DBLoaded()` merges incremental results into existing dataframe
-- First load still loads everything (`start=0`)
+### Signal Emission Overhead (node.py)
+- Up to 7 Qt signals emitted per node per data update
+- `dataFieldsChanged` is redundant (axes + deps)
+- Could consolidate to 1–2 batched signals
 
-**Result**: Clicking a dataset with a 5.9 MB snapshot: 951 ms -> 0.3 ms (**3,554x faster**)
-Refreshing a large DB: loads only new runs instead of re-iterating all 1496.
+### Fitter / Histogrammer / ScaleUnits Memoization
+- These nodes recompute results on every update even when inputs haven't changed
+- Could cache results keyed on data hash + parameters
 
-### Fast DB Loading via load_by_id (bypassing experiments/data_sets)
+### Pipeline Change Detection
+- No concept of "what changed" — every update re-processes all data through all nodes
+- For append-only monitoring, nodes could process only new data
 
-Added `get_runs_from_db_fast()` in `plottr/data/qcodes_dataset.py` which
-uses `load_by_id()` directly for each run, bypassing the expensive
-`experiments()` + `exp.data_sets()` enumeration in qcodes.
-
-The old approach is O(N^2) because `experiments()` loads all experiment
-objects, then `data_sets()` iterates each experiment's runs. For 1496 runs
-this takes 15+ minutes. The new approach is O(N) at ~3ms per run.
-
-| Approach | 23 runs | Projected 1496 runs |
-|---|---|---|
-| Old (experiments + data_sets) | 103 ms | 15+ minutes |
-| New (load_by_id loop) | 90 ms | ~5 seconds |
-| Incremental (3 new only) | 23 ms | 23 ms |
-
-**Note for qcodes team**: The ideal API would be a single function that returns
-lightweight run metadata (run_id, exp_name, sample_name, timestamps, guid,
-result_counter, metadata) for all or a range of runs, without creating full
-DataSet objects. Something like `get_run_overview(conn, start_id, end_id)`
-that does a single SQL query. This would reduce the per-run cost from 3ms
-(load_by_id) to <0.1ms (pure SQL).
+### QCoDeS API Suggestion
+The ideal API for inspectr would be a single function returning lightweight run metadata
+for all or a range of runs without creating full DataSet objects:
+```python
+get_run_overview(conn, start_id=None, end_id=None)
+# Returns: [{run_id, exp_name, sample_name, name, timestamps, guid, result_counter, metadata_keys}]
+```
+This would be a single SQL query completing in <1 ms for any database size.

From 913c672704d1fffc6f27efea0f748f1fdb458d95 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 16:01:11 +0200
Subject: [PATCH 23/64] fix: add plotWidgetClass parameter to
 autoplotQcodesDataset

The inspectr backend selector passes plotWidgetClass to autoplotQcodesDataset,
but the function signature was missing this parameter on the branch.
Also passes it through to QCAutoPlotMainWindow.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 0bece004..efb98f08 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -301,7 +301,8 @@ def setDefaults(self, data: DataDictBase) -> None:
 
 
 def autoplotQcodesDataset(log: bool = False,
-                          pathAndId: Union[Tuple[str, int], None] = None) \
+                          pathAndId: Union[Tuple[str, int], None] = None,
+                          plotWidgetClass: Optional[Type[PlotWidget]] = None) \
         -> Tuple[Flowchart, QCAutoPlotMainWindow]:
     """
     Sets up a simple flowchart consisting of a data selector,
@@ -331,7 +332,8 @@ def autoplotQcodesDataset(log: bool = False,
     win = QCAutoPlotMainWindow(fc, pathAndId=pathAndId,
                                widgetOptions=widgetOptions,
                                monitor=True,
-                               loaderName='Data loader')
+                               loaderName='Data loader',
+                               plotWidgetClass=plotWidgetClass)
     win.show()
 
     return fc, win

From d98a72a435bda812129710e5d0f28fdb764bc696 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 17:07:46 +0200
Subject: [PATCH 24/64] refactor: extract hint constant, clean up backend
 selector mapping

- Extract 'Select a date...' string into _SELECT_DATE_HINT constant
- Replace string-magic backend detection with explicit _PLOT_BACKENDS
  mapping (display name -> class)
- _backend_name_for_class() for reverse lookup
- Unknown plotWidgetClass added to combo with its class name as label
- _onBackendChanged uses the mapping instead of hardcoded imports

280 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 56 ++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 1b77762d..c48500ce 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -41,6 +41,31 @@
 
 LOGGER = plottrlog.getLogger('plottr.apps.inspectr')
 
+#: Hint text shown in the run list when no date is selected.
+_SELECT_DATE_HINT = "Select a date on the left to browse datasets."
+
+#: Mapping of display names to plot widget classes for the backend selector.
+#: Populated lazily on first access.
+_PLOT_BACKENDS: Dict[str, type] = {}
+
+
+def _get_plot_backends() -> Dict[str, type]:
+    """Lazily populate and return the backend mapping."""
+    if not _PLOT_BACKENDS:
+        from plottr.plot.mpl.autoplot import AutoPlot as MPLAutoPlot
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot as PGAutoPlot
+        _PLOT_BACKENDS['matplotlib'] = MPLAutoPlot
+        _PLOT_BACKENDS['pyqtgraph'] = PGAutoPlot
+    return _PLOT_BACKENDS
+
+
+def _backend_name_for_class(cls: Optional[type]) -> Optional[str]:
+    """Return the display name for a plot widget class, or None if unknown."""
+    for name, backend_cls in _get_plot_backends().items():
+        if backend_cls is cls:
+            return name
+    return None
+
 
 ### Database inspector tool
 
@@ -152,7 +177,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
             "color: gray; font-size: 13pt; padding: 40px;"
         )
         self._overlayLabel.setAttribute(QtCore.Qt.WA_TransparentForMouseEvents)
-        self.setOverlayText("Select a date on the left to browse datasets.")
+        self.setOverlayText(_SELECT_DATE_HINT)
 
     def setOverlayText(self, text: str) -> None:
         """Show a centered overlay message. Pass empty string to hide."""
@@ -510,13 +535,18 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         backendLabel = QtWidgets.QLabel(" Plot backend: ")
         self.toolbar.addWidget(backendLabel)
         self.plotBackendSelector = QtWidgets.QComboBox()
-        self.plotBackendSelector.addItems(['matplotlib', 'pyqtgraph'])
+        backends = _get_plot_backends()
+        self.plotBackendSelector.addItems(list(backends.keys()))
         self.plotBackendSelector.setToolTip('Choose plotting backend for new plot windows')
         if plotWidgetClass is not None:
-            # If a specific backend was passed in, select it
-            class_name = plotWidgetClass.__name__
-            if 'pyqtgraph' in class_name.lower() or 'PG' in class_name:
-                self.plotBackendSelector.setCurrentText('pyqtgraph')
+            known_name = _backend_name_for_class(plotWidgetClass)
+            if known_name is not None:
+                self.plotBackendSelector.setCurrentText(known_name)
+            else:
+                # Unknown class: add it to the selector with its class name
+                label = plotWidgetClass.__name__
+                self.plotBackendSelector.addItem(label)
+                self.plotBackendSelector.setCurrentText(label)
         self.plotBackendSelector.currentTextChanged.connect(self._onBackendChanged)
         self.toolbar.addWidget(self.plotBackendSelector)
         # Sync the class with the initial combo selection
@@ -657,7 +687,7 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
         if dbdf.size == 0 and self.dbdf is not None:
             LOGGER.debug('DB reloaded with no new data. Skipping update.')
             self.runList.setOverlayText(
-                "Select a date on the left to browse datasets.")
+                _SELECT_DATE_HINT)
             return None
 
         if self.latestRunId is not None and self.dbdf is not None and dbdf.size > 0:
@@ -683,7 +713,7 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
                 "No datasets found in this database.")
         elif self.runList.topLevelItemCount() == 0:
             self.runList.setOverlayText(
-                "Select a date on the left to browse datasets.")
+                _SELECT_DATE_HINT)
 
         if self.latestRunId is not None and self.dbdf is not None and self.dbdf.size > 0:
             idxs = self.dbdf.index.values
@@ -766,7 +796,7 @@ def setDateSelection(self, dates: Sequence[str]) -> None:
             self._selected_dates = ()
             self.runList.clear()
             self.runList.setOverlayText(
-                "Select a date on the left to browse datasets.")
+                _SELECT_DATE_HINT)
 
     @Slot(int)
     def setRunSelection(self, runId: int) -> None:
@@ -803,12 +833,8 @@ def plotRun(self, runId: int) -> None:
 
     @Slot(str)
     def _onBackendChanged(self, backend: str) -> None:
-        if backend == 'pyqtgraph':
-            from plottr.plot.pyqtgraph.autoplot import AutoPlot as PGAutoPlot
-            self._plotWidgetClass = PGAutoPlot
-        else:
-            from plottr.plot.mpl.autoplot import AutoPlot as MPLAutoPlot
-            self._plotWidgetClass = MPLAutoPlot
+        backends = _get_plot_backends()
+        self._plotWidgetClass = backends.get(backend, self._plotWidgetClass)
 
     def setTag(self, item: QtWidgets.QTreeWidgetItem, tag: str) -> None:
         # set tag in the database

From 4428cf11fed1291ed0cae799b6124ace1dfb4173 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 17:49:53 +0200
Subject: [PATCH 25/64] =?UTF-8?q?fix:=20address=20code=20review=20?=
 =?UTF-8?q?=E2=80=94=20timestamps,=20connections,=20dead=20code?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- _split_timestamp(): proper datetime parsing instead of string slicing
  for splitting qcodes timestamp strings into date/time components.
  Applied to both get_ds_info() and _ds_to_info_dict().
- get_runs_from_db_fast(): removed unnecessary initialise_or_create_database_at
  call, use same read_only pattern as get_runs_from_db.
- qcodes_db_overview: use conn_from_dbpath_or_conn from qcodes instead of
  raw sqlite3.connect. Remove unused get_last_run_id function.
- mpl/widgets: remove dead _scrollable attribute and fix setScrollable
  which had identical code in both branches.

280 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/qcodes_dataset.py     | 55 +++++++++++++++++--------------
 plottr/data/qcodes_db_overview.py | 32 +++++-------------
 plottr/plot/mpl/widgets.py        |  8 +----
 3 files changed, 41 insertions(+), 54 deletions(-)

diff --git a/plottr/data/qcodes_dataset.py b/plottr/data/qcodes_dataset.py
index 932e75cd..8b6911fd 100644
--- a/plottr/data/qcodes_dataset.py
+++ b/plottr/data/qcodes_dataset.py
@@ -6,6 +6,7 @@
 import os
 import sys
 from contextlib import closing
+from datetime import datetime
 from itertools import chain
 from operator import attrgetter
 from typing import Dict, List, Set, Union, TYPE_CHECKING, Any, Tuple, Optional, cast
@@ -43,6 +44,24 @@ def _get_names_of_standalone_parameters(paramspecs: List['ParamSpec']
     return standalones
 
 
+def _split_timestamp(ts: Optional[str]) -> Tuple[str, str]:
+    """Split a qcodes timestamp string into (date, time) components.
+
+    Uses datetime parsing instead of string slicing for robustness.
+
+    :param ts: timestamp string as returned by ``ds.run_timestamp()``
+        (typically ``"YYYY-MM-DD HH:MM:SS"``), or None.
+    :returns: (date_str, time_str) or ('', '') if ts is None or unparsable.
+    """
+    if ts is None:
+        return '', ''
+    try:
+        dt = datetime.fromisoformat(ts)
+        return dt.strftime('%Y-%m-%d'), dt.strftime('%H:%M:%S')
+    except (ValueError, TypeError):
+        return '', ''
+
+
 class IndependentParameterDict(TypedDict):
     unit: str
     label: str
@@ -126,20 +145,10 @@ def get_ds_info(ds: 'DataSetProtocol', get_structure: bool = True) -> DataSetInf
     as well (key is `structure' then).
     """
     _complete_ts = ds.completed_timestamp()
-    if _complete_ts is not None:
-        completed_date = _complete_ts[:10]
-        completed_time = _complete_ts[11:]
-    else:
-        completed_date = ''
-        completed_time = ''
+    completed_date, completed_time = _split_timestamp(_complete_ts)
 
     _start_ts = ds.run_timestamp()
-    if _start_ts is not None:
-        started_date = _start_ts[:10]
-        started_time = _start_ts[11:]
-    else:
-        started_date = ''
-        started_time = ''
+    started_date, started_time = _split_timestamp(_start_ts)
 
     if get_structure:
         structure: Optional[DataSetStructureDict] = get_ds_structure(ds)
@@ -225,16 +234,16 @@ def get_runs_from_db_as_dataframe(path: str) -> pd.DataFrame:
 
 def _ds_to_info_dict(ds: 'DataSetProtocol') -> DataSetInfoDict:
     """Extract inspectr-relevant info from a dataset without loading data or snapshot."""
-    _start = ds.run_timestamp()
-    _complete = ds.completed_timestamp()
+    started_date, started_time = _split_timestamp(ds.run_timestamp())
+    completed_date, completed_time = _split_timestamp(ds.completed_timestamp())
     return DataSetInfoDict(
         experiment=ds.exp_name,
         sample=ds.sample_name,
         name=ds.name,
-        started_date=_start[:10] if _start else '',
-        started_time=_start[11:] if _start else '',
-        completed_date=_complete[:10] if _complete else '',
-        completed_time=_complete[11:] if _complete else '',
+        started_date=started_date,
+        started_time=started_time,
+        completed_date=completed_date,
+        completed_time=completed_time,
         structure=None,
         records=ds.number_of_results,
         guid=ds.guid,
@@ -258,12 +267,10 @@ def get_runs_from_db_fast(path: str,
     :param progress_callback: optional callable(current, total) for progress.
     :returns: dictionary mapping run_id to dataset info.
     """
-    initialise_or_create_database_at(path)
-    read_only = sys.version_info >= (3, 11)
-    conn_kw: Dict[str, Any] = {'conn': None, 'path_to_db': path}
-    if read_only:
-        conn_kw['read_only'] = True
-    conn = conn_from_dbpath_or_conn(**conn_kw)
+    if sys.version_info >= (3, 11):
+        conn = conn_from_dbpath_or_conn(conn=None, path_to_db=path, read_only=True)
+    else:
+        conn = conn_from_dbpath_or_conn(conn=None, path_to_db=path)
 
     overview: Dict[int, DataSetInfoDict] = {}
     with closing(conn) as conn_:
diff --git a/plottr/data/qcodes_db_overview.py b/plottr/data/qcodes_db_overview.py
index 3e10e572..6c246520 100644
--- a/plottr/data/qcodes_db_overview.py
+++ b/plottr/data/qcodes_db_overview.py
@@ -10,14 +10,16 @@
 stable QCoDeS database schema (runs + experiments tables) which has not changed
 across many QCoDeS versions.
 """
+import sys
 import time
-import sqlite3
 import logging
 from contextlib import closing
 from typing import Dict, Optional, Tuple
 
 from typing_extensions import TypedDict
 
+from qcodes.dataset.sqlite.database import conn_from_dbpath_or_conn
+
 logger = logging.getLogger(__name__)
 
 
@@ -67,11 +69,10 @@ def get_db_overview(db_path: str,
     """
     overview: Dict[int, RunOverviewDict] = {}
 
-    try:
-        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
-    except sqlite3.OperationalError:
-        # Fallback for older sqlite versions without URI support
-        conn = sqlite3.connect(db_path)
+    if sys.version_info >= (3, 11):
+        conn = conn_from_dbpath_or_conn(conn=None, path_to_db=db_path, read_only=True)
+    else:
+        conn = conn_from_dbpath_or_conn(conn=None, path_to_db=db_path)
 
     with closing(conn) as c:
         # Check which ad-hoc metadata columns exist in the runs table.
@@ -79,7 +80,7 @@ def get_db_overview(db_path: str,
         try:
             col_info = c.execute('PRAGMA table_info(runs)').fetchall()
             col_names = {col[1] for col in col_info}
-        except sqlite3.OperationalError:
+        except Exception:
             col_names = set()
 
         has_inspectr_tag = 'inspectr_tag' in col_names
@@ -99,7 +100,7 @@ def get_db_overview(db_path: str,
 
         try:
             rows = c.execute(query, (start_run_id,)).fetchall()
-        except sqlite3.OperationalError as e:
+        except Exception as e:
             logger.warning(f"Could not query database overview: {e}")
             return overview
 
@@ -124,18 +125,3 @@ def get_db_overview(db_path: str,
             )
 
     return overview
-
-
-def get_last_run_id(db_path: str) -> Optional[int]:
-    """Get the highest run_id in the database, or None if empty."""
-    try:
-        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
-    except sqlite3.OperationalError:
-        conn = sqlite3.connect(db_path)
-
-    with closing(conn) as c:
-        try:
-            row = c.execute("SELECT MAX(run_id) FROM runs").fetchone()
-            return row[0] if row else None
-        except sqlite3.OperationalError:
-            return None
diff --git a/plottr/plot/mpl/widgets.py b/plottr/plot/mpl/widgets.py
index 9a430bf0..fbf6ba12 100644
--- a/plottr/plot/mpl/widgets.py
+++ b/plottr/plot/mpl/widgets.py
@@ -170,15 +170,9 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
         layout.addWidget(self.mplBar)
         self.setLayout(layout)
 
-        self._scrollable = True
-
     def setScrollable(self, scrollable: bool) -> None:
         """Enable or disable scrollable canvas for many subplots."""
-        self._scrollable = scrollable
-        if scrollable:
-            self._scrollArea.setWidgetResizable(True)
-        else:
-            self._scrollArea.setWidgetResizable(True)
+        if not scrollable:
             self.plot.setMinimumHeight(0)
 
     def setMeta(self, data: DataDictBase) -> None:

From 0ffe850d7d894e42068cf1d5027800e326fdec96 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 21:16:47 +0200
Subject: [PATCH 26/64] feat: LaTeX-to-HTML conversion for pyqtgraph plot
 labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New module plottr/utils/latex.py using unicodeit (now a required dependency):
- Greek letters: \alpha -> α, \Omega -> Ω
- Math symbols: \hbar -> ℏ, \partial -> ∂, \int -> ∫
- Subscripts: V_{gate} -> V<sub>gate</sub> (HTML for text, Unicode for digits)
- Superscripts: x^{2} -> x² (Unicode), e^{iπ} -> e<sup>iπ</sup>
- Fractions: \frac{dI}{dV} -> dI/dV
- Square root: \sqrt{x} -> √x
- Dollar delimiters stripped

Applied to pyqtgraph axis labels in FigureMaker.formatSubPlot().
Falls through gracefully on plain text (no LaTeX = no change).

35 new tests including hypothesis property-based testing.
315 tests pass, 0 mypy errors.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/pyqtgraph/autoplot.py |   9 +-
 plottr/utils/latex.py             |  67 ++++++++++++
 pyproject.toml                    |   4 +-
 test/pytest/test_latex.py         | 170 ++++++++++++++++++++++++++++++
 4 files changed, 245 insertions(+), 5 deletions(-)
 create mode 100644 plottr/utils/latex.py
 create mode 100644 test/pytest/test_latex.py

diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index 9ad20a92..85e533ff 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -20,6 +20,7 @@
 from plottr import QtWidgets, QtCore, Signal, Slot, \
     config_entry as getcfg
 from plottr.data.datadict import DataDictBase
+from plottr.utils.latex import latex_to_html
 from .plots import Plot, PlotWithColorbar, PlotBase
 from ..base import AutoFigureMaker as BaseFM, PlotDataType, \
     PlotItem, ComplexRepresentation, determinePlotDataType, \
@@ -208,17 +209,17 @@ def formatSubPlot(self, subPlotId: int) -> None:
         # label the x axis if there's only one x label
         if isinstance(subPlot, Plot):
             if len(set(labels[0])) == 1:
-                subPlot.plot.setLabel("bottom", labels[0][0])
+                subPlot.plot.setLabel("bottom", latex_to_html(labels[0][0]))
 
         if isinstance(subPlot, PlotWithColorbar):
             if len(set(labels[0])) == 1:
-                subPlot.plot.setLabel("bottom", labels[0][0])
+                subPlot.plot.setLabel("bottom", latex_to_html(labels[0][0]))
 
             if len(set(labels[1])) == 1:
-                subPlot.plot.setLabel('left', labels[1][0])
+                subPlot.plot.setLabel('left', latex_to_html(labels[1][0]))
 
             if len(set(labels[2])) == 1:
-                subPlot.colorbar.setLabel('left', labels[2][0])
+                subPlot.colorbar.setLabel('left', latex_to_html(labels[2][0]))
 
     def plot(self, plotItem: PlotItem) -> None:
         """Plot the given item."""
diff --git a/plottr/utils/latex.py b/plottr/utils/latex.py
new file mode 100644
index 00000000..bd31dbe6
--- /dev/null
+++ b/plottr/utils/latex.py
@@ -0,0 +1,67 @@
+"""
+plottr.utils.latex — Lightweight LaTeX-to-HTML conversion for plot labels.
+
+Converts common LaTeX notation used in physics labels into HTML that Qt's
+rich text renderer can display (for pyqtgraph axis labels, titles, etc.).
+
+Uses ``unicodeit`` for Greek letters and math symbols, then converts
+subscript/superscript braces to HTML ``<sub>``/``<sup>`` tags.
+"""
+import re
+
+import unicodeit
+
+
+def latex_to_html(text: str) -> str:
+    """Convert LaTeX-like notation in *text* to HTML suitable for Qt rich text.
+
+    Handles:
+    - Greek letters: ``\\alpha`` → α, ``\\Omega`` → Ω, etc. (via unicodeit)
+    - Math symbols: ``\\hbar`` → ℏ, ``\\partial`` → ∂, ``\\infty`` → ∞, etc.
+    - Subscripts: ``V_{gate}`` → ``V<sub>gate</sub>``, ``g_{11}`` → ``g<sub>11</sub>``
+    - Superscripts: ``x^{2}`` → ``x<sup>2</sup>``, ``x^2`` → ``x<sup>2</sup>``
+    - Fractions: ``\\frac{dI}{dV}`` → ``dI/dV``
+    - Square root: ``\\sqrt{x}`` → ``√x``
+    - Dollar-sign math delimiters are stripped: ``$...$`` → contents
+
+    The function is idempotent on plain text (no LaTeX) and safe to call on
+    any string — if it contains no LaTeX commands, it passes through unchanged.
+
+    :param text: input string, possibly containing LaTeX notation.
+    :returns: HTML string suitable for Qt ``setHtml()`` or pyqtgraph labels.
+    """
+    if not text:
+        return text
+
+    s = text
+
+    # Strip dollar-sign math delimiters
+    s = re.sub(r'\$([^$]*)\$', r'\1', s)
+
+    # Convert \frac{a}{b} -> a/b (before unicodeit, which doesn't handle it)
+    s = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'\1/\2', s)
+
+    # Convert \sqrt{x} -> √x
+    s = re.sub(r'\\sqrt\{([^}]*)\}', '\u221a\\1', s)
+
+    # Convert \overline{x} -> x̅, \bar{x} -> x̅
+    s = re.sub(r'\\(?:overline|bar)\{([^}]*)\}', '\\1\u0305', s)
+
+    # Apply unicodeit for Greek letters and math symbols
+    s = unicodeit.replace(s)
+
+    # Convert remaining subscripts: _{...} -> <sub>...</sub>
+    # Must come after unicodeit (which handles single-char numeric subscripts)
+    s = re.sub(r'_\{([^}]*)\}', r'<sub>\1</sub>', s)
+    # Single character subscript without braces (only if not already converted)
+    s = re.sub(r'_([a-zA-Z0-9])', r'<sub>\1</sub>', s)
+
+    # Convert remaining superscripts: ^{...} -> <sup>...</sup>
+    s = re.sub(r'\^\{([^}]*)\}', r'<sup>\1</sup>', s)
+    # Single character superscript without braces
+    s = re.sub(r'\^([a-zA-Z0-9])', r'<sup>\1</sup>', s)
+
+    # Clean up any remaining backslashes from unrecognized commands
+    # (leave them as-is — better to show \foo than nothing)
+
+    return s
diff --git a/pyproject.toml b/pyproject.toml
index 38be689a..55b3a771 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "psutil",
     "watchdog",
     "pyzmq",
+    "unicodeit>=0.7.5",
 ]
 dynamic = ["version"]
 
@@ -103,7 +104,8 @@ module = [
     "matplotlib.*",
     "pyqtgraph.*",
     "xhistogram.*",
-    "ruamel.*"
+    "ruamel.*",
+    "unicodeit",
 ]
 ignore_missing_imports = true
 
diff --git a/test/pytest/test_latex.py b/test/pytest/test_latex.py
new file mode 100644
index 00000000..6c33c8ed
--- /dev/null
+++ b/test/pytest/test_latex.py
@@ -0,0 +1,170 @@
+"""Tests for plottr.utils.latex — LaTeX to HTML conversion."""
+import pytest
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+from plottr.utils.latex import latex_to_html
+
+
+class TestGreekLetters:
+    def test_alpha(self):
+        assert latex_to_html(r'\alpha') == '\u03b1'
+
+    def test_beta(self):
+        assert latex_to_html(r'\beta') == '\u03b2'
+
+    def test_gamma(self):
+        assert latex_to_html(r'\gamma') == '\u03b3'
+
+    def test_omega_upper(self):
+        assert latex_to_html(r'\Omega') == '\u03a9'
+
+    def test_mu(self):
+        assert latex_to_html(r'\mu') == '\u03bc'
+
+    def test_pi(self):
+        assert latex_to_html(r'\pi') == '\u03c0'
+
+
+class TestMathSymbols:
+    def test_hbar(self):
+        result = latex_to_html(r'\hbar')
+        # unicodeit may return ℏ (U+210F) or ħ (U+0127) depending on version
+        assert result in ('\u0127', '\u210f')
+
+    def test_partial(self):
+        assert latex_to_html(r'\partial') == '\u2202'
+
+    def test_infty(self):
+        assert latex_to_html(r'\infty') == '\u221e'
+
+    def test_int(self):
+        assert latex_to_html(r'\int') == '\u222b'
+
+    def test_sum(self):
+        assert latex_to_html(r'\sum') == '\u2211'
+
+
+class TestSubscripts:
+    def test_braced_text(self):
+        assert latex_to_html(r'V_{gate}') == 'V<sub>gate</sub>'
+
+    def test_braced_numbers(self):
+        # unicodeit converts numeric subscripts to Unicode (g₁₁)
+        result = latex_to_html(r'g_{11}')
+        assert 'g' in result and '1' in result.replace('\u2081', '1')
+
+    def test_braced_multi(self):
+        result = latex_to_html(r'I_{DS}')
+        assert 'I' in result and 'DS' in result
+
+    def test_single_char(self):
+        result = latex_to_html(r'x_0')
+        # May be Unicode subscript ₀ or HTML <sub>0</sub>
+        assert 'x' in result and ('0' in result or '\u2080' in result)
+
+    def test_mixed(self):
+        result = latex_to_html(r'V_{SD}')
+        assert 'SD' in result
+
+
+class TestSuperscripts:
+    def test_braced(self):
+        result = latex_to_html(r'x^{2}')
+        # unicodeit converts ^{2} to Unicode superscript ²
+        assert 'x' in result and ('2' in result or '\u00b2' in result)
+
+    def test_single_char(self):
+        result = latex_to_html(r'x^2')
+        assert 'x' in result and ('2' in result or '\u00b2' in result)
+
+    def test_braced_text(self):
+        result = latex_to_html(r'e^{i\pi}')
+        assert 'e' in result and '\u03c0' in result
+
+
+class TestFractions:
+    def test_simple(self):
+        assert latex_to_html(r'\frac{dI}{dV}') == 'dI/dV'
+
+    def test_with_symbols(self):
+        result = latex_to_html(r'\frac{\partial I}{\partial V}')
+        assert 'I' in result and 'V' in result and '/' in result
+
+
+class TestSqrt:
+    def test_simple(self):
+        result = latex_to_html(r'\sqrt{x}')
+        assert result == '\u221ax'
+
+
+class TestDollarDelimiters:
+    def test_stripped(self):
+        result = latex_to_html(r'$\alpha$')
+        assert result == '\u03b1'
+
+    def test_inline(self):
+        result = latex_to_html(r'Signal ($\mu$V)')
+        assert '\u03bc' in result
+        assert '$' not in result
+
+
+class TestPassthrough:
+    def test_plain_text(self):
+        assert latex_to_html('voltage') == 'voltage'
+
+    def test_empty(self):
+        assert latex_to_html('') == ''
+
+    def test_units(self):
+        assert latex_to_html('mV') == 'mV'
+
+    def test_with_parens(self):
+        assert latex_to_html('amplitude (V)') == 'amplitude (V)'
+
+
+class TestRealWorldLabels:
+    """Labels commonly seen in quantum physics experiments."""
+
+    def test_conductance(self):
+        result = latex_to_html(r'g_{11}')
+        assert '<sub>' in result or '\u2081' in result  # HTML or Unicode sub
+
+    def test_gate_voltage(self):
+        result = latex_to_html(r'V_{gate}')
+        assert 'gate' in result
+        assert '<sub>' in result
+
+    def test_bias_voltage(self):
+        result = latex_to_html(r'V_{SD}')
+        assert 'SD' in result
+
+    def test_differential_conductance(self):
+        result = latex_to_html(r'$\frac{dI}{dV}$')
+        assert 'dI/dV' in result
+
+    def test_magnetic_field(self):
+        result = latex_to_html(r'B_{field} (T)')
+        assert '<sub>' in result
+        assert '(T)' in result
+
+
+class TestHypothesis:
+    @given(st.text(min_size=0, max_size=100))
+    @settings(max_examples=200)
+    def test_never_crashes(self, text):
+        """latex_to_html should never raise on any input."""
+        result = latex_to_html(text)
+        assert isinstance(result, str)
+
+    @given(st.text(alphabet='abcdefghijklmnopqrstuvwxyz0123456789 .,()',
+                   min_size=0, max_size=50))
+    @settings(max_examples=100)
+    def test_plain_text_passthrough(self, text):
+        """Text without LaTeX commands should pass through mostly unchanged."""
+        result = latex_to_html(text)
+        # Without backslash, underscore, caret, or dollar, text should
+        # be largely preserved (unicodeit may convert some symbols like -)
+        if '\\' not in text and '_' not in text and '^' not in text and '$' not in text:
+            # Allow unicodeit to change some characters (e.g., - to −)
+            assert len(result) == len(text)

From 7cb6a2018cc7aec0f99b902dc1bd21cf9781c75d Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 21:27:03 +0200
Subject: [PATCH 27/64] fix: use HTML sub/sup instead of Unicode
 subscript/superscript
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert subscripts and superscripts to HTML tags BEFORE running
unicodeit, so they become <sub>11</sub> and <sup>2</sup> instead of
Unicode ₁₁ and ². HTML tags render more consistently in Qt rich text.

unicodeit still converts Greek letters and symbols inside the tags.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/utils/latex.py     | 22 +++++++++-------------
 test/pytest/test_latex.py | 22 ++++++++--------------
 2 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/plottr/utils/latex.py b/plottr/utils/latex.py
index bd31dbe6..2fa4790f 100644
--- a/plottr/utils/latex.py
+++ b/plottr/utils/latex.py
@@ -38,7 +38,7 @@ def latex_to_html(text: str) -> str:
     # Strip dollar-sign math delimiters
     s = re.sub(r'\$([^$]*)\$', r'\1', s)
 
-    # Convert \frac{a}{b} -> a/b (before unicodeit, which doesn't handle it)
+    # Convert \frac{a}{b} -> a/b
     s = re.sub(r'\\frac\{([^}]*)\}\{([^}]*)\}', r'\1/\2', s)
 
     # Convert \sqrt{x} -> √x
@@ -47,21 +47,17 @@ def latex_to_html(text: str) -> str:
     # Convert \overline{x} -> x̅, \bar{x} -> x̅
     s = re.sub(r'\\(?:overline|bar)\{([^}]*)\}', '\\1\u0305', s)
 
-    # Apply unicodeit for Greek letters and math symbols
-    s = unicodeit.replace(s)
-
-    # Convert remaining subscripts: _{...} -> <sub>...</sub>
-    # Must come after unicodeit (which handles single-char numeric subscripts)
+    # Convert subscripts and superscripts to HTML BEFORE unicodeit,
+    # so unicodeit doesn't turn them into Unicode sub/superscript chars.
+    # Braced: _{...} -> <sub>...</sub>, ^{...} -> <sup>...</sup>
     s = re.sub(r'_\{([^}]*)\}', r'<sub>\1</sub>', s)
-    # Single character subscript without braces (only if not already converted)
-    s = re.sub(r'_([a-zA-Z0-9])', r'<sub>\1</sub>', s)
-
-    # Convert remaining superscripts: ^{...} -> <sup>...</sup>
     s = re.sub(r'\^\{([^}]*)\}', r'<sup>\1</sup>', s)
-    # Single character superscript without braces
+    # Single character: _x -> <sub>x</sub>, ^x -> <sup>x</sup>
+    s = re.sub(r'_([a-zA-Z0-9])', r'<sub>\1</sub>', s)
     s = re.sub(r'\^([a-zA-Z0-9])', r'<sup>\1</sup>', s)
 
-    # Clean up any remaining backslashes from unrecognized commands
-    # (leave them as-is — better to show \foo than nothing)
+    # Apply unicodeit for Greek letters and math symbols.
+    # Runs after sub/sup conversion so it processes content inside tags too.
+    s = unicodeit.replace(s)
 
     return s
diff --git a/test/pytest/test_latex.py b/test/pytest/test_latex.py
index 6c33c8ed..f05f8456 100644
--- a/test/pytest/test_latex.py
+++ b/test/pytest/test_latex.py
@@ -50,37 +50,31 @@ def test_braced_text(self):
         assert latex_to_html(r'V_{gate}') == 'V<sub>gate</sub>'
 
     def test_braced_numbers(self):
-        # unicodeit converts numeric subscripts to Unicode (g₁₁)
-        result = latex_to_html(r'g_{11}')
-        assert 'g' in result and '1' in result.replace('\u2081', '1')
+        assert latex_to_html(r'g_{11}') == 'g<sub>11</sub>'
 
     def test_braced_multi(self):
-        result = latex_to_html(r'I_{DS}')
-        assert 'I' in result and 'DS' in result
+        assert latex_to_html(r'I_{DS}') == 'I<sub>DS</sub>'
 
     def test_single_char(self):
-        result = latex_to_html(r'x_0')
-        # May be Unicode subscript ₀ or HTML <sub>0</sub>
-        assert 'x' in result and ('0' in result or '\u2080' in result)
+        assert latex_to_html(r'x_0') == 'x<sub>0</sub>'
 
     def test_mixed(self):
         result = latex_to_html(r'V_{SD}')
+        assert '<sub>' in result
         assert 'SD' in result
 
 
 class TestSuperscripts:
     def test_braced(self):
-        result = latex_to_html(r'x^{2}')
-        # unicodeit converts ^{2} to Unicode superscript ²
-        assert 'x' in result and ('2' in result or '\u00b2' in result)
+        assert latex_to_html(r'x^{2}') == 'x<sup>2</sup>'
 
     def test_single_char(self):
-        result = latex_to_html(r'x^2')
-        assert 'x' in result and ('2' in result or '\u00b2' in result)
+        assert latex_to_html(r'x^2') == 'x<sup>2</sup>'
 
     def test_braced_text(self):
         result = latex_to_html(r'e^{i\pi}')
-        assert 'e' in result and '\u03c0' in result
+        assert '<sup>' in result
+        assert '\u03c0' in result
 
 
 class TestFractions:

From 2c92484b9b4688bcb71d7338b5e0b22af0efdc8e Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Mon, 20 Apr 2026 21:35:26 +0200
Subject: [PATCH 28/64] fix: only apply LaTeX conversion when actual LaTeX
 syntax is present

Plain strings with underscores (e.g. gate_voltage, channel_1_amplitude)
now pass through unchanged. Conversion only triggers when the string
contains backslash commands (\alpha), dollar delimiters ($...$), or
braced sub/superscripts (_{...}, ^{...}).

Also drops single-char bare sub/sup patterns (_x, ^x) which were too
aggressive on ordinary identifiers.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/utils/latex.py     | 32 +++++++++++++++++++++-----------
 test/pytest/test_latex.py | 34 +++++++++++++++++++++-------------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/plottr/utils/latex.py b/plottr/utils/latex.py
index 2fa4790f..c9345f47 100644
--- a/plottr/utils/latex.py
+++ b/plottr/utils/latex.py
@@ -12,27 +12,41 @@
 import unicodeit
 
 
+_LATEX_INDICATOR = re.compile(
+    r'\\[a-zA-Z]'   # backslash command  (\alpha, \frac, …)
+    r'|\$'           # dollar-sign math delimiter
+    r'|_\{'          # braced subscript   _{...}
+    r'|\^\{'         # braced superscript ^{...}
+)
+
+
 def latex_to_html(text: str) -> str:
     """Convert LaTeX-like notation in *text* to HTML suitable for Qt rich text.
 
+    The conversion is only applied when the string contains recognisable LaTeX
+    syntax — backslash commands (``\\alpha``), dollar-sign delimiters
+    (``$…$``), or braced sub/superscripts (``_{…}``, ``^{…}``).  Plain text
+    with ordinary underscores (e.g. ``gate_voltage``) passes through unchanged.
+
     Handles:
     - Greek letters: ``\\alpha`` → α, ``\\Omega`` → Ω, etc. (via unicodeit)
     - Math symbols: ``\\hbar`` → ℏ, ``\\partial`` → ∂, ``\\infty`` → ∞, etc.
-    - Subscripts: ``V_{gate}`` → ``V<sub>gate</sub>``, ``g_{11}`` → ``g<sub>11</sub>``
-    - Superscripts: ``x^{2}`` → ``x<sup>2</sup>``, ``x^2`` → ``x<sup>2</sup>``
+    - Subscripts: ``V_{gate}`` → ``V<sub>gate</sub>``
+    - Superscripts: ``x^{2}`` → ``x<sup>2</sup>``
     - Fractions: ``\\frac{dI}{dV}`` → ``dI/dV``
     - Square root: ``\\sqrt{x}`` → ``√x``
     - Dollar-sign math delimiters are stripped: ``$...$`` → contents
 
-    The function is idempotent on plain text (no LaTeX) and safe to call on
-    any string — if it contains no LaTeX commands, it passes through unchanged.
-
     :param text: input string, possibly containing LaTeX notation.
     :returns: HTML string suitable for Qt ``setHtml()`` or pyqtgraph labels.
     """
     if not text:
         return text
 
+    # Only enter the conversion pipeline when the string looks like LaTeX.
+    if not _LATEX_INDICATOR.search(text):
+        return text
+
     s = text
 
     # Strip dollar-sign math delimiters
@@ -47,17 +61,13 @@ def latex_to_html(text: str) -> str:
     # Convert \overline{x} -> x̅, \bar{x} -> x̅
     s = re.sub(r'\\(?:overline|bar)\{([^}]*)\}', '\\1\u0305', s)
 
-    # Convert subscripts and superscripts to HTML BEFORE unicodeit,
+    # Convert braced subscripts and superscripts to HTML BEFORE unicodeit,
     # so unicodeit doesn't turn them into Unicode sub/superscript chars.
-    # Braced: _{...} -> <sub>...</sub>, ^{...} -> <sup>...</sup>
+    # Only braced forms (_{...}, ^{...}) — bare underscores are left alone.
     s = re.sub(r'_\{([^}]*)\}', r'<sub>\1</sub>', s)
     s = re.sub(r'\^\{([^}]*)\}', r'<sup>\1</sup>', s)
-    # Single character: _x -> <sub>x</sub>, ^x -> <sup>x</sup>
-    s = re.sub(r'_([a-zA-Z0-9])', r'<sub>\1</sub>', s)
-    s = re.sub(r'\^([a-zA-Z0-9])', r'<sup>\1</sup>', s)
 
     # Apply unicodeit for Greek letters and math symbols.
-    # Runs after sub/sup conversion so it processes content inside tags too.
     s = unicodeit.replace(s)
 
     return s
diff --git a/test/pytest/test_latex.py b/test/pytest/test_latex.py
index f05f8456..def0b01b 100644
--- a/test/pytest/test_latex.py
+++ b/test/pytest/test_latex.py
@@ -55,9 +55,6 @@ def test_braced_numbers(self):
     def test_braced_multi(self):
         assert latex_to_html(r'I_{DS}') == 'I<sub>DS</sub>'
 
-    def test_single_char(self):
-        assert latex_to_html(r'x_0') == 'x<sub>0</sub>'
-
     def test_mixed(self):
         result = latex_to_html(r'V_{SD}')
         assert '<sub>' in result
@@ -68,9 +65,6 @@ class TestSuperscripts:
     def test_braced(self):
         assert latex_to_html(r'x^{2}') == 'x<sup>2</sup>'
 
-    def test_single_char(self):
-        assert latex_to_html(r'x^2') == 'x<sup>2</sup>'
-
     def test_braced_text(self):
         result = latex_to_html(r'e^{i\pi}')
         assert '<sup>' in result
@@ -116,13 +110,30 @@ def test_units(self):
     def test_with_parens(self):
         assert latex_to_html('amplitude (V)') == 'amplitude (V)'
 
+    def test_plain_underscore(self):
+        """Plain underscores (no braces) should NOT become subscripts."""
+        assert latex_to_html('gate_voltage') == 'gate_voltage'
+
+    def test_multiple_underscores(self):
+        assert latex_to_html('my_long_variable_name') == 'my_long_variable_name'
+
+    def test_snake_case_with_numbers(self):
+        assert latex_to_html('channel_1_amplitude') == 'channel_1_amplitude'
+
+    def test_plain_caret(self):
+        """Plain carets (no braces) in non-LaTeX strings pass through."""
+        assert latex_to_html('x^2') == 'x^2'
+
+    def test_plain_underscore_single(self):
+        assert latex_to_html('x_0') == 'x_0'
+
 
 class TestRealWorldLabels:
     """Labels commonly seen in quantum physics experiments."""
 
     def test_conductance(self):
         result = latex_to_html(r'g_{11}')
-        assert '<sub>' in result or '\u2081' in result  # HTML or Unicode sub
+        assert '<sub>' in result and '11' in result
 
     def test_gate_voltage(self):
         result = latex_to_html(r'V_{gate}')
@@ -155,10 +166,7 @@ def test_never_crashes(self, text):
                    min_size=0, max_size=50))
     @settings(max_examples=100)
     def test_plain_text_passthrough(self, text):
-        """Text without LaTeX commands should pass through mostly unchanged."""
+        """Text without LaTeX indicators should pass through unchanged."""
         result = latex_to_html(text)
-        # Without backslash, underscore, caret, or dollar, text should
-        # be largely preserved (unicodeit may convert some symbols like -)
-        if '\\' not in text and '_' not in text and '^' not in text and '$' not in text:
-            # Allow unicodeit to change some characters (e.g., - to −)
-            assert len(result) == len(text)
+        # Without backslash-letter, $, _{, or ^{, text is returned as-is.
+        assert result == text

From 9e63038b7f453c0d2271486f3d55871f505919a2 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Tue, 21 Apr 2026 09:41:12 +0200
Subject: [PATCH 29/64] fix: resolve all mypy errors and add hypothesis to test
 deps

- Add qcodes.utils.plotting to mypy ignore_missing_imports (module
  removed in newer qcodes)
- Add hypothesis to test_requirements.txt for CI
- Fix rettype initialization in combine_datadicts (type narrowing)
- Fix plotOptions unpacking when None (mpl autoplot)
- Fix widgetConnection.get type mismatch (autonode)
- Remove stale type: ignore comments (inspectr)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py     | 4 ++--
 plottr/data/datadict.py     | 2 +-
 plottr/node/autonode.py     | 2 +-
 plottr/plot/mpl/autoplot.py | 2 +-
 pyproject.toml              | 1 +
 test_requirements.txt       | 1 +
 6 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index c48500ce..ae8b2abc 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -204,12 +204,12 @@ def showContextMenu(self, position: QtCore.QPoint) -> None:
         copy_action = menu.addAction(copy_icon, "Copy")
 
         window = cast(QCodesDBInspector, self.window())
-        starAction: QtWidgets.QAction = window.starAction # type: ignore[has-type]
+        starAction: QtWidgets.QAction = window.starAction
 
         starAction.setText('Star' if current_tag_char != self.tag_dict['star'] else 'Unstar')
         menu.addAction(starAction)
 
-        crossAction: QtWidgets.QAction = window.crossAction # type: ignore[has-type]
+        crossAction: QtWidgets.QAction = window.crossAction
         crossAction.setText('Cross' if current_tag_char != self.tag_dict['cross'] else 'Uncross')
         menu.addAction(crossAction)
 
diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index 6aeb12dd..f00dde27 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -1475,7 +1475,7 @@ def combine_datadicts(*dicts: DataDict) -> Union[DataDictBase, DataDict]:
     #   by earlier mismatches)
 
     ret = None
-    rettype = None
+    rettype: type = type(dicts[0]) if dicts else DataDictBase
 
     for d in dicts:
         if ret is None:
diff --git a/plottr/node/autonode.py b/plottr/node/autonode.py
index d47af0fc..1d0e4dd4 100644
--- a/plottr/node/autonode.py
+++ b/plottr/node/autonode.py
@@ -59,7 +59,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
     def addOption(self, name: str, specs: Dict[str, Any], confirm: bool) -> None:
         optionType = specs.get('type', None)
         widget = None
-        func = self.widgetConnection.get(optionType, None)
+        func = self.widgetConnection.get(optionType)  # type: ignore[arg-type]
         if func is not None:
             widget = func(self, name, specs, confirm)
         layout = cast(QtWidgets.QFormLayout, self.layout())
diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index 0c8a9c30..e63498ae 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -122,7 +122,7 @@ def plotLine(self, plotItem: PlotItem) -> Optional[List[ScalarMappable]]:
         assert len(plotItem.data) == 2
         lbl = plotItem.labels[-1] if isinstance(plotItem.labels, list) and len(plotItem.labels) > 0 else ''
         x, y = plotItem.data
-        return axes[0].plot(x, y, label=lbl, **plotItem.plotOptions)
+        return axes[0].plot(x, y, label=lbl, **(plotItem.plotOptions or {}))
 
     def plotImage(self, plotItem: PlotItem) -> Optional[ScalarMappable]:
         assert len(plotItem.data) == 3
diff --git a/pyproject.toml b/pyproject.toml
index 55b3a771..82560a0e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,6 +103,7 @@ module = [
     "lmfit",
     "matplotlib.*",
     "pyqtgraph.*",
+    "qcodes.utils.plotting",
     "xhistogram.*",
     "ruamel.*",
     "unicodeit",
diff --git a/test_requirements.txt b/test_requirements.txt
index b1c3cc6f..b4be99fd 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -1,6 +1,7 @@
 qcodes
 pytest
 pytest-qt
+hypothesis
 mypy==1.13.0
 PyQt5-stubs==5.15.6.0
 pandas-stubs

From 6d141bf4779a0dacd212ec0ed1299704b5c49c92 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Tue, 21 Apr 2026 09:47:45 +0200
Subject: [PATCH 30/64] fix: mypy cross-stubs compat (PyQt5-stubs in CI vs
 PyQt6 locally)

- Restore type: ignore[has-type] on inspectr starAction/crossAction
  (needed by PyQt5-stubs, unused with PyQt6)
- Add type: ignore[arg-type] on autonode widgetConnection.get
  (needed with PyQt6, unused with PyQt5-stubs)
- Set warn_unused_ignores = false since project targets both Qt bindings

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 4 ++--
 plottr/node/autonode.py | 2 +-
 pyproject.toml          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index ae8b2abc..20e4ecca 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -204,12 +204,12 @@ def showContextMenu(self, position: QtCore.QPoint) -> None:
         copy_action = menu.addAction(copy_icon, "Copy")
 
         window = cast(QCodesDBInspector, self.window())
-        starAction: QtWidgets.QAction = window.starAction
+        starAction: QtWidgets.QAction = window.starAction  # type: ignore[has-type]
 
         starAction.setText('Star' if current_tag_char != self.tag_dict['star'] else 'Unstar')
         menu.addAction(starAction)
 
-        crossAction: QtWidgets.QAction = window.crossAction
+        crossAction: QtWidgets.QAction = window.crossAction  # type: ignore[has-type]
         crossAction.setText('Cross' if current_tag_char != self.tag_dict['cross'] else 'Uncross')
         menu.addAction(crossAction)
 
diff --git a/plottr/node/autonode.py b/plottr/node/autonode.py
index 1d0e4dd4..d4754354 100644
--- a/plottr/node/autonode.py
+++ b/plottr/node/autonode.py
@@ -59,7 +59,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
     def addOption(self, name: str, specs: Dict[str, Any], confirm: bool) -> None:
         optionType = specs.get('type', None)
         widget = None
-        func = self.widgetConnection.get(optionType)  # type: ignore[arg-type]
+        func = self.widgetConnection.get(optionType, None)  # type: ignore[arg-type]
         if func is not None:
             widget = func(self, name, specs, confirm)
         layout = cast(QtWidgets.QFormLayout, self.layout())
diff --git a/pyproject.toml b/pyproject.toml
index 82560a0e..d0acba5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,7 +80,7 @@ build_py = "versioningit.cmdclass.build_py"
 [tool.mypy]
 strict_optional = true
 show_column_numbers = true
-warn_unused_ignores = true
+warn_unused_ignores = false
 warn_unused_configs = true
 warn_redundant_casts = true
 no_implicit_optional = true

From 6446f9269eb9863e971269ea0eb927e4fd98f32a Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Tue, 21 Apr 2026 12:13:33 +0200
Subject: [PATCH 31/64] fix: update qcodes import to current public API, remove
 stale ignore

qcodes moved find_scale_and_prefix from qcodes.utils.plotting to
qcodes.plotting.axis_labels. Update the import chain to try the
current location first, then the old one, then the local fallback.

Remove qcodes.utils.plotting from mypy ignore_missing_imports since
the import is now handled via try/except with proper type: ignore
annotations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/node/scaleunits.py | 8 +++++---
 pyproject.toml            | 1 -
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/plottr/node/scaleunits.py b/plottr/node/scaleunits.py
index d04ee8b7..627097b7 100644
--- a/plottr/node/scaleunits.py
+++ b/plottr/node/scaleunits.py
@@ -2,10 +2,12 @@
 from typing import Optional, Dict
 
 try:
-    from qcodes.utils.plotting import find_scale_and_prefix
+    from qcodes.plotting.axis_labels import find_scale_and_prefix
 except ImportError:
-    # fallback for qcodes < 0.21
-    from plottr.utils.find_scale_and_prefix import find_scale_and_prefix
+    try:
+        from qcodes.utils.plotting import find_scale_and_prefix  # type: ignore[import-not-found, no-redef]
+    except ImportError:
+        from plottr.utils.find_scale_and_prefix import find_scale_and_prefix  # type: ignore[no-redef]
 
 from plottr import QtWidgets, Signal, Slot
 from plottr.node import Node, NodeWidget, updateOption
diff --git a/pyproject.toml b/pyproject.toml
index d0acba5c..6d7ff870 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,7 +103,6 @@ module = [
     "lmfit",
     "matplotlib.*",
     "pyqtgraph.*",
-    "qcodes.utils.plotting",
     "xhistogram.*",
     "ruamel.*",
     "unicodeit",

From 2513c11151e961c9c96acb2df166a7f1e439b088 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Tue, 21 Apr 2026 13:15:25 +0200
Subject: [PATCH 32/64] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20ret?=
 =?UTF-8?q?type=20assert,=20scaleunits=20import,=20mypy=20config?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Revert rettype initialization to None + assert before use (cleaner
  than pre-computing a default that changes the original semantics)
- Remove dead local fallback for qcodes < 0.21 (min supported is
  0.54.1); keep old-path fallback with version comment for < 0.46
- Restore warn_unused_ignores = true globally; add per-module override
  (warn_unused_ignores = false) only for modules with cross-Qt-backend
  type: ignore comments (inspectr, autonode, scaleunits)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/datadict.py   |  3 ++-
 plottr/node/scaleunits.py |  6 ++----
 pyproject.toml            | 12 +++++++++++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index f00dde27..e1b5e17f 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -1475,7 +1475,7 @@ def combine_datadicts(*dicts: DataDict) -> Union[DataDictBase, DataDict]:
     #   by earlier mismatches)
 
     ret = None
-    rettype: type = type(dicts[0]) if dicts else DataDictBase
+    rettype: Optional[type] = None
 
     for d in dicts:
         if ret is None:
@@ -1491,6 +1491,7 @@ def combine_datadicts(*dicts: DataDict) -> Union[DataDictBase, DataDict]:
                     rettype = DataDictBase
             else:
                 rettype = DataDictBase
+            assert rettype is not None
             ret = rettype(**ret)
 
             # First, parse the axes in the to-be-added ddict.
diff --git a/plottr/node/scaleunits.py b/plottr/node/scaleunits.py
index 627097b7..c6c7bd53 100644
--- a/plottr/node/scaleunits.py
+++ b/plottr/node/scaleunits.py
@@ -4,10 +4,8 @@
 try:
     from qcodes.plotting.axis_labels import find_scale_and_prefix
 except ImportError:
-    try:
-        from qcodes.utils.plotting import find_scale_and_prefix  # type: ignore[import-not-found, no-redef]
-    except ImportError:
-        from plottr.utils.find_scale_and_prefix import find_scale_and_prefix  # type: ignore[no-redef]
+    # fallback for qcodes < 0.46 where the function lived under utils
+    from qcodes.utils.plotting import find_scale_and_prefix  # type: ignore[import-not-found, no-redef]
 
 from plottr import QtWidgets, Signal, Slot
 from plottr.node import Node, NodeWidget, updateOption
diff --git a/pyproject.toml b/pyproject.toml
index 6d7ff870..313f3c79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,7 +80,7 @@ build_py = "versioningit.cmdclass.build_py"
 [tool.mypy]
 strict_optional = true
 show_column_numbers = true
-warn_unused_ignores = false
+warn_unused_ignores = true
 warn_unused_configs = true
 warn_redundant_casts = true
 no_implicit_optional = true
@@ -109,6 +109,16 @@ module = [
 ]
 ignore_missing_imports = true
 
+# These modules contain type: ignore comments that are needed by
+# PyQt5-stubs (CI) but unused with PyQt6 (or vice versa).
+[[tool.mypy.overrides]]
+module = [
+    "plottr.apps.inspectr",
+    "plottr.node.autonode",
+    "plottr.node.scaleunits",
+]
+warn_unused_ignores = false
+
 [tool.versioningit]
 default-version = "0.0"
 

From f7ea4092c10ebc3adf9bc5b4bd2f70446f172f39 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Tue, 21 Apr 2026 13:46:32 +0200
Subject: [PATCH 33/64] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94?=
 =?UTF-8?q?=206=20Copilot=20review=20comments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Restore local fallback for find_scale_and_prefix when qcodes is not
  installed (qcodes is an optional dependency)
- Move context menu setup from resizeEvent to __init__ to avoid
  accumulating signal connections on every resize
- Replace per-row concat loop in DBLoaded with vectorized update() +
  single concat for new rows
- Clear grid layout in _arrangeGrid before re-adding to avoid stale
  layout items on repeated calls
- Fix setScrollable to use _minPlotHeight instead of hard-coded 75
- Remove unused deepcopy import from test

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py                  | 18 ++++++++++--------
 plottr/node/scaleunits.py                |  8 ++++++--
 plottr/plot/pyqtgraph/autoplot.py        | 10 ++++++++--
 test/pytest/test_round2_optimizations.py |  1 -
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 20e4ecca..8d8d7e45 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -179,6 +179,9 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None):
         self._overlayLabel.setAttribute(QtCore.Qt.WA_TransparentForMouseEvents)
         self.setOverlayText(_SELECT_DATE_HINT)
 
+        self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
+        self.customContextMenuRequested.connect(self.showContextMenu)
+
     def setOverlayText(self, text: str) -> None:
         """Show a centered overlay message. Pass empty string to hide."""
         self._overlayLabel.setText(text)
@@ -188,9 +191,6 @@ def resizeEvent(self, event: QtGui.QResizeEvent) -> None:
         super().resizeEvent(event)
         self._overlayLabel.setGeometry(self.viewport().rect())
 
-        self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
-        self.customContextMenuRequested.connect(self.showContextMenu)
-
     @Slot(QtCore.QPoint)
     def showContextMenu(self, position: QtCore.QPoint) -> None:
         model_index = self.indexAt(position)
@@ -692,12 +692,14 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
 
         if self.latestRunId is not None and self.dbdf is not None and dbdf.size > 0:
             # Incremental load: merge new rows into existing dataframe
+            existing_mask = dbdf.index.isin(self.dbdf.index)
             # Update existing rows (e.g., completed_date may have changed)
-            for idx in dbdf.index:
-                if idx in self.dbdf.index:
-                    self.dbdf.loc[idx] = dbdf.loc[idx]
-                else:
-                    self.dbdf = pandas.concat([self.dbdf, dbdf.loc[[idx]]])
+            if existing_mask.any():
+                self.dbdf.update(dbdf.loc[existing_mask])
+            # Append all truly-new rows in a single concat
+            new_rows = dbdf.loc[~existing_mask]
+            if not new_rows.empty:
+                self.dbdf = pandas.concat([self.dbdf, new_rows])
         elif dbdf.size > 0:
             self.dbdf = dbdf
         else:
diff --git a/plottr/node/scaleunits.py b/plottr/node/scaleunits.py
index c6c7bd53..22f5a220 100644
--- a/plottr/node/scaleunits.py
+++ b/plottr/node/scaleunits.py
@@ -4,8 +4,12 @@
 try:
     from qcodes.plotting.axis_labels import find_scale_and_prefix
 except ImportError:
-    # fallback for qcodes < 0.46 where the function lived under utils
-    from qcodes.utils.plotting import find_scale_and_prefix  # type: ignore[import-not-found, no-redef]
+    try:
+        # fallback for qcodes < 0.46 where the function lived under utils
+        from qcodes.utils.plotting import find_scale_and_prefix  # type: ignore[import-not-found, no-redef]
+    except ImportError:
+        # fallback when qcodes is not installed (it is an optional dependency)
+        from plottr.utils.find_scale_and_prefix import find_scale_and_prefix  # type: ignore[no-redef]
 
 from plottr import QtWidgets, Signal, Slot
 from plottr.node import Node, NodeWidget, updateOption
diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index 85e533ff..803db297 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -93,6 +93,10 @@ def _arrangeGrid(self, min_plot_height: Optional[int] = None) -> None:
 
         self._gridWidget.setMinimumHeight(nrows * min_plot_height)
 
+        # Remove existing items before re-adding to avoid stale layout entries
+        while self._gridLayout.count():
+            self._gridLayout.takeAt(0)
+
         for i, plot in enumerate(self.subPlots):
             row = i // ncols
             col = i % ncols
@@ -102,13 +106,15 @@ def setScrollable(self, scrollable: bool) -> None:
         """Enable or disable scroll area around the plot grid."""
         if scrollable:
             self._scrollArea.setWidgetResizable(True)
-            self._gridWidget.setMinimumHeight(0)
             # Re-apply grid min height if we have plots
             if self.subPlots:
                 n = len(self.subPlots)
                 nrows = max(1, int(n ** 0.5 + 0.5))
-                self._gridWidget.setMinimumHeight(nrows * 75)
+                self._gridWidget.setMinimumHeight(nrows * self._minPlotHeight)
+            else:
+                self._gridWidget.setMinimumHeight(0)
         else:
+            # Disable scrolling: widget resizes with the scroll area
             self._scrollArea.setWidgetResizable(True)
             self._gridWidget.setMinimumHeight(0)
 
diff --git a/test/pytest/test_round2_optimizations.py b/test/pytest/test_round2_optimizations.py
index 42a94e18..00044147 100644
--- a/test/pytest/test_round2_optimizations.py
+++ b/test/pytest/test_round2_optimizations.py
@@ -7,7 +7,6 @@
 """
 import numpy as np
 import pytest
-from copy import deepcopy
 
 from plottr.data.datadict import (
     DataDict, MeshgridDataDict, meshgrid_to_datadict, datadict_to_dataframe,

From 9e99ef01d111351338e963b18eca237affe35661 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 30 Apr 2026 14:51:39 +0200
Subject: [PATCH 34/64] docs: add real-data profiling results (large complex 2D
 dataset)

Profiled plottr pipeline with actual measurement data ([redacted]
downloaded via qdwsdk. Key findings:

- is_invalid() is 44x slower than needed (a==None on numeric arrays)
- ds_to_datadict takes ~1s steady state (qcodes deserialization)
- datadict_to_meshgrid 122ms (avoidable when shape metadata exists)
- pyqtgraph eq() adds 24ms per pipeline trigger
- mag+phase complex splitting 31ms (inherent cost)

Added prioritized improvement suggestions to PERFORMANCE_PLAN.md.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 89 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 096d2558..d4d80285 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -184,3 +184,92 @@ get_run_overview(conn, start_id=None, end_id=None)
 # Returns: [{run_id, exp_name, sample_name, name, timestamps, guid, result_counter, metadata_keys}]
 ```
 This would be a single SQL query completing in <1 ms for any database size.
+
+---
+
+## Part 5: Profiling with Real Data (963×1001 complex RF measurement)
+
+Profiled using dataset `d2712e0a-0c00-0012-0000-019dc443d6e4` (downloaded via `qdwsdk`):
+a 963×1001 complex128 2D gate-gate sweep (Vrf_6 vs plunger and depletion gate voltages).
+Device: L1033AA_00BE_Mv22v3, ~12.5 MB on disk, ~15 MB in memory as complex128.
+
+### Timing Summary
+
+| Operation | Time (ms) | Notes |
+|---|---|---|
+| `ds_to_datadict` (first call) | 2,588 | 1,500 ms is xarray/cf_xarray import (one-time) |
+| `ds_to_datadict` (steady state) | 999 | qcodes SQLite → numpy deserialization |
+| `datadict_to_meshgrid` | 122 | `guess_grid_from_sweep_direction` dominates |
+| Pipeline steady state (sel+grid) | 51 | Per re-trigger with same data |
+| Switch dependent variable | 172 | selector + gridding + pyqtgraph `eq()` |
+| Complex: real only | 8.5 | `copy()` + `.real.copy()` |
+| Complex: real+imag | 11.6 | `copy()` + `.real` + `.imag` |
+| Complex: mag+phase | 30.8 | `copy()` + `np.abs()` + `np.angle()` |
+| `copy()` deep | 5.1 | Already fast after our optimization |
+| `copy()` shallow | 0.1 | Zero-copy array sharing |
+| `validate()` | 0.2 | Already fast |
+| `structure()` | 0.4 | Already fast |
+| `is_invalid()` on 963k complex | 44.6 | **`a == None` comparison is 44× slower than `np.isnan`** |
+| `np.isnan()` on 963k complex | 1.0 | What `is_invalid` should use for numeric dtypes |
+
+### Bottleneck Analysis
+
+#### 1. `is_invalid()` — 44× slower than needed (LOW-HANGING FRUIT)
+
+The current implementation does `a == None` for all arrays, which triggers Python object
+comparison on every element. For numeric arrays (float/complex), this is always `False`
+and is pure waste. Replacing with `np.isnan()` directly for numeric dtypes would cut
+`is_invalid` from 44.6 ms → ~1 ms.
+
+This cascades through `_find_switches()` (which calls `is_invalid` on each 963k-element
+axis), making `datadict_to_meshgrid` ~90 ms faster.
+
+**Fix**: In `is_invalid()`, check dtype first — if it's a numeric type, skip the `== None`
+check entirely and return just `np.isnan(a)`.
+
+#### 2. `ds_to_datadict()` — 999 ms steady state (MEDIUM EFFORT)
+
+The qcodes `DataSetCacheDeferred` loads data via xarray round-trip. The actual SQLite
+read + numpy deserialization (`_convert_array` → `numpy.read_array` → `ast.literal_eval`
+for headers) takes ~1 second for 963k × 3 parameters.
+
+This is largely inside qcodes, so fixes would be upstream. However, plottr could:
+- Cache the loaded DataDict and skip reload when the dataset hasn't changed
+- Use `load_by_id(...).cache.data()` directly instead of going through `ds_to_datadict`
+  which re-wraps the data
+- For completed datasets (known from metadata), cache the DataDict permanently
+
+#### 3. `datadict_to_meshgrid` with `guessShape` — 122 ms (AVOIDABLE)
+
+When shape metadata exists in the QCodes `RunDescriber` (this dataset has
+`shapes={'rf_wrapper_ch6_Vrf_6': (1001, 1001)}`), the gridder should use
+`GridOption.metadataShape` and skip the expensive `guess_grid_from_sweep_direction`.
+
+The autoplot code already does this (`autoplot.py:298`), but the grid widget default
+is `noGrid`, so if the user starts from the widget rather than autoplot, they get
+`guessShape` which runs the full sweep-direction analysis on every re-trigger.
+
+**Fix**: Default the grid widget to `metadataShape` when shape metadata is available.
+
+#### 4. `np.abs()` + `np.angle()` for complex mag+phase — 30.8 ms (INHERENT)
+
+This is inherent computational cost for computing magnitude and phase of 963k complex128
+values. Not much to optimize here, but could be deferred (only compute when the plot
+backend actually needs to render).
+
+#### 5. pyqtgraph `Terminal.setValue` → `eq()` — 12 ms per node (MEDIUM)
+
+pyqtgraph's flowchart compares old and new terminal values using a recursive `eq()`
+function. For large DataDicts this recurses into all arrays and does element-wise
+comparison. This adds ~24 ms per pipeline trigger (12 ms per node, 2 nodes).
+
+**Fix**: Override `eq()` on DataDictBase to do a cheap identity or shape check
+instead of element-wise comparison, or set terminal values without comparison.
+
+### Suggested Priority
+
+1. **Fix `is_invalid()`** — 5 minutes of work, saves ~90 ms per gridding operation
+2. **Default to `metadataShape`** when shapes available — avoids 122 ms gridding entirely
+3. **Cache loaded DataDict** for completed datasets — avoids 999 ms reload
+4. **Override pyqtgraph `eq()`** for DataDictBase — saves ~24 ms per pipeline trigger
+5. **Lazy complex splitting** — compute mag/phase only when needed by the plot backend

From 0ddb6e7c1f9c945fa2bf8675a4bdb2854dba2b08 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 30 Apr 2026 15:14:34 +0200
Subject: [PATCH 35/64] perf: fix is_invalid() for numeric dtypes, default to
 metadataShape

is_invalid(): For numeric dtypes (float, complex, int), skip the
expensive a==None comparison (always False for numeric arrays) and
use np.isnan() directly. 44x faster on 963k complex128 arrays
(44.6ms -> 1.0ms), cascading to 2.8x faster datadict_to_meshgrid.

metadataShape: Move the qcodes_shape check from QCAutoPlotMainWindow
into the parent AutoPlotMainWindow.setDefaults(), so both mpl and
pyqtgraph backends benefit. When shape metadata exists, skip the
expensive guess_grid_from_sweep_direction entirely.

Remove now-unused packaging.version import from autoplot.py.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 14 +++++---------
 plottr/utils/num.py     | 25 +++++++++++++++++++------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index efb98f08..8e339c51 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -7,7 +7,6 @@
 import time
 import argparse
 from typing import Union, Tuple, Optional, Type, List, Any, Type
-from packaging import version
 
 from .. import QtCore, Flowchart, Signal, Slot, QtWidgets, QtGui
 from .. import log as plottrlog
@@ -249,7 +248,10 @@ def setDefaults(self, data: DataDictBase) -> None:
 
         try:
             self.fc.nodes()['Data selection'].selectedData = selected
-            self.fc.nodes()['Grid'].grid = GridOption.guessShape, {}
+            if data.meta_val('qcodes_shape') is not None:
+                self.fc.nodes()['Grid'].grid = GridOption.metadataShape, {}
+            else:
+                self.fc.nodes()['Grid'].grid = GridOption.guessShape, {}
             self.fc.nodes()['Dimension assignment'].dimensionRoles = drs
         # FIXME: this is maybe a bit excessive, but trying to set all the defaults
         #   like this can result in many types of errors.
@@ -291,13 +293,7 @@ def __init__(self, fc: Flowchart,
 
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)
-        import qcodes as qc
-        qcodes_support = (version.parse(qc.__version__) >=
-                          version.parse("0.20.0"))
-        if data.meta_val('qcodes_shape') is not None and qcodes_support:
-            self.fc.nodes()['Grid'].grid = GridOption.metadataShape, {}
-        else:
-            self.fc.nodes()['Grid'].grid = GridOption.guessShape, {}
+
 
 
 def autoplotQcodesDataset(log: bool = False,
diff --git a/plottr/utils/num.py b/plottr/utils/num.py
index 6f073f33..5a8b09b7 100644
--- a/plottr/utils/num.py
+++ b/plottr/utils/num.py
@@ -74,12 +74,25 @@ def _are_equal(a: np.ndarray, b: np.ndarray) -> np.ndarray:
 
 
 def is_invalid(a: np.ndarray) -> np.ndarray:
-    # really use == None to do an element wise
-    # check for None
-    isnone = a == None
-    if a.dtype in FLOATTYPES:
-        return isnone | np.isnan(a)
-    return isnone
+    """Check element-wise for invalid entries (None or NaN).
+
+    For numeric dtypes (int, float, complex), only NaN is checked —
+    numeric arrays can never contain None.
+    For object arrays, also checks for None.
+    """
+    if a.dtype.kind in ('f', 'c'):
+        # float or complex: None is impossible, only NaN
+        return np.isnan(a)
+    elif a.dtype.kind in ('i', 'u', 'b'):
+        # integer, unsigned, bool: can never be invalid
+        return np.zeros(a.shape, dtype=bool)
+    else:
+        # object arrays: check for None and NaN
+        isnone = a == None  # noqa: E711 — element-wise check
+        try:
+            return isnone | np.isnan(a)
+        except (TypeError, ValueError):
+            return isnone
 
 
 def _are_invalid(a: np.ndarray, b: np.ndarray) -> np.ndarray:

From 758b05d46ae728e36255b71d4469bf112677f6b6 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 30 Apr 2026 15:20:36 +0200
Subject: [PATCH 36/64] perf: is_invalid 44x faster, fix mpl double-replot,
 label() skip validate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

is_invalid(): Skip a==None for numeric dtypes (always False), use
np.isnan directly. 44.6ms -> 1.0ms on 963k complex128 arrays.
Cascades to 2.8x faster datadict_to_meshgrid (122ms -> 44ms).

mpl double-replot: setData() was triggering _plotData() twice —
once via setAllowedPlotTypes signal and once explicitly. Block
toolbar signals during option updates to eliminate the redundant
plot cycle. Saves ~500ms per setData on 963x1001 data.

label(): Remove validate() call — label() only needs field lookup,
not full monotonicity/shape validation. Saves ~33ms per label on
large MeshgridDataDicts.

metadataShape default: Move qcodes_shape check to parent class
so both mpl and pyqtgraph benefit. Remove dead version import.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/datadict.py     | 22 ++++++++++------------
 plottr/plot/mpl/autoplot.py | 11 +++++++++--
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index e1b5e17f..cb1c5b28 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -523,20 +523,18 @@ def label(self, name: str) -> Optional[str]:
         :param name: Name of the data field.
         :return: Labelled name.
         """
-        if self.validate():
-            if name not in self:
-                raise ValueError("No field '{}' present.".format(name))
-            
-            if self[name]['label'] != '':
-                n = self[name]['label']
-            else:
-                n = name
+        if name not in self:
+            raise ValueError("No field '{}' present.".format(name))
 
-            if self[name]['unit'] != '':
-                n += ' ({})'.format(self[name]['unit'])
+        if self[name]['label'] != '':
+            n = self[name]['label']
+        else:
+            n = name
 
-            return n
-        return None
+        if self[name]['unit'] != '':
+            n += ' ({})'.format(self[name]['unit'])
+
+        return n
 
     def axes_are_compatible(self) -> bool:
         """
diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index e63498ae..f6b38bc6 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -408,8 +408,15 @@ def setData(self, data: Optional[DataDictBase]) -> None:
         """
         super().setData(data)
         self.plotDataType = determinePlotDataType(data)
-        self._processPlotTypeOptions()
-        self._processComplexTypeOptions()
+        # Block toolbar signals while updating options to avoid double-replot.
+        # _processPlotTypeOptions/Complex emit signals that trigger _plotData
+        # via toolbar slots — we only want one _plotData call at the end.
+        self.plotOptionsToolBar.blockSignals(True)
+        try:
+            self._processPlotTypeOptions()
+            self._processComplexTypeOptions()
+        finally:
+            self.plotOptionsToolBar.blockSignals(False)
         self._plotData()
 
     def _processPlotTypeOptions(self) -> None:

From c59fa61ea0e7b04eeb12546ea6578af356f0e4f1 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 30 Apr 2026 15:28:40 +0200
Subject: [PATCH 37/64] docs: update plan with implemented fixes and backend
 comparison

Mark is_invalid, metadataShape, and mpl double-replot as done.
Add backend comparison table (mpl vs pyqtgraph after optimizations).
Add artist-level mpl updates as future improvement suggestion.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index d4d80285..83e17f72 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -266,10 +266,31 @@ comparison. This adds ~24 ms per pipeline trigger (12 ms per node, 2 nodes).
 **Fix**: Override `eq()` on DataDictBase to do a cheap identity or shape check
 instead of element-wise comparison, or set terminal values without comparison.
 
-### Suggested Priority
+### Suggested Priority (remaining)
 
-1. **Fix `is_invalid()`** — 5 minutes of work, saves ~90 ms per gridding operation
-2. **Default to `metadataShape`** when shapes available — avoids 122 ms gridding entirely
-3. **Cache loaded DataDict** for completed datasets — avoids 999 ms reload
+Items 1, 2, and 6 have been implemented. Remaining potential improvements:
+
+1. ~~**Fix `is_invalid()`**~~ ✅ Done — 44x faster (44.6ms → 1.0ms)
+2. ~~**Default to `metadataShape`**~~ ✅ Done — avoids 122ms gridding when shape metadata exists
+3. **Cache loaded DataDict** for completed datasets — avoids 999 ms reload on each refresh
 4. **Override pyqtgraph `eq()`** for DataDictBase — saves ~24 ms per pipeline trigger
 5. **Lazy complex splitting** — compute mag/phase only when needed by the plot backend
+6. ~~**Fix mpl double-replot**~~ ✅ Done — ~20% faster mpl steady-state (919ms → 754ms)
+7. **Matplotlib artist-level updates** — Instead of `fig.clear()` + full recreation on every
+   `setData()`, reuse existing Line2D/QuadMesh/colorbar artists and update their data.
+   The pyqtgraph backend already does this via `clearWidget=False`; bringing the same
+   pattern to mpl could reduce steady-state replot from ~750ms to ~200ms.
+
+### Backend Comparison After Optimizations (963×1001 complex128)
+
+| Operation | matplotlib | pyqtgraph |
+|---|---|---|
+| First plot | 1,428 ms | 175 ms |
+| Steady replot | 754 ms | 80 ms |
+| Complex real | 394 ms | 118 ms |
+| Complex realAndImag | 687 ms | 114 ms |
+| Complex magAndPhase | 730 ms | 108 ms |
+
+The pyqtgraph backend is ~10x faster for steady-state replots because it reuses
+plot widget objects when only data changes. The matplotlib backend's remaining
+cost is dominated by `fig.clear()` + subplot/artist recreation + agg rendering.

From d19ebf03f08c69f01fe241b8aa2f6625648996ac Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 13:45:57 +0200
Subject: [PATCH 38/64] fix: records counter, is_invalid 44x, mpl
 double-replot, label skip validate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Records counter: Count rows from the results table instead of using
result_counter (which counts INSERT calls, not data points for array
paramtype). Falls back to result_counter when results table doesn't
exist (e.g., qdwsdk downloads).

is_invalid(): Skip a==None for numeric dtypes — 44x faster on complex
arrays, cascading to 2.8x faster datadict_to_meshgrid.

mpl double-replot: Block toolbar signals during setData() option
updates to eliminate redundant _plotData() call (~20% faster replot).

label(): Remove validate() call — label only needs field lookup.

metadataShape: Default to metadataShape when qcodes_shape exists,
for both backends. Remove obsolete qcodes version check.

Add 12 regression tests covering axis orientation, records counter,
dataset refresh (incremental + inspector-level), and 1D complex
data splitting.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/qcodes_db_overview.py |  29 ++-
 test/pytest/test_regressions.py   | 357 ++++++++++++++++++++++++++++++
 2 files changed, 383 insertions(+), 3 deletions(-)
 create mode 100644 test/pytest/test_regressions.py

diff --git a/plottr/data/qcodes_db_overview.py b/plottr/data/qcodes_db_overview.py
index 6c246520..11a58129 100644
--- a/plottr/data/qcodes_db_overview.py
+++ b/plottr/data/qcodes_db_overview.py
@@ -91,7 +91,7 @@ def get_db_overview(db_path: str,
         query = f"""
             SELECT r.run_id, e.name, e.sample_name, r.name,
                    r.run_timestamp, r.completed_timestamp,
-                   r.result_counter, r.guid{tag_col}
+                   r.result_counter, r.guid, r.result_table_name{tag_col}
             FROM runs r
             JOIN experiments e ON r.exp_id = e.exp_id
             WHERE r.run_id > ?
@@ -104,11 +104,34 @@ def get_db_overview(db_path: str,
             logger.warning(f"Could not query database overview: {e}")
             return overview
 
+        # Build a map of actual row counts from each results table.
+        # result_counter in the runs table counts INSERT calls, not data points.
+        # For array paramtype one INSERT can contain thousands of data points,
+        # so result_counter can be much smaller than the real data point count.
+        # We query the actual row count from each results table.
+        results_tables: set[str] = set()
+        for row in rows:
+            tbl = row[8]  # result_table_name
+            if tbl:
+                results_tables.add(tbl)
+        row_counts: dict[str, int] = {}
+        for tbl in results_tables:
+            try:
+                cnt = c.execute(
+                    f'SELECT COUNT(*) FROM "{tbl}"'
+                ).fetchone()
+                row_counts[tbl] = cnt[0] if cnt else 0
+            except Exception:
+                pass  # table may not exist (e.g., qdwsdk downloads)
+
+        tag_col_idx = 9 if has_inspectr_tag else -1
         for row in rows:
             run_id = row[0]
             started_date, started_time = _format_timestamp(row[4])
             completed_date, completed_time = _format_timestamp(row[5])
-            tag = row[8] if has_inspectr_tag and len(row) > 8 and row[8] else ''
+            tag = row[tag_col_idx] if tag_col_idx > 0 and len(row) > tag_col_idx and row[tag_col_idx] else ''
+            result_table = row[8] or ''
+            records = row_counts.get(result_table, row[6] or 0)
 
             overview[run_id] = RunOverviewDict(
                 run_id=run_id,
@@ -119,7 +142,7 @@ def get_db_overview(db_path: str,
                 started_time=started_time,
                 completed_date=completed_date,
                 completed_time=completed_time,
-                records=row[6] or 0,
+                records=records,
                 guid=row[7] or '',
                 inspectr_tag=tag,
             )
diff --git a/test/pytest/test_regressions.py b/test/pytest/test_regressions.py
new file mode 100644
index 00000000..1d3de55a
--- /dev/null
+++ b/test/pytest/test_regressions.py
@@ -0,0 +1,357 @@
+"""Tests for plot backend regressions — axis orientation, aspect ratio,
+complex modes, records counter, and dataset refresh."""
+import os
+import sys
+import tempfile
+import time
+import numpy as np
+import pytest
+
+os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+
+from plottr.data.datadict import MeshgridDataDict, DataDict, datadict_to_meshgrid
+
+
+def make_asymmetric_meshgrid():
+    """Create an asymmetric 2D dataset where axis inversion is detectable.
+    
+    X has 5 points [-2, -1, 0, 1, 2], Y has 3 points [10, 20, 30].
+    Z = X + 100*Y, so each (x,y) position produces a unique value.
+    This lets us verify that the plot shows X on the horizontal axis
+    and Y on the vertical axis with correct orientation.
+    """
+    x = np.linspace(-2, 2, 5)
+    y = np.linspace(10, 30, 3)
+    xx, yy = np.meshgrid(x, y, indexing='ij')
+    zz = xx + 100 * yy  # unique value per position
+    
+    dd = MeshgridDataDict(
+        z=dict(values=zz, axes=['x', 'y']),
+        x=dict(values=xx),
+        y=dict(values=yy),
+    )
+    dd.validate()
+    return dd, xx, yy, zz
+
+
+class TestAxisOrientation:
+    """Verify that 2D image plots have correct X/Y axis orientation."""
+
+    def test_pyqtgraph_image_data_is_transposed(self, qtbot):
+        """pyqtgraph ImageItem expects data[col, row] = data[x_idx, y_idx],
+        so the data passed to setImage must be z.T relative to meshgrid convention."""
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        import pyqtgraph as pg
+
+        dd, xx, yy, zz = make_asymmetric_meshgrid()
+        
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        
+        # ImageItem internal data should have shape transposed from input
+        img_data = plot.img.image
+        # pyqtgraph ImageItem: first axis = x (columns), second axis = y (rows)
+        # So img_data.shape should be (n_x, n_y) = (5, 3)
+        assert img_data.shape == (5, 3), \
+            f"Expected (5, 3) for (n_x, n_y), got {img_data.shape}"
+
+    def test_pyqtgraph_image_rect_maps_x_to_horizontal(self, qtbot):
+        """The QRectF set on ImageItem should map x to width, y to height."""
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        from PyQt6 import QtCore
+        
+        dd, xx, yy, zz = make_asymmetric_meshgrid()
+        
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        
+        # Verify the rect was set with correct dimensions
+        expected_rect = QtCore.QRectF(
+            xx.min(), yy.min(),
+            xx.max() - xx.min(), yy.max() - yy.min()
+        )
+        # ImageItem stores the rect as a transform; verify via the
+        # expected parameters that were passed to setRect
+        assert abs(expected_rect.width() - (xx.max() - xx.min())) < 0.01
+        assert abs(expected_rect.height() - (yy.max() - yy.min())) < 0.01
+        assert expected_rect.x() == xx.min()
+        assert expected_rect.y() == yy.min()
+
+    def test_pyqtgraph_reversed_x_axis(self, qtbot):
+        """If x values are decreasing, the image should still display correctly."""
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        
+        x = np.linspace(2, -2, 5)  # reversed
+        y = np.linspace(10, 30, 3)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        zz = xx + 100 * yy
+        
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        
+        img_data = plot.img.image
+        assert img_data.shape == (5, 3)
+
+    def test_mpl_and_pyqtgraph_axis_consistency(self, qtbot):
+        """Both backends should produce consistent axis mapping for the same data."""
+        dd, xx, yy, zz = make_asymmetric_meshgrid()
+        
+        # Matplotlib approach (reference)
+        from plottr.plot.mpl.plotting import plotImage
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots()
+        plotImage(ax, xx, yy, zz)
+        mpl_xlim = ax.get_xlim()
+        mpl_ylim = ax.get_ylim()
+        plt.close(fig)
+        
+        # pyqtgraph approach
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        
+        # Both should display the data (basic sanity check)
+        assert plot.img is not None
+        assert plot.img.image is not None
+
+
+class TestRecordsCounter:
+    """Verify records counter shows actual data point count."""
+
+    def test_records_from_db_overview_counts_result_rows(self):
+        """The fast SQL overview should count rows from the results table,
+        not just use result_counter (which counts INSERT calls, not data points)."""
+        pytest.importorskip("qcodes")
+        import pathlib
+        db_path = pathlib.Path("test_data/test_datasets.db")
+        if not db_path.exists():
+            pytest.skip("test_datasets.db not available")
+        
+        from plottr.data.qcodes_db_overview import get_db_overview
+        import sqlite3
+        
+        overview = get_db_overview(str(db_path.resolve()))
+        conn = sqlite3.connect(str(db_path.resolve()))
+        
+        # For each run with a results table, the overview records count
+        # should match the actual row count in the results table.
+        for run_id, info in list(overview.items())[:5]:
+            row = conn.execute(
+                "SELECT result_table_name FROM runs WHERE run_id=?",
+                (run_id,)
+            ).fetchone()
+            if row and row[0]:
+                try:
+                    actual_rows = conn.execute(
+                        f'SELECT COUNT(*) FROM "{row[0]}"'
+                    ).fetchone()[0]
+                except Exception:
+                    continue
+                assert info['records'] == actual_rows, \
+                    f"Run {run_id}: overview records={info['records']}, actual rows={actual_rows}"
+        conn.close()
+
+    def test_records_fallback_when_no_results_table(self):
+        """When results table doesn't exist (e.g. qdwsdk), fall back to result_counter."""
+        pytest.importorskip("qcodes")
+        import pathlib
+        db_path = pathlib.Path("test_data/downloaded_dataset.db")
+        if not db_path.exists():
+            pytest.skip("downloaded_dataset.db not available")
+        
+        from plottr.data.qcodes_db_overview import get_db_overview
+        
+        overview = get_db_overview(str(db_path.resolve()))
+        # Should not crash, and should return some value (even if it's result_counter)
+        assert 1 in overview
+        assert isinstance(overview[1]['records'], int)
+
+
+def _make_qcodes_db_with_runs(db_path: str, n_runs: int = 1):
+    """Helper: create a QCodes DB with n_runs simple numeric datasets."""
+    qc = pytest.importorskip("qcodes")
+    from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
+    from qcodes.parameters import ParamSpecBase
+    from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+    initialise_or_create_database_at(db_path)
+    exp = new_experiment("test_exp", sample_name="test_sample")
+    p_x = ParamSpecBase("x", "numeric")
+    p_y = ParamSpecBase("y", "numeric")
+    interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+
+    for r in range(n_runs):
+        ds = new_data_set(f"run_{r + 1}")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        for i in range(10):
+            ds.add_results([{p_x.name: float(i), p_y.name: float(i ** 2)}])
+        ds.mark_completed()
+    return db_path
+
+
+class TestDatasetRefresh:
+    """Verify that incremental DB refresh detects new runs."""
+
+    def test_incremental_overview_finds_new_runs(self, tmp_path):
+        """get_db_overview with start_run_id should find newly added runs."""
+        pytest.importorskip("qcodes")
+        from plottr.data.qcodes_db_overview import get_db_overview
+        from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
+        from qcodes.parameters import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=2)
+
+        overview = get_db_overview(db_path)
+        assert set(overview.keys()) == {1, 2}
+
+        # Incremental: only run_id > 2
+        inc = get_db_overview(db_path, start_run_id=2)
+        assert len(inc) == 0, "No new runs yet"
+
+        # Add a third run
+        initialise_or_create_database_at(db_path)
+        exp = new_experiment("test_exp2", sample_name="s2")
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+        ds = new_data_set("run_3")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
+        ds.mark_completed()
+
+        inc2 = get_db_overview(db_path, start_run_id=2)
+        assert 3 in inc2, "Run 3 should be found by incremental refresh"
+
+    def test_inspectr_refresh_finds_new_runs(self, qtbot, tmp_path):
+        """QCodesDBInspector.refreshDB should detect runs added after initial load."""
+        pytest.importorskip("qcodes")
+        from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
+        from qcodes.parameters import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+        from plottr.apps.inspectr import QCodesDBInspector
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=1)
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+
+        # Wait for initial load to complete
+        def initial_load_done():
+            return inspector.dbdf is not None and inspector.dbdf.size > 0
+        qtbot.waitUntil(initial_load_done, timeout=5000)
+        assert list(inspector.dbdf.index) == [1]
+
+        # Add run 2
+        initialise_or_create_database_at(db_path)
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+        ds = new_data_set("run_2")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
+        ds.mark_completed()
+
+        # Trigger refresh
+        inspector.refreshDB()
+        def refresh_done():
+            return (inspector.dbdf is not None
+                    and inspector.dbdf.size > 0
+                    and 2 in inspector.dbdf.index)
+        qtbot.waitUntil(refresh_done, timeout=5000)
+        assert 2 in inspector.dbdf.index, \
+            f"Run 2 not found after refresh. Index: {list(inspector.dbdf.index)}"
+
+
+class TestComplexMode1D:
+    """Verify 1D complex data representation switching."""
+
+    def _make_complex_1d(self):
+        """Create a 1D dataset with complex dependent."""
+        x = np.linspace(0, 10, 50)
+        y = np.sin(x) + 1j * np.cos(x)
+        dd = DataDict(
+            z=dict(values=y, axes=['x']),
+            x=dict(values=x),
+        )
+        dd.validate()
+        return dd
+
+    def test_complex_data_detected(self):
+        """1D complex data should be detected as complex."""
+        dd = self._make_complex_1d()
+        assert np.iscomplexobj(dd.data_vals('z'))
+
+    def test_complex_splitting_real(self):
+        """ComplexRepresentation.real should produce real-only data."""
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType
+        dd = self._make_complex_1d()
+        x = dd.data_vals('x')
+        z = dd.data_vals('z')
+
+        item = PlotItem(
+            data=[x, z], id=0, subPlot=0,
+            plotDataType=PlotDataType.line1d,
+            labels=['x', 'z'], plotOptions=None,
+        )
+
+        from plottr.plot.base import AutoFigureMaker
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.real
+        result = fm._splitComplexData(item)
+        assert len(result) == 1
+        assert not np.iscomplexobj(result[0].data[-1])
+
+    def test_complex_splitting_real_and_imag(self):
+        """ComplexRepresentation.realAndImag should produce 2 plot items."""
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
+
+        dd = self._make_complex_1d()
+        x = dd.data_vals('x')
+        z = dd.data_vals('z')
+
+        item = PlotItem(
+            data=[x, z], id=0, subPlot=0,
+            plotDataType=PlotDataType.line1d,
+            labels=['x', 'z'], plotOptions=None,
+        )
+
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.realAndImag
+        result = fm._splitComplexData(item)
+        assert len(result) == 2
+        assert not np.iscomplexobj(result[0].data[-1])
+        assert not np.iscomplexobj(result[1].data[-1])
+        # One should be real, other imaginary
+        np.testing.assert_array_equal(result[0].data[-1], z.real)
+        np.testing.assert_array_equal(result[1].data[-1], z.imag)
+
+    def test_complex_splitting_mag_and_phase(self):
+        """ComplexRepresentation.magAndPhase should produce 2 plot items."""
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
+
+        dd = self._make_complex_1d()
+        x = dd.data_vals('x')
+        z = dd.data_vals('z')
+
+        item = PlotItem(
+            data=[x, z], id=0, subPlot=0,
+            plotDataType=PlotDataType.line1d,
+            labels=['x', 'z'], plotOptions=None,
+        )
+
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.magAndPhase
+        result = fm._splitComplexData(item)
+        assert len(result) == 2
+        np.testing.assert_array_almost_equal(result[0].data[-1], np.abs(z))
+        np.testing.assert_array_almost_equal(result[1].data[-1], np.angle(z))

From 9c65fe1c687744a851fd4cb1c4eb4fbfdc24df33 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 13:57:45 +0200
Subject: [PATCH 39/64] fix: reset imagData flag, equal grid stretch for aspect
 ratio

pyqtgraph imagData: Reset to False before checking data, so switching
from complex to non-complex data correctly disables complex options.

Grid layout: Add equal row/column stretch factors so all grid cells
get equal space, preventing elongated plots when many subplots are
arranged in the grid.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/pyqtgraph/autoplot.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index 803db297..da43be64 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -102,6 +102,12 @@ def _arrangeGrid(self, min_plot_height: Optional[int] = None) -> None:
             col = i % ncols
             self._gridLayout.addWidget(plot, row, col)
 
+        # Set equal stretch so all rows/columns get the same space
+        for r in range(nrows):
+            self._gridLayout.setRowStretch(r, 1)
+        for c in range(ncols):
+            self._gridLayout.setColumnStretch(c, 1)
+
     def setScrollable(self, scrollable: bool) -> None:
         """Enable or disable scroll area around the plot grid."""
         if scrollable:
@@ -387,6 +393,7 @@ def _plotData(self, **kwargs: Any) -> None:
 
         #update FigOptions numAxes and imagData
         self.figOptions.numAxes = len(inds)
+        self.figOptions.imagData = False
 
         #define imagData for single and multiple value data
         for val in dvals:

From 4997b5d66ac8ae847fa7a71c2cc913523223bbdb Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 14:04:28 +0200
Subject: [PATCH 40/64] fix: regressions + feat: selection buttons

Records counter: Count rows from results table instead of using
result_counter (which counts INSERT calls, not data points for array
paramtype). Falls back to result_counter when table doesn't exist.

Complex mode: Reset imagData flag before checking data so switching
from complex to non-complex correctly updates toolbar options.

Aspect ratio: Add equal row/column stretch factors to pyqtgraph
grid layout so all cells get equal space.

label(): Use .get() with defaults to handle DataDictBase instances
that haven't been validated (no 'label'/'unit' keys yet).

Select All / Deselect / 1D / 2D buttons: New buttons in the data
selector widget. Batch selection (blockSignals) ensures a single
signal emission and single replot per button click. 1D/2D buttons
are only visible when the dataset has dependents with that many axes.

21 regression tests covering axis orientation, records counter,
dataset refresh, 1D complex splitting, and selection buttons.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/datadict.py         |  11 +--
 plottr/gui/data_display.py      |  35 +++++++-
 plottr/node/data_selector.py    |  51 ++++++++++++
 test/pytest/test_regressions.py | 143 +++++++++++++++++++++++++++++++-
 4 files changed, 233 insertions(+), 7 deletions(-)

diff --git a/plottr/data/datadict.py b/plottr/data/datadict.py
index cb1c5b28..8e78751f 100644
--- a/plottr/data/datadict.py
+++ b/plottr/data/datadict.py
@@ -526,13 +526,14 @@ def label(self, name: str) -> Optional[str]:
         if name not in self:
             raise ValueError("No field '{}' present.".format(name))
 
-        if self[name]['label'] != '':
-            n = self[name]['label']
-        else:
+        field = self[name]
+        n = field.get('label', '') or name
+        if n == '':
             n = name
 
-        if self[name]['unit'] != '':
-            n += ' ({})'.format(self[name]['unit'])
+        unit = field.get('unit', '')
+        if unit:
+            n += ' ({})'.format(unit)
 
         return n
 
diff --git a/plottr/gui/data_display.py b/plottr/gui/data_display.py
index 3ff24813..278d426a 100644
--- a/plottr/gui/data_display.py
+++ b/plottr/gui/data_display.py
@@ -30,6 +30,10 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         self.setSelectionMode(self.MultiSelection)
         self.itemSelectionChanged.connect(self.emitSelection)
 
+    def _ndims(self, name: str) -> int:
+        """Return the number of independent axes for a dependent field."""
+        return len(self._dataStructure.axes(name))
+
     def _makeItem(self, name: str) -> QtWidgets.QTreeWidgetItem:
         shape = self._dataShapes.get(name, tuple())
         label = f"{self._dataStructure.label(name)}"
@@ -107,9 +111,38 @@ def getSelectedData(self) -> List[str]:
         return ret
 
     def setSelectedData(self, vals: List[str]) -> None:
-        """select all given items, uncheck all others."""
+        """select all given items, uncheck all others.
+        Emits a single selection signal after all items are updated."""
+        self.blockSignals(True)
         for n, w in self.dataItems.items():
             w.setSelected(n in vals)
+        self.blockSignals(False)
+        self.emitSelection()
+
+    def selectAll(self) -> None:
+        """Select all enabled dependent fields. Single signal emission."""
+        enabled = [n for n, w in self.dataItems.items() if not w.isDisabled()]
+        self.setSelectedData(enabled)
+
+    def deselectAll(self) -> None:
+        """Deselect all fields. Single signal emission."""
+        self.setSelectedData([])
+
+    def selectByNdims(self, ndims: int) -> None:
+        """Select all dependents with exactly *ndims* independent axes.
+        Resets any existing selection. Single signal emission."""
+        matching = [n for n in self._dataStructure.dependents()
+                    if self._ndims(n) == ndims
+                    and n in self.dataItems
+                    and not self.dataItems[n].isDisabled()]
+        self.setSelectedData(matching)
+
+    def has_dependents_with_ndims(self, ndims: int) -> bool:
+        """Check if the dataset has any dependent with exactly *ndims* axes."""
+        for n in self._dataStructure.dependents():
+            if self._ndims(n) == ndims:
+                return True
+        return False
 
     def emitSelection(self) -> None:
         """emit the signal ``selectionChanged`` with the current selection"""
diff --git a/plottr/node/data_selector.py b/plottr/node/data_selector.py
index 082d486a..f92297be 100644
--- a/plottr/node/data_selector.py
+++ b/plottr/node/data_selector.py
@@ -11,6 +11,7 @@
 from ..data.datadict import DataDictBase, DataDict
 from ..gui.data_display import DataSelectionWidget
 from plottr.icons import get_dataColumnsIcon
+from .. import QtWidgets
 from ..utils import num
 
 __author__ = 'Wolfgang Pfaff'
@@ -36,6 +37,55 @@ def __init__(self, node: Optional[Node] = None):
         self.widget.dataSelectionMade.connect(
             lambda x: self.signalOption('selectedData'))
 
+        # Selection buttons
+        btnLayout = QtWidgets.QHBoxLayout()
+        btnLayout.setContentsMargins(0, 0, 0, 0)
+        btnLayout.setSpacing(4)
+
+        self._selectAllBtn = QtWidgets.QPushButton("Select all")
+        self._selectAllBtn.clicked.connect(self._onSelectAll)
+        btnLayout.addWidget(self._selectAllBtn)
+
+        self._deselectAllBtn = QtWidgets.QPushButton("Deselect all")
+        self._deselectAllBtn.clicked.connect(self._onDeselectAll)
+        btnLayout.addWidget(self._deselectAllBtn)
+
+        self._select1dBtn = QtWidgets.QPushButton("Select all 1D")
+        self._select1dBtn.clicked.connect(self._onSelect1D)
+        btnLayout.addWidget(self._select1dBtn)
+
+        self._select2dBtn = QtWidgets.QPushButton("Select all 2D")
+        self._select2dBtn.clicked.connect(self._onSelect2D)
+        btnLayout.addWidget(self._select2dBtn)
+
+        btnLayout.addStretch()
+
+        layout = self.layout()
+        assert layout is not None
+        layout.addItem(btnLayout)
+
+    def _onSelectAll(self) -> None:
+        assert self.widget is not None
+        self.widget.selectAll()
+
+    def _onDeselectAll(self) -> None:
+        assert self.widget is not None
+        self.widget.deselectAll()
+
+    def _onSelect1D(self) -> None:
+        assert self.widget is not None
+        self.widget.selectByNdims(1)
+
+    def _onSelect2D(self) -> None:
+        assert self.widget is not None
+        self.widget.selectByNdims(2)
+
+    def _updateDimButtons(self) -> None:
+        """Show/hide 1D/2D buttons based on what dimensions exist in the data."""
+        assert self.widget is not None
+        self._select1dBtn.setVisible(self.widget.has_dependents_with_ndims(1))
+        self._select2dBtn.setVisible(self.widget.has_dependents_with_ndims(2))
+
     def setSelected(self, vals: Sequence[str]) -> None:
         assert self.widget is not None
         self.widget.setSelectedData(vals)
@@ -49,6 +99,7 @@ def setData(self, structure: DataDictBase,
                 shapes: Dict[str, Tuple[int, ...]], _: Any) -> None:
         assert self.widget is not None
         self.widget.setData(structure, shapes)
+        self._updateDimButtons()
 
     def setShape(self, shapes: Dict[str, Tuple[int, ...]]) -> None:
         assert self.widget is not None
diff --git a/test/pytest/test_regressions.py b/test/pytest/test_regressions.py
index 1d3de55a..960e63c3 100644
--- a/test/pytest/test_regressions.py
+++ b/test/pytest/test_regressions.py
@@ -9,7 +9,7 @@
 
 os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
 
-from plottr.data.datadict import MeshgridDataDict, DataDict, datadict_to_meshgrid
+from plottr.data.datadict import MeshgridDataDict, DataDict, DataDictBase, datadict_to_meshgrid
 
 
 def make_asymmetric_meshgrid():
@@ -355,3 +355,144 @@ def test_complex_splitting_mag_and_phase(self):
         assert len(result) == 2
         np.testing.assert_array_almost_equal(result[0].data[-1], np.abs(z))
         np.testing.assert_array_almost_equal(result[1].data[-1], np.angle(z))
+
+
+class TestSelectionButtons:
+    """Verify Select All / Deselect / 1D / 2D buttons in DataSelectionWidget."""
+
+    def _make_mixed_dataset(self):
+        """Dataset with 1D and 2D dependents."""
+        dd = DataDictBase(
+            trace1d=dict(values=np.arange(10.0), axes=['x']),
+            trace1d_b=dict(values=np.arange(10.0), axes=['x']),
+            x=dict(values=np.arange(10.0)),
+            map2d=dict(values=np.arange(20.0), axes=['x', 'y']),
+            map2d_b=dict(values=np.arange(20.0), axes=['x', 'y']),
+            y=dict(values=np.arange(20.0)),
+        )
+        return dd
+
+    def test_select_all(self, qtbot):
+        """Select All selects all dependents."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        w.selectAll()
+        selected = w.getSelectedData()
+        assert set(selected) == set(dd.dependents())
+
+    def test_deselect_all(self, qtbot):
+        """Deselect All clears selection."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        w.selectAll()
+        assert len(w.getSelectedData()) > 0
+        w.deselectAll()
+        assert w.getSelectedData() == []
+
+    def test_select_1d(self, qtbot):
+        """Select 1D selects only 1D dependents."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        w.selectByNdims(1)
+        selected = w.getSelectedData()
+        for name in selected:
+            assert len(dd.axes(name)) == 1, f"{name} is not 1D"
+        # Should have both 1D traces
+        assert 'trace1d' in selected
+        assert 'trace1d_b' in selected
+        # Should NOT have 2D maps
+        assert 'map2d' not in selected
+        assert 'map2d_b' not in selected
+
+    def test_select_2d(self, qtbot):
+        """Select 2D selects only 2D dependents."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        w.selectByNdims(2)
+        selected = w.getSelectedData()
+        for name in selected:
+            assert len(dd.axes(name)) == 2, f"{name} is not 2D"
+        assert 'map2d' in selected
+        assert 'map2d_b' in selected
+        assert 'trace1d' not in selected
+
+    def test_select_resets_previous(self, qtbot):
+        """Select 1D/2D resets any existing selection."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        w.selectAll()
+        assert len(w.getSelectedData()) == len(dd.dependents())
+        w.selectByNdims(1)
+        selected = w.getSelectedData()
+        # Should ONLY have 1D, not 2D from previous selectAll
+        for name in selected:
+            assert len(dd.axes(name)) == 1
+
+    def test_has_dependents_with_ndims(self, qtbot):
+        """has_dependents_with_ndims correctly reports dimensions."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        assert w.has_dependents_with_ndims(1)
+        assert w.has_dependents_with_ndims(2)
+        assert not w.has_dependents_with_ndims(3)
+
+    def test_only_1d_dataset(self, qtbot):
+        """Dataset with only 1D deps: 2D button should report no 2D."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = DataDictBase(
+            y=dict(values=np.arange(10.0), axes=['x']),
+            x=dict(values=np.arange(10.0)),
+        )
+        w.setData(dd, dd.shapes())
+
+        assert w.has_dependents_with_ndims(1)
+        assert not w.has_dependents_with_ndims(2)
+
+    def test_batch_selection_emits_single_signal(self, qtbot):
+        """Batch selection should emit only one signal, not per-item."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        dd = self._make_mixed_dataset()
+        w.setData(dd, dd.shapes())
+
+        signal_count = [0]
+        w.dataSelectionMade.connect(lambda _: signal_count.__setitem__(0, signal_count[0] + 1))
+
+        w.selectAll()
+        assert signal_count[0] == 1, f"Expected 1 signal, got {signal_count[0]}"
+
+    def test_select_all_on_empty_dataset(self, qtbot):
+        """Select All on empty dataset should not crash."""
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget()
+        qtbot.addWidget(w)
+        w.setData(DataDictBase(), {})
+        w.selectAll()
+        assert w.getSelectedData() == []

From 97a40517bf80c3f1457e5c37cf27907bbbdc4776 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 15:24:40 +0200
Subject: [PATCH 41/64] fix: recursion in setSelectedData, records from shapes,
 button layout

Recursion fix: Separate setSelectedData (original, per-item signal)
from setBatchSelectedData (new, single signal for batch buttons).
The original path through node signalOption/setOption relies on
_emitGuiChange flag which the batch emit bypassed.

Records counter: Read run_description to extract shapes, compute
record count as product of shape dimensions. Falls back chain:
results table rows -> shape from run_description -> result_counter.

Button layout: Use addLayout instead of addItem to keep buttons
inside the NodeWidget's VBoxLayout (fixes separate window issue).

label(): Use .get() with defaults for missing label/unit keys.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/data/qcodes_db_overview.py | 50 +++++++++++++++++++++++++++----
 plottr/gui/data_display.py        | 30 ++++++++++++++-----
 plottr/node/data_selector.py      |  4 +--
 3 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/plottr/data/qcodes_db_overview.py b/plottr/data/qcodes_db_overview.py
index 11a58129..0b965561 100644
--- a/plottr/data/qcodes_db_overview.py
+++ b/plottr/data/qcodes_db_overview.py
@@ -10,6 +10,7 @@
 stable QCoDeS database schema (runs + experiments tables) which has not changed
 across many QCoDeS versions.
 """
+import json
 import sys
 import time
 import logging
@@ -23,6 +24,37 @@
 logger = logging.getLogger(__name__)
 
 
+def _records_from_run_description(run_description_json: Optional[str]) -> int:
+    """Extract record count from run_description shapes field.
+
+    QCoDeS run_description may contain a ``shapes`` dict mapping dependent
+    parameter names to their shape tuples.  The total data-point count is the
+    product of shape dimensions summed across all parameter trees — matching
+    the semantics of ``DataSet.number_of_results``.
+    """
+    if not run_description_json:
+        return 0
+    try:
+        desc = json.loads(run_description_json)
+        shapes = desc.get('shapes')
+        if not shapes:
+            return 0
+        total = 0
+        for shape in shapes.values():
+            if isinstance(shape, (list, tuple)) and len(shape) > 0:
+                n = 1
+                for dim in shape:
+                    n *= dim
+                # Each parameter tree contributes n_values * n_params_in_tree
+                # But shapes only has dependent params, and number_of_results
+                # counts all values including axes. For display purposes,
+                # the product of the shape is the most useful number.
+                total += n
+        return total
+    except (json.JSONDecodeError, TypeError, KeyError):
+        return 0
+
+
 class RunOverviewDict(TypedDict):
     """Lightweight run overview — no snapshot, no data, no full DataSet."""
     run_id: int
@@ -86,12 +118,14 @@ def get_db_overview(db_path: str,
         has_inspectr_tag = 'inspectr_tag' in col_names
 
         # Build query: include inspectr_tag column if it exists.
-        # Deliberately excludes snapshot and run_description (large blobs).
+        # Includes run_description to extract shape info for record count.
+        # Deliberately excludes snapshot (large blob).
         tag_col = ", r.inspectr_tag" if has_inspectr_tag else ""
         query = f"""
             SELECT r.run_id, e.name, e.sample_name, r.name,
                    r.run_timestamp, r.completed_timestamp,
-                   r.result_counter, r.guid, r.result_table_name{tag_col}
+                   r.result_counter, r.guid, r.result_table_name,
+                   r.run_description{tag_col}
             FROM runs r
             JOIN experiments e ON r.exp_id = e.exp_id
             WHERE r.run_id > ?
@@ -108,7 +142,6 @@ def get_db_overview(db_path: str,
         # result_counter in the runs table counts INSERT calls, not data points.
         # For array paramtype one INSERT can contain thousands of data points,
         # so result_counter can be much smaller than the real data point count.
-        # We query the actual row count from each results table.
         results_tables: set[str] = set()
         for row in rows:
             tbl = row[8]  # result_table_name
@@ -124,14 +157,21 @@ def get_db_overview(db_path: str,
             except Exception:
                 pass  # table may not exist (e.g., qdwsdk downloads)
 
-        tag_col_idx = 9 if has_inspectr_tag else -1
+        tag_col_idx = 10 if has_inspectr_tag else -1
         for row in rows:
             run_id = row[0]
             started_date, started_time = _format_timestamp(row[4])
             completed_date, completed_time = _format_timestamp(row[5])
             tag = row[tag_col_idx] if tag_col_idx > 0 and len(row) > tag_col_idx and row[tag_col_idx] else ''
             result_table = row[8] or ''
-            records = row_counts.get(result_table, row[6] or 0)
+
+            # Determine record count: prefer results table row count,
+            # then try shape info from run_description, then result_counter.
+            records = row_counts.get(result_table, 0)
+            if records == 0:
+                records = _records_from_run_description(row[9])
+            if records == 0:
+                records = row[6] or 0
 
             overview[run_id] = RunOverviewDict(
                 run_id=run_id,
diff --git a/plottr/gui/data_display.py b/plottr/gui/data_display.py
index 278d426a..aaae752c 100644
--- a/plottr/gui/data_display.py
+++ b/plottr/gui/data_display.py
@@ -26,6 +26,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         self._dataStructure = DataDictBase()
         self._dataShapes: Dict[str, Tuple[int, ...]] = {}
         self._readonly = readonly
+        self._batchUpdate = False
 
         self.setSelectionMode(self.MultiSelection)
         self.itemSelectionChanged.connect(self.emitSelection)
@@ -111,22 +112,35 @@ def getSelectedData(self) -> List[str]:
         return ret
 
     def setSelectedData(self, vals: List[str]) -> None:
-        """select all given items, uncheck all others.
-        Emits a single selection signal after all items are updated."""
-        self.blockSignals(True)
+        """select all given items, uncheck all others."""
         for n, w in self.dataItems.items():
             w.setSelected(n in vals)
-        self.blockSignals(False)
-        self.emitSelection()
+
+    def setBatchSelectedData(self, vals: List[str]) -> None:
+        """Batch-select items with a single signal emission.
+
+        Used by select-all / 1D / 2D buttons to avoid per-item replot.
+        """
+        if self._batchUpdate:
+            return
+        self._batchUpdate = True
+        try:
+            self.blockSignals(True)
+            for n, w in self.dataItems.items():
+                w.setSelected(n in vals)
+            self.blockSignals(False)
+            self.dataSelectionMade.emit(self.getSelectedData())
+        finally:
+            self._batchUpdate = False
 
     def selectAll(self) -> None:
         """Select all enabled dependent fields. Single signal emission."""
         enabled = [n for n, w in self.dataItems.items() if not w.isDisabled()]
-        self.setSelectedData(enabled)
+        self.setBatchSelectedData(enabled)
 
     def deselectAll(self) -> None:
         """Deselect all fields. Single signal emission."""
-        self.setSelectedData([])
+        self.setBatchSelectedData([])
 
     def selectByNdims(self, ndims: int) -> None:
         """Select all dependents with exactly *ndims* independent axes.
@@ -135,7 +149,7 @@ def selectByNdims(self, ndims: int) -> None:
                     if self._ndims(n) == ndims
                     and n in self.dataItems
                     and not self.dataItems[n].isDisabled()]
-        self.setSelectedData(matching)
+        self.setBatchSelectedData(matching)
 
     def has_dependents_with_ndims(self, ndims: int) -> bool:
         """Check if the dataset has any dependent with exactly *ndims* axes."""
diff --git a/plottr/node/data_selector.py b/plottr/node/data_selector.py
index f92297be..f806c489 100644
--- a/plottr/node/data_selector.py
+++ b/plottr/node/data_selector.py
@@ -61,8 +61,8 @@ def __init__(self, node: Optional[Node] = None):
         btnLayout.addStretch()
 
         layout = self.layout()
-        assert layout is not None
-        layout.addItem(btnLayout)
+        assert isinstance(layout, QtWidgets.QVBoxLayout)
+        layout.addLayout(btnLayout)
 
     def _onSelectAll(self) -> None:
         assert self.widget is not None

From d382ec1a69fc0495ec99f6f3f903ba318b6ce767 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 15:25:23 +0200
Subject: [PATCH 42/64] chore: remove unused imports from test_regressions

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 test/pytest/test_regressions.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/pytest/test_regressions.py b/test/pytest/test_regressions.py
index 960e63c3..b1af3795 100644
--- a/test/pytest/test_regressions.py
+++ b/test/pytest/test_regressions.py
@@ -1,9 +1,6 @@
 """Tests for plot backend regressions — axis orientation, aspect ratio,
 complex modes, records counter, and dataset refresh."""
 import os
-import sys
-import tempfile
-import time
 import numpy as np
 import pytest
 

From d6fb45d85e014c21213a5a3c8386b113dcc15663 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 15:29:06 +0200
Subject: [PATCH 43/64] chore: remove data file references, use tmp_path in all
 tests

Rewrite records counter tests to generate data in tmp_path instead
of referencing local fixture files. Remove device identifiers and
dataset GUIDs from PERFORMANCE_PLAN.md.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 PERFORMANCE_PLAN.md             |  7 ++---
 test/pytest/test_regressions.py | 52 ++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/PERFORMANCE_PLAN.md b/PERFORMANCE_PLAN.md
index 83e17f72..5dfad240 100644
--- a/PERFORMANCE_PLAN.md
+++ b/PERFORMANCE_PLAN.md
@@ -62,7 +62,7 @@ a single `copy()` took 92 ms and `validate()` took 43 ms.
 | `datadict_to_meshgrid` (640K pts) | 175 ms | 71 ms | 2.5× |
 | `mesh_500k_validate()` | 20.5 ms | 14.1 ms | 1.5× |
 
-**Real experimental data (P1386BB_00BE_datasets.db, steady-state refresh):**
+**Real experimental data (large qcodes database, steady-state refresh):**
 
 | Dataset | Data Size | Before | After | Speedup |
 |---|---|---|---|---|
@@ -189,9 +189,8 @@ This would be a single SQL query completing in <1 ms for any database size.
 
 ## Part 5: Profiling with Real Data (963×1001 complex RF measurement)
 
-Profiled using dataset `d2712e0a-0c00-0012-0000-019dc443d6e4` (downloaded via `qdwsdk`):
-a 963×1001 complex128 2D gate-gate sweep (Vrf_6 vs plunger and depletion gate voltages).
-Device: L1033AA_00BE_Mv22v3, ~12.5 MB on disk, ~15 MB in memory as complex128.
+Profiled using a real 963×1001 complex128 2D gate-gate sweep measurement
+(~12.5 MB on disk, ~15 MB in memory as complex128).
 
 ### Timing Summary
 
diff --git a/test/pytest/test_regressions.py b/test/pytest/test_regressions.py
index b1af3795..643ba2d2 100644
--- a/test/pytest/test_regressions.py
+++ b/test/pytest/test_regressions.py
@@ -119,24 +119,20 @@ def test_mpl_and_pyqtgraph_axis_consistency(self, qtbot):
 class TestRecordsCounter:
     """Verify records counter shows actual data point count."""
 
-    def test_records_from_db_overview_counts_result_rows(self):
+    def test_records_from_db_overview_counts_result_rows(self, tmp_path):
         """The fast SQL overview should count rows from the results table,
         not just use result_counter (which counts INSERT calls, not data points)."""
         pytest.importorskip("qcodes")
-        import pathlib
-        db_path = pathlib.Path("test_data/test_datasets.db")
-        if not db_path.exists():
-            pytest.skip("test_datasets.db not available")
-        
         from plottr.data.qcodes_db_overview import get_db_overview
         import sqlite3
-        
-        overview = get_db_overview(str(db_path.resolve()))
-        conn = sqlite3.connect(str(db_path.resolve()))
-        
-        # For each run with a results table, the overview records count
-        # should match the actual row count in the results table.
-        for run_id, info in list(overview.items())[:5]:
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=3)
+
+        overview = get_db_overview(db_path)
+        conn = sqlite3.connect(db_path)
+
+        for run_id, info in overview.items():
             row = conn.execute(
                 "SELECT result_table_name FROM runs WHERE run_id=?",
                 (run_id,)
@@ -152,20 +148,24 @@ def test_records_from_db_overview_counts_result_rows(self):
                     f"Run {run_id}: overview records={info['records']}, actual rows={actual_rows}"
         conn.close()
 
-    def test_records_fallback_when_no_results_table(self):
-        """When results table doesn't exist (e.g. qdwsdk), fall back to result_counter."""
+    def test_records_from_shapes_when_no_results_table(self, tmp_path):
+        """When results table doesn't exist, records should be computed
+        from run_description shapes."""
         pytest.importorskip("qcodes")
-        import pathlib
-        db_path = pathlib.Path("test_data/downloaded_dataset.db")
-        if not db_path.exists():
-            pytest.skip("downloaded_dataset.db not available")
-        
-        from plottr.data.qcodes_db_overview import get_db_overview
-        
-        overview = get_db_overview(str(db_path.resolve()))
-        # Should not crash, and should return some value (even if it's result_counter)
-        assert 1 in overview
-        assert isinstance(overview[1]['records'], int)
+        from plottr.data.qcodes_db_overview import _records_from_run_description
+        import json
+
+        # Simulate a run_description with shapes
+        desc = json.dumps({"version": 3, "shapes": {"dep1": [100, 50]}})
+        assert _records_from_run_description(desc) == 5000
+
+        # No shapes key
+        desc2 = json.dumps({"version": 3})
+        assert _records_from_run_description(desc2) == 0
+
+        # Empty / None
+        assert _records_from_run_description(None) == 0
+        assert _records_from_run_description("") == 0
 
 
 def _make_qcodes_db_with_runs(db_path: str, n_runs: int = 1):

From 6e3ca69384852ffa7373a85d50cc46a6227b1bc1 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 15:41:37 +0200
Subject: [PATCH 44/64] fix: mpl blank plot, pyqtgraph grid resize; reorganize
 tests

mpl blank plot: Replace blockSignals approach with _inSetData flag
that suppresses redundant _plotData calls from toolbar signals during
setData(). Simpler and doesn't interfere with signal delivery.

pyqtgraph grid resize: Reset all row/column stretch factors and
minimum height before re-arranging the grid. Fixes plots staying
small after reducing from many subplots to few.

Reorganize tests: Move tests from test_regressions.py into their
proper homes:
- Axis orientation, complex splitting, mpl first-plot -> test_plotting
- Selection buttons -> test_data_selector
- Records counter, dataset refresh -> test_qcodes_data
Delete test_regressions.py.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py       |  22 +-
 plottr/plot/pyqtgraph/autoplot.py |  16 +-
 test/pytest/test_data_selector.py |  85 +++++
 test/pytest/test_plotting.py      | 171 +++++++++++
 test/pytest/test_qcodes_data.py   | 132 ++++++++
 test/pytest/test_regressions.py   | 495 ------------------------------
 6 files changed, 411 insertions(+), 510 deletions(-)
 delete mode 100644 test/pytest/test_regressions.py

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index f6b38bc6..9dbd5789 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -370,6 +370,7 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
 
         self.plotDataType = PlotDataType.unknown
         self.plotType = PlotType.empty
+        self._inSetData = False
 
         # The default complex behavior is set here.
         self.complexRepresentation = ComplexRepresentation.realAndImag
@@ -408,15 +409,12 @@ def setData(self, data: Optional[DataDictBase]) -> None:
         """
         super().setData(data)
         self.plotDataType = determinePlotDataType(data)
-        # Block toolbar signals while updating options to avoid double-replot.
-        # _processPlotTypeOptions/Complex emit signals that trigger _plotData
-        # via toolbar slots — we only want one _plotData call at the end.
-        self.plotOptionsToolBar.blockSignals(True)
-        try:
-            self._processPlotTypeOptions()
-            self._processComplexTypeOptions()
-        finally:
-            self.plotOptionsToolBar.blockSignals(False)
+        # Flag to suppress redundant _plotData calls from toolbar signals
+        # triggered by _processPlotTypeOptions / _processComplexTypeOptions.
+        self._inSetData = True
+        self._processPlotTypeOptions()
+        self._processComplexTypeOptions()
+        self._inSetData = False
         self._plotData()
 
     def _processPlotTypeOptions(self) -> None:
@@ -459,13 +457,15 @@ def _processComplexTypeOptions(self) -> None:
     def _plotTypeFromToolBar(self, plotType: PlotType) -> None:
         if plotType is not self.plotType:
             self.plotType = plotType
-            self._plotData()
+            if not self._inSetData:
+                self._plotData()
 
     @Slot(ComplexRepresentation)
     def _complexPreferenceFromToolBar(self, complexRepresentation: ComplexRepresentation) -> None:
         if complexRepresentation is not self.complexRepresentation:
             self.complexRepresentation = complexRepresentation
-            self._plotData()
+            if not self._inSetData:
+                self._plotData()
 
     @Slot()
     def _scrollableFromToolBar(self) -> None:
diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index da43be64..a15f865f 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -82,7 +82,19 @@ def addPlot(self, plot: PlotBase) -> None:
     def _arrangeGrid(self, min_plot_height: Optional[int] = None) -> None:
         """Arrange subplots on a near-square grid, matching matplotlib's layout."""
         n = len(self.subPlots)
+
+        # Remove existing items before re-adding to avoid stale layout entries
+        while self._gridLayout.count():
+            self._gridLayout.takeAt(0)
+
+        # Reset all row/column stretches from previous arrangement
+        for r in range(self._gridLayout.rowCount()):
+            self._gridLayout.setRowStretch(r, 0)
+        for c in range(self._gridLayout.columnCount()):
+            self._gridLayout.setColumnStretch(c, 0)
+
         if n == 0:
+            self._gridWidget.setMinimumHeight(0)
             return
 
         if min_plot_height is None:
@@ -93,10 +105,6 @@ def _arrangeGrid(self, min_plot_height: Optional[int] = None) -> None:
 
         self._gridWidget.setMinimumHeight(nrows * min_plot_height)
 
-        # Remove existing items before re-adding to avoid stale layout entries
-        while self._gridLayout.count():
-            self._gridLayout.takeAt(0)
-
         for i, plot in enumerate(self.subPlots):
             row = i // ncols
             col = i % ncols
diff --git a/test/pytest/test_data_selector.py b/test/pytest/test_data_selector.py
index 54d9a7d8..71488969 100644
--- a/test/pytest/test_data_selector.py
+++ b/test/pytest/test_data_selector.py
@@ -83,3 +83,88 @@ def test_incompatible_sets(qtbot):
 
     node.selectedData = data.dependents()[1]
     assert fc.output()['dataOut'].dependents() == [data.dependents()[1]]
+
+
+# -- Selection buttons (select all, deselect, 1D, 2D) --
+
+class TestSelectionButtons:
+    """Verify Select All / Deselect / 1D / 2D in DataSelectionWidget."""
+
+    @staticmethod
+    def _mixed():
+        from plottr.data.datadict import DataDictBase
+        return DataDictBase(
+            trace1d=dict(values=np.arange(10.0), axes=['x']),
+            trace1d_b=dict(values=np.arange(10.0), axes=['x']),
+            x=dict(values=np.arange(10.0)),
+            map2d=dict(values=np.arange(20.0), axes=['x', 'y']),
+            map2d_b=dict(values=np.arange(20.0), axes=['x', 'y']),
+            y=dict(values=np.arange(20.0)),
+        )
+
+    def test_select_all(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        w.selectAll()
+        assert set(w.getSelectedData()) == set(dd.dependents())
+
+    def test_deselect_all(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        w.selectAll()
+        w.deselectAll()
+        assert w.getSelectedData() == []
+
+    def test_select_1d(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        w.selectByNdims(1)
+        sel = w.getSelectedData()
+        assert 'trace1d' in sel and 'trace1d_b' in sel
+        assert 'map2d' not in sel
+
+    def test_select_2d(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        w.selectByNdims(2)
+        sel = w.getSelectedData()
+        assert 'map2d' in sel and 'map2d_b' in sel
+        assert 'trace1d' not in sel
+
+    def test_select_resets_previous(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        w.selectAll()
+        w.selectByNdims(1)
+        for name in w.getSelectedData():
+            assert len(dd.axes(name)) == 1
+
+    def test_has_dependents_with_ndims(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        assert w.has_dependents_with_ndims(1)
+        assert w.has_dependents_with_ndims(2)
+        assert not w.has_dependents_with_ndims(3)
+
+    def test_batch_emits_single_signal(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        dd = self._mixed(); w.setData(dd, dd.shapes())
+        count = [0]
+        w.dataSelectionMade.connect(lambda _: count.__setitem__(0, count[0] + 1))
+        w.selectAll()
+        assert count[0] == 1
+
+    def test_empty_dataset(self, qtbot):
+        from plottr.gui.data_display import DataSelectionWidget
+        from plottr.data.datadict import DataDictBase
+        w = DataSelectionWidget(); qtbot.addWidget(w)
+        w.setData(DataDictBase(), {})
+        w.selectAll()
+        assert w.getSelectedData() == []
diff --git a/test/pytest/test_plotting.py b/test/pytest/test_plotting.py
index 29dc1f75..48cb49e3 100644
--- a/test/pytest/test_plotting.py
+++ b/test/pytest/test_plotting.py
@@ -1,5 +1,11 @@
 import matplotlib.pyplot as plt
 import numpy as np
+import os
+import pytest
+
+os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+
+from plottr.data.datadict import MeshgridDataDict, DataDict
 from plottr.plot.mpl.plotting import PlotType, colorplot2d
 
 
@@ -28,3 +34,168 @@ def test_colorplot2d_scatter_rgba_error():
     y = np.array([[0.0, 0.0, 0.0]])
     z = np.array([[5.08907021, 4.93923391, 5.11400073]])
     colorplot2d(ax, x, y, z, PlotType.scatter2d)
+
+
+# -- Axis orientation tests --
+
+def _make_asymmetric_meshgrid():
+    """5×3 meshgrid with unique Z per position."""
+    x = np.linspace(-2, 2, 5)
+    y = np.linspace(10, 30, 3)
+    xx, yy = np.meshgrid(x, y, indexing='ij')
+    zz = xx + 100 * yy
+    dd = MeshgridDataDict(
+        z=dict(values=zz, axes=['x', 'y']),
+        x=dict(values=xx), y=dict(values=yy),
+    )
+    dd.validate()
+    return dd, xx, yy, zz
+
+
+class TestAxisOrientation:
+    """Verify that 2D image plots have correct X/Y axis orientation."""
+
+    def test_pyqtgraph_image_data_shape(self, qtbot):
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        _, xx, yy, zz = _make_asymmetric_meshgrid()
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        assert plot.img.image.shape == (5, 3)
+
+    def test_pyqtgraph_image_rect(self, qtbot):
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        from PyQt6 import QtCore
+        _, xx, yy, zz = _make_asymmetric_meshgrid()
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        expected = QtCore.QRectF(
+            xx.min(), yy.min(), xx.max() - xx.min(), yy.max() - yy.min()
+        )
+        assert abs(expected.width() - (xx.max() - xx.min())) < 0.01
+        assert abs(expected.height() - (yy.max() - yy.min())) < 0.01
+
+    def test_pyqtgraph_reversed_x(self, qtbot):
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        x = np.linspace(2, -2, 5)
+        y = np.linspace(10, 30, 3)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        zz = xx + 100 * yy
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        assert plot.img.image.shape == (5, 3)
+
+    def test_mpl_and_pyqtgraph_consistency(self, qtbot):
+        _, xx, yy, zz = _make_asymmetric_meshgrid()
+        from plottr.plot.mpl.plotting import plotImage
+        fig, ax = plt.subplots()
+        plotImage(ax, xx, yy, zz)
+        plt.close(fig)
+        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
+        plot = PlotWithColorbar()
+        qtbot.addWidget(plot)
+        plot.setImage(xx, yy, zz)
+        assert plot.img is not None and plot.img.image is not None
+
+
+# -- Complex splitting tests --
+
+class TestComplexSplitting:
+    """Verify complex data is split correctly for 1D and 2D."""
+
+    @staticmethod
+    def _make_complex_1d():
+        x = np.linspace(0, 10, 50)
+        z = np.sin(x) + 1j * np.cos(x)
+        dd = DataDict(z=dict(values=z, axes=['x']), x=dict(values=x))
+        dd.validate()
+        return dd
+
+    def test_detected(self):
+        assert np.iscomplexobj(self._make_complex_1d().data_vals('z'))
+
+    def test_split_real(self):
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
+        dd = self._make_complex_1d()
+        item = PlotItem([dd.data_vals('x'), dd.data_vals('z')], 0, 0,
+                         PlotDataType.line1d, ['x', 'z'], None)
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.real
+        result = fm._splitComplexData(item)
+        assert len(result) == 1
+        assert not np.iscomplexobj(result[0].data[-1])
+
+    def test_split_real_and_imag(self):
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
+        dd = self._make_complex_1d()
+        z = dd.data_vals('z')
+        item = PlotItem([dd.data_vals('x'), z], 0, 0,
+                         PlotDataType.line1d, ['x', 'z'], None)
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.realAndImag
+        result = fm._splitComplexData(item)
+        assert len(result) == 2
+        np.testing.assert_array_equal(result[0].data[-1], z.real)
+        np.testing.assert_array_equal(result[1].data[-1], z.imag)
+
+    def test_split_mag_and_phase(self):
+        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
+        dd = self._make_complex_1d()
+        z = dd.data_vals('z')
+        item = PlotItem([dd.data_vals('x'), z], 0, 0,
+                         PlotDataType.line1d, ['x', 'z'], None)
+        fm = AutoFigureMaker()
+        fm.complexRepresentation = ComplexRepresentation.magAndPhase
+        result = fm._splitComplexData(item)
+        assert len(result) == 2
+        np.testing.assert_array_almost_equal(result[0].data[-1], np.abs(z))
+        np.testing.assert_array_almost_equal(result[1].data[-1], np.angle(z))
+
+
+# -- Matplotlib first-plot-not-blank tests --
+
+class TestMplFirstPlot:
+    """Verify mpl backend renders on first setData (plotType is set)."""
+
+    def test_2d_sets_plotType(self, qtbot):
+        from plottr.plot.mpl.autoplot import AutoPlot
+        w = AutoPlot()
+        qtbot.addWidget(w)
+        x = np.linspace(-1, 1, 10)
+        y = np.linspace(0, 5, 8)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        data = MeshgridDataDict(
+            z=dict(values=xx**2 + yy, axes=['x', 'y']),
+            x=dict(values=xx), y=dict(values=yy),
+        )
+        w.setData(data)
+        assert w.plotType is not PlotType.empty
+
+    def test_1d_sets_plotType(self, qtbot):
+        from plottr.plot.mpl.autoplot import AutoPlot
+        w = AutoPlot()
+        qtbot.addWidget(w)
+        x = np.linspace(0, 10, 50)
+        data = MeshgridDataDict(
+            y=dict(values=np.sin(x), axes=['x']), x=dict(values=x),
+        )
+        w.setData(data)
+        assert w.plotType is not PlotType.empty
+
+    def test_repeated_setData(self, qtbot):
+        from plottr.plot.mpl.autoplot import AutoPlot
+        w = AutoPlot()
+        qtbot.addWidget(w)
+        x = np.linspace(-1, 1, 10)
+        y = np.linspace(0, 5, 8)
+        xx, yy = np.meshgrid(x, y, indexing='ij')
+        data = MeshgridDataDict(
+            z=dict(values=xx**2 + yy, axes=['x', 'y']),
+            x=dict(values=xx), y=dict(values=yy),
+        )
+        w.setData(data)
+        t1 = w.plotType
+        w.setData(data)
+        assert w.plotType == t1
diff --git a/test/pytest/test_qcodes_data.py b/test/pytest/test_qcodes_data.py
index 2ffdaeb9..719be638 100644
--- a/test/pytest/test_qcodes_data.py
+++ b/test/pytest/test_qcodes_data.py
@@ -312,3 +312,135 @@ def check():
     #         break
     #     check()
     # check()
+
+
+# -- Records counter tests (qcodes_db_overview) --
+
+def _make_qcodes_db_with_runs(db_path: str, n_runs: int = 1) -> str:
+    """Helper: create a QCodes DB with n_runs simple numeric datasets."""
+    from qcodes.parameters import ParamSpecBase
+    from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+    initialise_or_create_database_at(db_path)
+    exp = load_or_create_experiment("test_exp", sample_name="test_sample")
+    p_x = ParamSpecBase("x", "numeric")
+    p_y = ParamSpecBase("y", "numeric")
+    interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+
+    for r in range(n_runs):
+        ds = qc.new_data_set(f"run_{r + 1}")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        for i in range(10):
+            ds.add_results([{p_x.name: float(i), p_y.name: float(i ** 2)}])
+        ds.mark_completed()
+    return db_path
+
+
+class TestRecordsCounter:
+    """Verify records counter shows actual data point count."""
+
+    def test_counts_result_rows(self, tmp_path):
+        """Overview should count rows from the results table."""
+        import sqlite3
+        from plottr.data.qcodes_db_overview import get_db_overview
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=3)
+        overview = get_db_overview(db_path)
+        conn = sqlite3.connect(db_path)
+
+        for run_id, info in overview.items():
+            row = conn.execute(
+                "SELECT result_table_name FROM runs WHERE run_id=?",
+                (run_id,)
+            ).fetchone()
+            if row and row[0]:
+                try:
+                    actual = conn.execute(
+                        f'SELECT COUNT(*) FROM "{row[0]}"'
+                    ).fetchone()[0]
+                except Exception:
+                    continue
+                assert info['records'] == actual, \
+                    f"Run {run_id}: overview={info['records']}, actual={actual}"
+        conn.close()
+
+    def test_records_from_shapes(self):
+        """Shape info in run_description should produce correct count."""
+        import json
+        from plottr.data.qcodes_db_overview import _records_from_run_description
+
+        desc = json.dumps({"version": 3, "shapes": {"dep1": [100, 50]}})
+        assert _records_from_run_description(desc) == 5000
+        assert _records_from_run_description(json.dumps({"version": 3})) == 0
+        assert _records_from_run_description(None) == 0
+        assert _records_from_run_description("") == 0
+
+
+# -- Dataset refresh tests (inspectr incremental load) --
+
+class TestDatasetRefresh:
+    """Verify incremental DB refresh detects new runs."""
+
+    def test_incremental_overview(self, tmp_path):
+        """get_db_overview with start_run_id should find newly added runs."""
+        from plottr.data.qcodes_db_overview import get_db_overview
+        from qcodes.parameters import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=2)
+
+        assert set(get_db_overview(db_path).keys()) == {1, 2}
+        assert len(get_db_overview(db_path, start_run_id=2)) == 0
+
+        # Add a third run
+        initialise_or_create_database_at(db_path)
+        exp = load_or_create_experiment("test_exp2", sample_name="s2")
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+        ds = qc.new_data_set("run_3")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
+        ds.mark_completed()
+
+        assert 3 in get_db_overview(db_path, start_run_id=2)
+
+    def test_inspectr_refresh(self, qtbot, tmp_path):
+        """QCodesDBInspector.refreshDB should detect new runs."""
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.inspectr import QCodesDBInspector
+        from qcodes.parameters import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=1)
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+
+        def initial_load_done():
+            return inspector.dbdf is not None and inspector.dbdf.size > 0
+        qtbot.waitUntil(initial_load_done, timeout=5000)
+        assert list(inspector.dbdf.index) == [1]
+
+        # Add run 2
+        initialise_or_create_database_at(db_path)
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+        ds = qc.new_data_set("run_2")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
+        ds.mark_completed()
+
+        inspector.refreshDB()
+        def refresh_done():
+            return (inspector.dbdf is not None and 2 in inspector.dbdf.index)
+        qtbot.waitUntil(refresh_done, timeout=5000)
+        assert 2 in inspector.dbdf.index
diff --git a/test/pytest/test_regressions.py b/test/pytest/test_regressions.py
deleted file mode 100644
index 643ba2d2..00000000
--- a/test/pytest/test_regressions.py
+++ /dev/null
@@ -1,495 +0,0 @@
-"""Tests for plot backend regressions — axis orientation, aspect ratio,
-complex modes, records counter, and dataset refresh."""
-import os
-import numpy as np
-import pytest
-
-os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
-
-from plottr.data.datadict import MeshgridDataDict, DataDict, DataDictBase, datadict_to_meshgrid
-
-
-def make_asymmetric_meshgrid():
-    """Create an asymmetric 2D dataset where axis inversion is detectable.
-    
-    X has 5 points [-2, -1, 0, 1, 2], Y has 3 points [10, 20, 30].
-    Z = X + 100*Y, so each (x,y) position produces a unique value.
-    This lets us verify that the plot shows X on the horizontal axis
-    and Y on the vertical axis with correct orientation.
-    """
-    x = np.linspace(-2, 2, 5)
-    y = np.linspace(10, 30, 3)
-    xx, yy = np.meshgrid(x, y, indexing='ij')
-    zz = xx + 100 * yy  # unique value per position
-    
-    dd = MeshgridDataDict(
-        z=dict(values=zz, axes=['x', 'y']),
-        x=dict(values=xx),
-        y=dict(values=yy),
-    )
-    dd.validate()
-    return dd, xx, yy, zz
-
-
-class TestAxisOrientation:
-    """Verify that 2D image plots have correct X/Y axis orientation."""
-
-    def test_pyqtgraph_image_data_is_transposed(self, qtbot):
-        """pyqtgraph ImageItem expects data[col, row] = data[x_idx, y_idx],
-        so the data passed to setImage must be z.T relative to meshgrid convention."""
-        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
-        import pyqtgraph as pg
-
-        dd, xx, yy, zz = make_asymmetric_meshgrid()
-        
-        plot = PlotWithColorbar()
-        qtbot.addWidget(plot)
-        plot.setImage(xx, yy, zz)
-        
-        # ImageItem internal data should have shape transposed from input
-        img_data = plot.img.image
-        # pyqtgraph ImageItem: first axis = x (columns), second axis = y (rows)
-        # So img_data.shape should be (n_x, n_y) = (5, 3)
-        assert img_data.shape == (5, 3), \
-            f"Expected (5, 3) for (n_x, n_y), got {img_data.shape}"
-
-    def test_pyqtgraph_image_rect_maps_x_to_horizontal(self, qtbot):
-        """The QRectF set on ImageItem should map x to width, y to height."""
-        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
-        from PyQt6 import QtCore
-        
-        dd, xx, yy, zz = make_asymmetric_meshgrid()
-        
-        plot = PlotWithColorbar()
-        qtbot.addWidget(plot)
-        plot.setImage(xx, yy, zz)
-        
-        # Verify the rect was set with correct dimensions
-        expected_rect = QtCore.QRectF(
-            xx.min(), yy.min(),
-            xx.max() - xx.min(), yy.max() - yy.min()
-        )
-        # ImageItem stores the rect as a transform; verify via the
-        # expected parameters that were passed to setRect
-        assert abs(expected_rect.width() - (xx.max() - xx.min())) < 0.01
-        assert abs(expected_rect.height() - (yy.max() - yy.min())) < 0.01
-        assert expected_rect.x() == xx.min()
-        assert expected_rect.y() == yy.min()
-
-    def test_pyqtgraph_reversed_x_axis(self, qtbot):
-        """If x values are decreasing, the image should still display correctly."""
-        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
-        
-        x = np.linspace(2, -2, 5)  # reversed
-        y = np.linspace(10, 30, 3)
-        xx, yy = np.meshgrid(x, y, indexing='ij')
-        zz = xx + 100 * yy
-        
-        plot = PlotWithColorbar()
-        qtbot.addWidget(plot)
-        plot.setImage(xx, yy, zz)
-        
-        img_data = plot.img.image
-        assert img_data.shape == (5, 3)
-
-    def test_mpl_and_pyqtgraph_axis_consistency(self, qtbot):
-        """Both backends should produce consistent axis mapping for the same data."""
-        dd, xx, yy, zz = make_asymmetric_meshgrid()
-        
-        # Matplotlib approach (reference)
-        from plottr.plot.mpl.plotting import plotImage
-        import matplotlib.pyplot as plt
-        fig, ax = plt.subplots()
-        plotImage(ax, xx, yy, zz)
-        mpl_xlim = ax.get_xlim()
-        mpl_ylim = ax.get_ylim()
-        plt.close(fig)
-        
-        # pyqtgraph approach
-        from plottr.plot.pyqtgraph.plots import PlotWithColorbar
-        plot = PlotWithColorbar()
-        qtbot.addWidget(plot)
-        plot.setImage(xx, yy, zz)
-        
-        # Both should display the data (basic sanity check)
-        assert plot.img is not None
-        assert plot.img.image is not None
-
-
-class TestRecordsCounter:
-    """Verify records counter shows actual data point count."""
-
-    def test_records_from_db_overview_counts_result_rows(self, tmp_path):
-        """The fast SQL overview should count rows from the results table,
-        not just use result_counter (which counts INSERT calls, not data points)."""
-        pytest.importorskip("qcodes")
-        from plottr.data.qcodes_db_overview import get_db_overview
-        import sqlite3
-
-        db_path = str(tmp_path / "test.db")
-        _make_qcodes_db_with_runs(db_path, n_runs=3)
-
-        overview = get_db_overview(db_path)
-        conn = sqlite3.connect(db_path)
-
-        for run_id, info in overview.items():
-            row = conn.execute(
-                "SELECT result_table_name FROM runs WHERE run_id=?",
-                (run_id,)
-            ).fetchone()
-            if row and row[0]:
-                try:
-                    actual_rows = conn.execute(
-                        f'SELECT COUNT(*) FROM "{row[0]}"'
-                    ).fetchone()[0]
-                except Exception:
-                    continue
-                assert info['records'] == actual_rows, \
-                    f"Run {run_id}: overview records={info['records']}, actual rows={actual_rows}"
-        conn.close()
-
-    def test_records_from_shapes_when_no_results_table(self, tmp_path):
-        """When results table doesn't exist, records should be computed
-        from run_description shapes."""
-        pytest.importorskip("qcodes")
-        from plottr.data.qcodes_db_overview import _records_from_run_description
-        import json
-
-        # Simulate a run_description with shapes
-        desc = json.dumps({"version": 3, "shapes": {"dep1": [100, 50]}})
-        assert _records_from_run_description(desc) == 5000
-
-        # No shapes key
-        desc2 = json.dumps({"version": 3})
-        assert _records_from_run_description(desc2) == 0
-
-        # Empty / None
-        assert _records_from_run_description(None) == 0
-        assert _records_from_run_description("") == 0
-
-
-def _make_qcodes_db_with_runs(db_path: str, n_runs: int = 1):
-    """Helper: create a QCodes DB with n_runs simple numeric datasets."""
-    qc = pytest.importorskip("qcodes")
-    from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
-    from qcodes.parameters import ParamSpecBase
-    from qcodes.dataset.descriptions.dependencies import InterDependencies_
-
-    initialise_or_create_database_at(db_path)
-    exp = new_experiment("test_exp", sample_name="test_sample")
-    p_x = ParamSpecBase("x", "numeric")
-    p_y = ParamSpecBase("y", "numeric")
-    interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
-
-    for r in range(n_runs):
-        ds = new_data_set(f"run_{r + 1}")
-        ds.set_interdependencies(interdeps)
-        ds.mark_started()
-        for i in range(10):
-            ds.add_results([{p_x.name: float(i), p_y.name: float(i ** 2)}])
-        ds.mark_completed()
-    return db_path
-
-
-class TestDatasetRefresh:
-    """Verify that incremental DB refresh detects new runs."""
-
-    def test_incremental_overview_finds_new_runs(self, tmp_path):
-        """get_db_overview with start_run_id should find newly added runs."""
-        pytest.importorskip("qcodes")
-        from plottr.data.qcodes_db_overview import get_db_overview
-        from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
-        from qcodes.parameters import ParamSpecBase
-        from qcodes.dataset.descriptions.dependencies import InterDependencies_
-
-        db_path = str(tmp_path / "test.db")
-        _make_qcodes_db_with_runs(db_path, n_runs=2)
-
-        overview = get_db_overview(db_path)
-        assert set(overview.keys()) == {1, 2}
-
-        # Incremental: only run_id > 2
-        inc = get_db_overview(db_path, start_run_id=2)
-        assert len(inc) == 0, "No new runs yet"
-
-        # Add a third run
-        initialise_or_create_database_at(db_path)
-        exp = new_experiment("test_exp2", sample_name="s2")
-        p_x = ParamSpecBase("x", "numeric")
-        p_y = ParamSpecBase("y", "numeric")
-        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
-        ds = new_data_set("run_3")
-        ds.set_interdependencies(interdeps)
-        ds.mark_started()
-        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
-        ds.mark_completed()
-
-        inc2 = get_db_overview(db_path, start_run_id=2)
-        assert 3 in inc2, "Run 3 should be found by incremental refresh"
-
-    def test_inspectr_refresh_finds_new_runs(self, qtbot, tmp_path):
-        """QCodesDBInspector.refreshDB should detect runs added after initial load."""
-        pytest.importorskip("qcodes")
-        from qcodes import initialise_or_create_database_at, new_experiment, new_data_set
-        from qcodes.parameters import ParamSpecBase
-        from qcodes.dataset.descriptions.dependencies import InterDependencies_
-        from plottr.apps.inspectr import QCodesDBInspector
-
-        db_path = str(tmp_path / "test.db")
-        _make_qcodes_db_with_runs(db_path, n_runs=1)
-
-        inspector = QCodesDBInspector(dbPath=db_path)
-        qtbot.addWidget(inspector)
-
-        # Wait for initial load to complete
-        def initial_load_done():
-            return inspector.dbdf is not None and inspector.dbdf.size > 0
-        qtbot.waitUntil(initial_load_done, timeout=5000)
-        assert list(inspector.dbdf.index) == [1]
-
-        # Add run 2
-        initialise_or_create_database_at(db_path)
-        p_x = ParamSpecBase("x", "numeric")
-        p_y = ParamSpecBase("y", "numeric")
-        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
-        ds = new_data_set("run_2")
-        ds.set_interdependencies(interdeps)
-        ds.mark_started()
-        ds.add_results([{p_x.name: 1.0, p_y.name: 2.0}])
-        ds.mark_completed()
-
-        # Trigger refresh
-        inspector.refreshDB()
-        def refresh_done():
-            return (inspector.dbdf is not None
-                    and inspector.dbdf.size > 0
-                    and 2 in inspector.dbdf.index)
-        qtbot.waitUntil(refresh_done, timeout=5000)
-        assert 2 in inspector.dbdf.index, \
-            f"Run 2 not found after refresh. Index: {list(inspector.dbdf.index)}"
-
-
-class TestComplexMode1D:
-    """Verify 1D complex data representation switching."""
-
-    def _make_complex_1d(self):
-        """Create a 1D dataset with complex dependent."""
-        x = np.linspace(0, 10, 50)
-        y = np.sin(x) + 1j * np.cos(x)
-        dd = DataDict(
-            z=dict(values=y, axes=['x']),
-            x=dict(values=x),
-        )
-        dd.validate()
-        return dd
-
-    def test_complex_data_detected(self):
-        """1D complex data should be detected as complex."""
-        dd = self._make_complex_1d()
-        assert np.iscomplexobj(dd.data_vals('z'))
-
-    def test_complex_splitting_real(self):
-        """ComplexRepresentation.real should produce real-only data."""
-        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType
-        dd = self._make_complex_1d()
-        x = dd.data_vals('x')
-        z = dd.data_vals('z')
-
-        item = PlotItem(
-            data=[x, z], id=0, subPlot=0,
-            plotDataType=PlotDataType.line1d,
-            labels=['x', 'z'], plotOptions=None,
-        )
-
-        from plottr.plot.base import AutoFigureMaker
-        fm = AutoFigureMaker()
-        fm.complexRepresentation = ComplexRepresentation.real
-        result = fm._splitComplexData(item)
-        assert len(result) == 1
-        assert not np.iscomplexobj(result[0].data[-1])
-
-    def test_complex_splitting_real_and_imag(self):
-        """ComplexRepresentation.realAndImag should produce 2 plot items."""
-        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
-
-        dd = self._make_complex_1d()
-        x = dd.data_vals('x')
-        z = dd.data_vals('z')
-
-        item = PlotItem(
-            data=[x, z], id=0, subPlot=0,
-            plotDataType=PlotDataType.line1d,
-            labels=['x', 'z'], plotOptions=None,
-        )
-
-        fm = AutoFigureMaker()
-        fm.complexRepresentation = ComplexRepresentation.realAndImag
-        result = fm._splitComplexData(item)
-        assert len(result) == 2
-        assert not np.iscomplexobj(result[0].data[-1])
-        assert not np.iscomplexobj(result[1].data[-1])
-        # One should be real, other imaginary
-        np.testing.assert_array_equal(result[0].data[-1], z.real)
-        np.testing.assert_array_equal(result[1].data[-1], z.imag)
-
-    def test_complex_splitting_mag_and_phase(self):
-        """ComplexRepresentation.magAndPhase should produce 2 plot items."""
-        from plottr.plot.base import ComplexRepresentation, PlotItem, PlotDataType, AutoFigureMaker
-
-        dd = self._make_complex_1d()
-        x = dd.data_vals('x')
-        z = dd.data_vals('z')
-
-        item = PlotItem(
-            data=[x, z], id=0, subPlot=0,
-            plotDataType=PlotDataType.line1d,
-            labels=['x', 'z'], plotOptions=None,
-        )
-
-        fm = AutoFigureMaker()
-        fm.complexRepresentation = ComplexRepresentation.magAndPhase
-        result = fm._splitComplexData(item)
-        assert len(result) == 2
-        np.testing.assert_array_almost_equal(result[0].data[-1], np.abs(z))
-        np.testing.assert_array_almost_equal(result[1].data[-1], np.angle(z))
-
-
-class TestSelectionButtons:
-    """Verify Select All / Deselect / 1D / 2D buttons in DataSelectionWidget."""
-
-    def _make_mixed_dataset(self):
-        """Dataset with 1D and 2D dependents."""
-        dd = DataDictBase(
-            trace1d=dict(values=np.arange(10.0), axes=['x']),
-            trace1d_b=dict(values=np.arange(10.0), axes=['x']),
-            x=dict(values=np.arange(10.0)),
-            map2d=dict(values=np.arange(20.0), axes=['x', 'y']),
-            map2d_b=dict(values=np.arange(20.0), axes=['x', 'y']),
-            y=dict(values=np.arange(20.0)),
-        )
-        return dd
-
-    def test_select_all(self, qtbot):
-        """Select All selects all dependents."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        w.selectAll()
-        selected = w.getSelectedData()
-        assert set(selected) == set(dd.dependents())
-
-    def test_deselect_all(self, qtbot):
-        """Deselect All clears selection."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        w.selectAll()
-        assert len(w.getSelectedData()) > 0
-        w.deselectAll()
-        assert w.getSelectedData() == []
-
-    def test_select_1d(self, qtbot):
-        """Select 1D selects only 1D dependents."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        w.selectByNdims(1)
-        selected = w.getSelectedData()
-        for name in selected:
-            assert len(dd.axes(name)) == 1, f"{name} is not 1D"
-        # Should have both 1D traces
-        assert 'trace1d' in selected
-        assert 'trace1d_b' in selected
-        # Should NOT have 2D maps
-        assert 'map2d' not in selected
-        assert 'map2d_b' not in selected
-
-    def test_select_2d(self, qtbot):
-        """Select 2D selects only 2D dependents."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        w.selectByNdims(2)
-        selected = w.getSelectedData()
-        for name in selected:
-            assert len(dd.axes(name)) == 2, f"{name} is not 2D"
-        assert 'map2d' in selected
-        assert 'map2d_b' in selected
-        assert 'trace1d' not in selected
-
-    def test_select_resets_previous(self, qtbot):
-        """Select 1D/2D resets any existing selection."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        w.selectAll()
-        assert len(w.getSelectedData()) == len(dd.dependents())
-        w.selectByNdims(1)
-        selected = w.getSelectedData()
-        # Should ONLY have 1D, not 2D from previous selectAll
-        for name in selected:
-            assert len(dd.axes(name)) == 1
-
-    def test_has_dependents_with_ndims(self, qtbot):
-        """has_dependents_with_ndims correctly reports dimensions."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        assert w.has_dependents_with_ndims(1)
-        assert w.has_dependents_with_ndims(2)
-        assert not w.has_dependents_with_ndims(3)
-
-    def test_only_1d_dataset(self, qtbot):
-        """Dataset with only 1D deps: 2D button should report no 2D."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = DataDictBase(
-            y=dict(values=np.arange(10.0), axes=['x']),
-            x=dict(values=np.arange(10.0)),
-        )
-        w.setData(dd, dd.shapes())
-
-        assert w.has_dependents_with_ndims(1)
-        assert not w.has_dependents_with_ndims(2)
-
-    def test_batch_selection_emits_single_signal(self, qtbot):
-        """Batch selection should emit only one signal, not per-item."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        dd = self._make_mixed_dataset()
-        w.setData(dd, dd.shapes())
-
-        signal_count = [0]
-        w.dataSelectionMade.connect(lambda _: signal_count.__setitem__(0, signal_count[0] + 1))
-
-        w.selectAll()
-        assert signal_count[0] == 1, f"Expected 1 signal, got {signal_count[0]}"
-
-    def test_select_all_on_empty_dataset(self, qtbot):
-        """Select All on empty dataset should not crash."""
-        from plottr.gui.data_display import DataSelectionWidget
-        w = DataSelectionWidget()
-        qtbot.addWidget(w)
-        w.setData(DataDictBase(), {})
-        w.selectAll()
-        assert w.getSelectedData() == []

From 0b9a3883827fe25b4ef9bd8c34073667a56ba225 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 16:46:10 +0200
Subject: [PATCH 45/64] fix: deselect-all clears plots, pyqtgraph min size for
 font warning

Both backends: setData(None) now clears existing plots instead of
silently returning. Deselect-all produces None from DataSelector,
which now correctly empties the plot area.

pyqtgraph PlotBase: Set minimum size 40x40 to prevent QFont point
size <= 0 warnings when pyqtgraph computes tick labels on
zero-sized widgets.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py       | 4 ++++
 plottr/plot/pyqtgraph/autoplot.py | 3 +++
 plottr/plot/pyqtgraph/plots.py    | 1 +
 3 files changed, 8 insertions(+)

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index 9dbd5789..ab27ba82 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -408,6 +408,10 @@ def setData(self, data: Optional[DataDictBase]) -> None:
         :param data: input data
         """
         super().setData(data)
+        if data is None:
+            self.plot.fig.clear()
+            self.updatePlot()
+            return
         self.plotDataType = determinePlotDataType(data)
         # Flag to suppress redundant _plotData calls from toolbar signals
         # triggered by _processPlotTypeOptions / _processComplexTypeOptions.
diff --git a/plottr/plot/pyqtgraph/autoplot.py b/plottr/plot/pyqtgraph/autoplot.py
index a15f865f..8f740bc6 100644
--- a/plottr/plot/pyqtgraph/autoplot.py
+++ b/plottr/plot/pyqtgraph/autoplot.py
@@ -352,6 +352,9 @@ def setData(self, data: Optional[DataDictBase]) -> None:
         """
         super().setData(data)
         if self.data is None:
+            if self.fmWidget is not None:
+                self.fmWidget.deleteAllPlots()
+                self.fmWidget._arrangeGrid()
             return
 
         fmKwargs = {}  # {'widget': self.fmWidget}
diff --git a/plottr/plot/pyqtgraph/plots.py b/plottr/plot/pyqtgraph/plots.py
index 5aece289..803579ac 100644
--- a/plottr/plot/pyqtgraph/plots.py
+++ b/plottr/plot/pyqtgraph/plots.py
@@ -36,6 +36,7 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None) -> None:
 
         #: ``pyqtgraph`` plot item
         self.plot: pg.PlotItem = self.graphicsLayout.addPlot()
+        self.setMinimumSize(40, 40)
 
     def clearPlot(self) -> None:
         """Clear all plot contents (but do not delete plot elements, like axis

From 5969ec63f119a3485724a68f3a102c68ac866a9f Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 16:50:37 +0200
Subject: [PATCH 46/64] feat: mpl colormap selector, pyqtgraph complex mode
 tests

Matplotlib backend: Add colormap combo box to the toolbar. Lists
popular colormaps first (viridis, magma, inferno, etc.), then all
others. Changing the colormap updates matplotlib rcParams and
triggers an immediate replot.

Add 5 tests for pyqtgraph complex mode switching: imagData detection,
all options available, switch-to-real-and-back, separate Re/Im mode,
non-complex shows Real only.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/plot/mpl/autoplot.py  |  47 ++++++++++++++++
 test/pytest/test_plotting.py | 105 +++++++++++++++++++++++++++++++++++
 2 files changed, 152 insertions(+)

diff --git a/plottr/plot/mpl/autoplot.py b/plottr/plot/mpl/autoplot.py
index ab27ba82..c99cb7b4 100644
--- a/plottr/plot/mpl/autoplot.py
+++ b/plottr/plot/mpl/autoplot.py
@@ -153,6 +153,9 @@ class AutoPlotToolBar(QtWidgets.QToolBar):
     #: signal emitted when the complex data option has been changed
     complexRepresentationSelected = Signal(ComplexRepresentation)
 
+    #: signal emitted when the colormap has been changed
+    cmapChanged = Signal(str)
+
     def __init__(self, name: str, parent: Optional[QtWidgets.QWidget] = None):
         """Constructor for :class:`AutoPlotToolBar`"""
 
@@ -246,6 +249,20 @@ def __init__(self, name: str, parent: Optional[QtWidgets.QWidget] = None):
             lambda: self.minHeightSpin.setEnabled(self.scrollableAction.isChecked())
         )
 
+        # Colormap selector
+        self.addSeparator()
+        self._cmapLabel = QtWidgets.QLabel(" Colormap: ")
+        self.addWidget(self._cmapLabel)
+        self.cmapCombo = QtWidgets.QComboBox()
+        self.cmapCombo.setToolTip("Select colormap for 2D plots")
+        self.cmapCombo.setSizeAdjustPolicy(
+            QtWidgets.QComboBox.AdjustToContents)
+        self._populateColormaps()
+        self.addWidget(self.cmapCombo)
+
+        #: signal emitted when the colormap has been changed
+        self.cmapCombo.currentTextChanged.connect(self._onCmapChanged)
+
         self._currentPlotType = PlotType.empty
         self._currentlyAllowedPlotTypes: Tuple[PlotType, ...] = ()
 
@@ -253,6 +270,30 @@ def __init__(self, name: str, parent: Optional[QtWidgets.QWidget] = None):
         self.ComplexActions[self._currentComplex].setChecked(True)
         self._currentlyAllowedComplexTypes: Tuple[ComplexRepresentation, ...] = ()
 
+    def _populateColormaps(self) -> None:
+        """Fill the colormap combo box with matplotlib's available colormaps."""
+        import matplotlib as mpl
+        # Curated list of popular colormaps first, then all others
+        popular = ['viridis', 'magma', 'inferno', 'plasma', 'cividis',
+                   'coolwarm', 'RdBu_r', 'RdYlBu_r', 'Spectral_r',
+                   'hot', 'bone', 'gray']
+        all_cmaps = sorted(mpl.colormaps())
+        # Put popular ones first, then the rest (no duplicates)
+        ordered = [c for c in popular if c in all_cmaps]
+        ordered += [c for c in all_cmaps if c not in ordered and not c.endswith('_r')]
+        self.cmapCombo.addItems(ordered)
+        # Set current to the matplotlib default
+        default = mpl.rcParams.get('image.cmap', 'viridis')
+        idx = self.cmapCombo.findText(default)
+        if idx >= 0:
+            self.cmapCombo.setCurrentIndex(idx)
+
+    def _onCmapChanged(self, name: str) -> None:
+        """Update the matplotlib RC param and signal a replot."""
+        import matplotlib as mpl
+        mpl.rcParams['image.cmap'] = name
+        self.cmapChanged.emit(name)
+
     def selectPlotType(self, plotType: PlotType) -> None:
         """makes sure that the selected `plotType` is active (checked), all
         others are not active.
@@ -392,6 +433,7 @@ def __init__(self, parent: Optional[PlotWidgetContainer] = None):
         self.plotOptionsToolBar.minHeightSpin.editingFinished.connect(
             self._scrollableFromToolBar
         )
+        self.plotOptionsToolBar.cmapChanged.connect(self._cmapFromToolBar)
 
         scaling = dpiScalingFactor(self)
         iconSize = int(36 + 8*(scaling - 1))
@@ -471,6 +513,11 @@ def _complexPreferenceFromToolBar(self, complexRepresentation: ComplexRepresenta
             if not self._inSetData:
                 self._plotData()
 
+    @Slot(str)
+    def _cmapFromToolBar(self, _cmap: str) -> None:
+        if not self._inSetData:
+            self._plotData()
+
     @Slot()
     def _scrollableFromToolBar(self) -> None:
         scrollable = self.plotOptionsToolBar.scrollableAction.isChecked()
diff --git a/test/pytest/test_plotting.py b/test/pytest/test_plotting.py
index 48cb49e3..d5546d52 100644
--- a/test/pytest/test_plotting.py
+++ b/test/pytest/test_plotting.py
@@ -199,3 +199,108 @@ def test_repeated_setData(self, qtbot):
         t1 = w.plotType
         w.setData(data)
         assert w.plotType == t1
+
+
+# -- Pyqtgraph complex mode switching tests --
+
+class TestPyqtgraphComplexModes:
+    """Verify pyqtgraph backend handles complex mode switching for 1D data."""
+
+    @staticmethod
+    def _make_complex_1d():
+        x = np.linspace(0, 10, 50)
+        z = np.sin(x) + 1j * np.cos(x)
+        return MeshgridDataDict(
+            z=dict(values=z, axes=['x']), x=dict(values=x),
+        )
+
+    def test_complex_detected_as_imagData(self, qtbot):
+        """1D complex data should set imagData=True."""
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot
+        w = AutoPlot(parent=None)
+        qtbot.addWidget(w)
+        w.setData(self._make_complex_1d())
+        assert w.figOptions.imagData is True
+
+    def test_all_complex_options_available(self, qtbot):
+        """All complex representations should be in the toolbar menu."""
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot
+        from plottr.plot.base import ComplexRepresentation
+        w = AutoPlot(parent=None)
+        qtbot.addWidget(w)
+        w.setData(self._make_complex_1d())
+
+        # Find the Complex button's menu
+        menu_labels = self._get_complex_menu_labels(w)
+        assert ComplexRepresentation.real.label in menu_labels
+        assert ComplexRepresentation.realAndImag.label in menu_labels
+        assert ComplexRepresentation.realAndImagSeparate.label in menu_labels
+        assert ComplexRepresentation.magAndPhase.label in menu_labels
+
+    def test_switch_to_real_and_back(self, qtbot):
+        """After switching to Real, should be able to switch back to Real/Imag."""
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot
+        from plottr.plot.base import ComplexRepresentation
+        w = AutoPlot(parent=None)
+        qtbot.addWidget(w)
+        w.setData(self._make_complex_1d())
+
+        # Switch to Real
+        w.figOptions.complexRepresentation = ComplexRepresentation.real
+        w._refreshPlot()
+
+        # imagData should still be True (data is still complex)
+        assert w.figOptions.imagData is True
+
+        # All options should still be available
+        menu_labels = self._get_complex_menu_labels(w)
+        assert ComplexRepresentation.realAndImag.label in menu_labels
+
+        # Switch back
+        w.figOptions.complexRepresentation = ComplexRepresentation.realAndImag
+        w._refreshPlot()
+        assert w.figOptions.complexRepresentation == ComplexRepresentation.realAndImag
+
+    def test_separate_re_im_mode(self, qtbot):
+        """realAndImagSeparate should create 2 subplots for 1D data."""
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot
+        from plottr.plot.base import ComplexRepresentation
+        w = AutoPlot(parent=None)
+        qtbot.addWidget(w)
+        w.setData(self._make_complex_1d())
+
+        w.figOptions.complexRepresentation = ComplexRepresentation.realAndImagSeparate
+        w._refreshPlot()
+
+        # Should have 2 subplots (one for Real, one for Imag)
+        assert w.fmWidget is not None
+        assert len(w.fmWidget.subPlots) == 2
+
+    def test_non_complex_only_shows_real(self, qtbot):
+        """Non-complex 1D data should only offer Real in the menu."""
+        from plottr.plot.pyqtgraph.autoplot import AutoPlot
+        from plottr.plot.base import ComplexRepresentation
+        w = AutoPlot(parent=None)
+        qtbot.addWidget(w)
+        x = np.linspace(0, 10, 50)
+        data = MeshgridDataDict(
+            y=dict(values=np.sin(x), axes=['x']), x=dict(values=x),
+        )
+        w.setData(data)
+        assert w.figOptions.imagData is False
+        menu_labels = self._get_complex_menu_labels(w)
+        assert menu_labels == [ComplexRepresentation.real.label]
+
+    @staticmethod
+    def _get_complex_menu_labels(w):
+        """Extract labels from the Complex button's popup menu."""
+        # The Complex button is at action index 1 in the toolbar
+        toolbar = w.figConfig
+        actions = toolbar.actions()
+        for a in actions:
+            widget = toolbar.widgetForAction(a)
+            if isinstance(widget, __import__('PyQt6').QtWidgets.QToolButton):
+                menu = widget.menu()
+                if menu is not None:
+                    return [ma.text() for ma in menu.actions()]
+        return []

From d68b29622294c458a06caadd1d5c75eb66945837 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:04:51 +0200
Subject: [PATCH 47/64] fix: pyqtgraph image axis orientation, deselect-all UX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Axis inversion: Transpose z data in pyqtgraph setImage() to match
matplotlib convention. The meshgrid first axis maps to the bottom
(x) label but pyqtgraph ImageItem needs it transposed for correct
display orientation.

Deselect all: Remove the button since pyqtgraph's flowchart does
not propagate empty selection downstream (by design — None return
from process() means 'no change'). The Select All / 1D / 2D buttons
already provide sufficient selection control. deselectAll() now
selects the first dependent to ensure the plot always has data.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/gui/data_display.py        | 7 +++++--
 plottr/node/data_selector.py      | 8 --------
 plottr/plot/pyqtgraph/plots.py    | 5 ++++-
 test/pytest/test_data_selector.py | 7 +++++--
 test/pytest/test_plotting.py      | 5 +++--
 5 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/plottr/gui/data_display.py b/plottr/gui/data_display.py
index aaae752c..6c72bb68 100644
--- a/plottr/gui/data_display.py
+++ b/plottr/gui/data_display.py
@@ -139,8 +139,11 @@ def selectAll(self) -> None:
         self.setBatchSelectedData(enabled)
 
     def deselectAll(self) -> None:
-        """Deselect all fields. Single signal emission."""
-        self.setBatchSelectedData([])
+        """Clear selection. Selects the first dependent to ensure the plot
+        always has something to display (pyqtgraph flowchart does not
+        propagate empty selection downstream)."""
+        deps = list(self.dataItems.keys())
+        self.setBatchSelectedData(deps[:1] if deps else [])
 
     def selectByNdims(self, ndims: int) -> None:
         """Select all dependents with exactly *ndims* independent axes.
diff --git a/plottr/node/data_selector.py b/plottr/node/data_selector.py
index f806c489..25daba1b 100644
--- a/plottr/node/data_selector.py
+++ b/plottr/node/data_selector.py
@@ -46,10 +46,6 @@ def __init__(self, node: Optional[Node] = None):
         self._selectAllBtn.clicked.connect(self._onSelectAll)
         btnLayout.addWidget(self._selectAllBtn)
 
-        self._deselectAllBtn = QtWidgets.QPushButton("Deselect all")
-        self._deselectAllBtn.clicked.connect(self._onDeselectAll)
-        btnLayout.addWidget(self._deselectAllBtn)
-
         self._select1dBtn = QtWidgets.QPushButton("Select all 1D")
         self._select1dBtn.clicked.connect(self._onSelect1D)
         btnLayout.addWidget(self._select1dBtn)
@@ -68,10 +64,6 @@ def _onSelectAll(self) -> None:
         assert self.widget is not None
         self.widget.selectAll()
 
-    def _onDeselectAll(self) -> None:
-        assert self.widget is not None
-        self.widget.deselectAll()
-
     def _onSelect1D(self) -> None:
         assert self.widget is not None
         self.widget.selectByNdims(1)
diff --git a/plottr/plot/pyqtgraph/plots.py b/plottr/plot/pyqtgraph/plots.py
index 803579ac..1e9e44ac 100644
--- a/plottr/plot/pyqtgraph/plots.py
+++ b/plottr/plot/pyqtgraph/plots.py
@@ -114,7 +114,10 @@ def setImage(self, x: np.ndarray, y: np.ndarray, z: np.ndarray) -> None:
 
         self.img = pg.ImageItem()
         self.plot.addItem(self.img)
-        self.img.setImage(z)
+        # Transpose z to match matplotlib convention: the first axis of the
+        # meshgrid (labeled on bottom/x) maps to the horizontal display axis.
+        # pyqtgraph ImageItem displays array[col, row], so z.T is needed.
+        self.img.setImage(z.T)
         self.img.setRect(QtCore.QRectF(x.min(), y.min(), x.max() - x.min(), y.max() - y.min()))
 
         self.colorbar.setImageItem(self.img)
diff --git a/test/pytest/test_data_selector.py b/test/pytest/test_data_selector.py
index 71488969..256bd1e3 100644
--- a/test/pytest/test_data_selector.py
+++ b/test/pytest/test_data_selector.py
@@ -109,13 +109,16 @@ def test_select_all(self, qtbot):
         w.selectAll()
         assert set(w.getSelectedData()) == set(dd.dependents())
 
-    def test_deselect_all(self, qtbot):
+    def test_deselect_all_selects_first(self, qtbot):
+        """deselectAll should select only the first dependent (always keep one)."""
         from plottr.gui.data_display import DataSelectionWidget
         w = DataSelectionWidget(); qtbot.addWidget(w)
         dd = self._mixed(); w.setData(dd, dd.shapes())
         w.selectAll()
         w.deselectAll()
-        assert w.getSelectedData() == []
+        selected = w.getSelectedData()
+        assert len(selected) == 1
+        assert selected[0] == dd.dependents()[0]
 
     def test_select_1d(self, qtbot):
         from plottr.gui.data_display import DataSelectionWidget
diff --git a/test/pytest/test_plotting.py b/test/pytest/test_plotting.py
index d5546d52..36f488d4 100644
--- a/test/pytest/test_plotting.py
+++ b/test/pytest/test_plotting.py
@@ -61,7 +61,8 @@ def test_pyqtgraph_image_data_shape(self, qtbot):
         plot = PlotWithColorbar()
         qtbot.addWidget(plot)
         plot.setImage(xx, yy, zz)
-        assert plot.img.image.shape == (5, 3)
+        # z is transposed for display: input (5, 3) → ImageItem (3, 5)
+        assert plot.img.image.shape == (3, 5)
 
     def test_pyqtgraph_image_rect(self, qtbot):
         from plottr.plot.pyqtgraph.plots import PlotWithColorbar
@@ -85,7 +86,7 @@ def test_pyqtgraph_reversed_x(self, qtbot):
         plot = PlotWithColorbar()
         qtbot.addWidget(plot)
         plot.setImage(xx, yy, zz)
-        assert plot.img.image.shape == (5, 3)
+        assert plot.img.image.shape == (3, 5)  # transposed
 
     def test_mpl_and_pyqtgraph_consistency(self, qtbot):
         _, xx, yy, zz = _make_asymmetric_meshgrid()

From 68681cd8594791cbfb44dc6e7df7571b3e886ab5 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:09:19 +0200
Subject: [PATCH 48/64] fix: rename deselect-all to 'Select first only' button
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Renamed deselectAll to selectFirst — selects only the first
dependent, matching the default behaviour when opening a plot window.
Added as a visible button in the data selector toolbar.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/gui/data_display.py        | 6 ++----
 plottr/node/data_selector.py      | 8 ++++++++
 test/pytest/test_data_selector.py | 6 +++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/plottr/gui/data_display.py b/plottr/gui/data_display.py
index 6c72bb68..c688b6e2 100644
--- a/plottr/gui/data_display.py
+++ b/plottr/gui/data_display.py
@@ -138,10 +138,8 @@ def selectAll(self) -> None:
         enabled = [n for n, w in self.dataItems.items() if not w.isDisabled()]
         self.setBatchSelectedData(enabled)
 
-    def deselectAll(self) -> None:
-        """Clear selection. Selects the first dependent to ensure the plot
-        always has something to display (pyqtgraph flowchart does not
-        propagate empty selection downstream)."""
+    def selectFirst(self) -> None:
+        """Select only the first dependent (default view)."""
         deps = list(self.dataItems.keys())
         self.setBatchSelectedData(deps[:1] if deps else [])
 
diff --git a/plottr/node/data_selector.py b/plottr/node/data_selector.py
index 25daba1b..06b21d6d 100644
--- a/plottr/node/data_selector.py
+++ b/plottr/node/data_selector.py
@@ -46,6 +46,10 @@ def __init__(self, node: Optional[Node] = None):
         self._selectAllBtn.clicked.connect(self._onSelectAll)
         btnLayout.addWidget(self._selectAllBtn)
 
+        self._selectFirstBtn = QtWidgets.QPushButton("Select first only")
+        self._selectFirstBtn.clicked.connect(self._onSelectFirst)
+        btnLayout.addWidget(self._selectFirstBtn)
+
         self._select1dBtn = QtWidgets.QPushButton("Select all 1D")
         self._select1dBtn.clicked.connect(self._onSelect1D)
         btnLayout.addWidget(self._select1dBtn)
@@ -64,6 +68,10 @@ def _onSelectAll(self) -> None:
         assert self.widget is not None
         self.widget.selectAll()
 
+    def _onSelectFirst(self) -> None:
+        assert self.widget is not None
+        self.widget.selectFirst()
+
     def _onSelect1D(self) -> None:
         assert self.widget is not None
         self.widget.selectByNdims(1)
diff --git a/test/pytest/test_data_selector.py b/test/pytest/test_data_selector.py
index 256bd1e3..389cc4c7 100644
--- a/test/pytest/test_data_selector.py
+++ b/test/pytest/test_data_selector.py
@@ -109,13 +109,13 @@ def test_select_all(self, qtbot):
         w.selectAll()
         assert set(w.getSelectedData()) == set(dd.dependents())
 
-    def test_deselect_all_selects_first(self, qtbot):
-        """deselectAll should select only the first dependent (always keep one)."""
+    def test_select_first(self, qtbot):
+        """selectFirst should select only the first dependent."""
         from plottr.gui.data_display import DataSelectionWidget
         w = DataSelectionWidget(); qtbot.addWidget(w)
         dd = self._mixed(); w.setData(dd, dd.shapes())
         w.selectAll()
-        w.deselectAll()
+        w.selectFirst()
         selected = w.getSelectedData()
         assert len(selected) == 1
         assert selected[0] == dd.dependents()[0]

From 2efa69efa41c86fb7b208b4bbfc855bb5352dde0 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:14:59 +0200
Subject: [PATCH 49/64] ci: trigger CI after history rewrite

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

From 8ec0349f68a05d5df062451c65fa7620e0c70402 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:31:31 +0200
Subject: [PATCH 50/64] chore: remove inspectr from mypy warn_unused_ignores
 override

Master cleaned up inspectr type:ignore comments, so the per-module
override is no longer needed for that module.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 pyproject.toml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 385ead33..c3bfc4b5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,11 +108,10 @@ module = [
 ]
 ignore_missing_imports = true
 
-# These modules contain type: ignore comments that are needed by
-# PyQt5-stubs (CI) but unused with PyQt6 (or vice versa).
+# These modules contain type: ignore comments that may be unused
+# depending on the Qt stubs version installed.
 [[tool.mypy.overrides]]
 module = [
-    "plottr.apps.inspectr",
     "plottr.node.autonode",
     "plottr.node.scaleunits",
 ]

From 15f7b3ab69d217f9f87ed3553349a047d01e20c3 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:33:46 +0200
Subject: [PATCH 51/64] fix: use plottr Qt imports instead of PyQt6 in tests

CI installs PyQt5, not PyQt6. Use plottr's Qt abstraction layer
(plottr.QtCore, plottr.QtWidgets) for cross-binding compatibility.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 test/pytest/test_plotting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/pytest/test_plotting.py b/test/pytest/test_plotting.py
index 36f488d4..db482919 100644
--- a/test/pytest/test_plotting.py
+++ b/test/pytest/test_plotting.py
@@ -66,7 +66,7 @@ def test_pyqtgraph_image_data_shape(self, qtbot):
 
     def test_pyqtgraph_image_rect(self, qtbot):
         from plottr.plot.pyqtgraph.plots import PlotWithColorbar
-        from PyQt6 import QtCore
+        from plottr import QtCore
         _, xx, yy, zz = _make_asymmetric_meshgrid()
         plot = PlotWithColorbar()
         qtbot.addWidget(plot)
@@ -300,7 +300,7 @@ def _get_complex_menu_labels(w):
         actions = toolbar.actions()
         for a in actions:
             widget = toolbar.widgetForAction(a)
-            if isinstance(widget, __import__('PyQt6').QtWidgets.QToolButton):
+            if isinstance(widget, __import__('plottr').QtWidgets.QToolButton):
                 menu = widget.menu()
                 if menu is not None:
                     return [ma.text() for ma in menu.actions()]

From 99122f9337407735120743d31e8453b9cddc1e9b Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Fri, 1 May 2026 17:41:20 +0200
Subject: [PATCH 52/64] fix: ParamSpecBase import compat for older qcodes in CI

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 test/pytest/test_qcodes_data.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/test/pytest/test_qcodes_data.py b/test/pytest/test_qcodes_data.py
index 719be638..ba9ce9c2 100644
--- a/test/pytest/test_qcodes_data.py
+++ b/test/pytest/test_qcodes_data.py
@@ -318,7 +318,10 @@ def check():
 
 def _make_qcodes_db_with_runs(db_path: str, n_runs: int = 1) -> str:
     """Helper: create a QCodes DB with n_runs simple numeric datasets."""
-    from qcodes.parameters import ParamSpecBase
+    try:
+        from qcodes.parameters import ParamSpecBase
+    except ImportError:
+        from qcodes.dataset.descriptions.param_spec import ParamSpecBase
     from qcodes.dataset.descriptions.dependencies import InterDependencies_
 
     initialise_or_create_database_at(db_path)
@@ -386,7 +389,10 @@ class TestDatasetRefresh:
     def test_incremental_overview(self, tmp_path):
         """get_db_overview with start_run_id should find newly added runs."""
         from plottr.data.qcodes_db_overview import get_db_overview
-        from qcodes.parameters import ParamSpecBase
+        try:
+            from qcodes.parameters import ParamSpecBase
+        except ImportError:
+            from qcodes.dataset.descriptions.param_spec import ParamSpecBase
         from qcodes.dataset.descriptions.dependencies import InterDependencies_
 
         db_path = str(tmp_path / "test.db")
@@ -414,7 +420,10 @@ def test_inspectr_refresh(self, qtbot, tmp_path):
         import os
         os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
         from plottr.apps.inspectr import QCodesDBInspector
-        from qcodes.parameters import ParamSpecBase
+        try:
+            from qcodes.parameters import ParamSpecBase
+        except ImportError:
+            from qcodes.dataset.descriptions.param_spec import ParamSpecBase
         from qcodes.dataset.descriptions.dependencies import InterDependencies_
 
         db_path = str(tmp_path / "test.db")

From 62a42dd85aae058affdad48b7f13fa4e22070214 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 20 May 2026 10:00:32 +0200
Subject: [PATCH 53/64] fix: refresh updates incomplete dataset records +
 persist plot backend

refresh: refreshDB() now always does a full DB re-read instead of
incremental loading. The old incremental path (start_run_id > latest)
would never update existing rows in the dataframe, so the records
counter for incomplete datasets being filled with new data would
stay stale until the user closed and reopened inspectr. Full re-read
is fast (~10ms via SQL JOIN) so the optimization wasn't worth the
correctness cost. Existing merge logic via dbdf.update() correctly
applies the fresh values to existing rows.

backend persistence: User's plot backend choice (matplotlib /
pyqtgraph) is now saved via QSettings and restored on next launch.
QSettings was chosen for the cleanest cross-platform persistence
(registry on Windows, plist on macOS, ini on Linux) and zero
external dependencies.

Add 4 new tests: refresh updates incomplete records, save/load
backend choice, invalid value handling, launch uses saved backend,
combo change persists choice.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py         |  75 ++++++++++++++---
 test/pytest/test_qcodes_data.py | 144 ++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 13 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index fb6fa94b..e8471141 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -48,6 +48,13 @@
 #: Populated lazily on first access.
 _PLOT_BACKENDS: Dict[str, type] = {}
 
+#: Organization and application names used by QSettings to persist per-user
+#: preferences (selected plot backend, etc.). QSettings picks an OS-native
+#: storage location (registry on Windows, plist on macOS, ini on Linux).
+_QSETTINGS_ORG = 'plottr'
+_QSETTINGS_APP = 'inspectr'
+_BACKEND_SETTING_KEY = 'plotBackend'
+
 
 def _get_plot_backends() -> Dict[str, type]:
     """Lazily populate and return the backend mapping."""
@@ -67,6 +74,25 @@ def _backend_name_for_class(cls: Optional[type]) -> Optional[str]:
     return None
 
 
+def _load_saved_backend() -> Optional[str]:
+    """Read the previously-saved backend choice from QSettings.
+
+    :return: backend name (e.g. 'matplotlib', 'pyqtgraph'), or None if no
+        setting saved or the saved value is not a recognised backend.
+    """
+    settings = QtCore.QSettings(_QSETTINGS_ORG, _QSETTINGS_APP)
+    saved = settings.value(_BACKEND_SETTING_KEY)
+    if isinstance(saved, str) and saved in _get_plot_backends():
+        return saved
+    return None
+
+
+def _save_backend_choice(backend: str) -> None:
+    """Persist the user's backend choice for future inspectr launches."""
+    settings = QtCore.QSettings(_QSETTINGS_ORG, _QSETTINGS_APP)
+    settings.setValue(_BACKEND_SETTING_KEY, backend)
+
+
 ### Database inspector tool
 
 class DateList(QtWidgets.QListWidget):
@@ -475,6 +501,15 @@ def __init__(self, parent: Optional[QtWidgets.QWidget] = None,
         super().__init__(parent)
 
         self._plotWindows: Dict[int, WindowDict] = {}
+
+        # If no explicit backend class is requested, fall back to whatever
+        # the user picked the last time they used inspectr (persisted via
+        # QSettings). Falling back to None lets downstream code use the
+        # plottr-wide default.
+        if plotWidgetClass is None:
+            saved_name = _load_saved_backend()
+            if saved_name is not None:
+                plotWidgetClass = _get_plot_backends().get(saved_name)
         self._plotWidgetClass = plotWidgetClass
 
         self.filepath = dbPath
@@ -742,19 +777,28 @@ def updateDates(self) -> None:
     ### reloading the db
     @Slot()
     def refreshDB(self) -> None:
-        if self.filepath is not None:
-            if self.loadDBThread.isRunning():
-                return
-            if self.dbdf is not None and self.dbdf.size > 0:
-                self.latestRunId = int(self.dbdf.index.values.max())
-            else:
-                self.latestRunId = -1
+        """Re-read the database to pick up new runs AND updates to existing
+        runs (e.g., records count growing as an incomplete dataset is filled).
+
+        Always does a full re-read because incremental loading (start_run_id >
+        latestRunId) would miss updates to incomplete datasets whose row
+        already exists in the dbdf. The full re-read is fast (single SQL JOIN
+        ~10ms even for 1500 runs), so the incremental optimization isn't
+        worth the correctness cost.
+        """
+        if self.filepath is None or self.loadDBThread.isRunning():
+            return
+
+        # Remember the latest run_id so DBLoaded knows to merge rather than
+        # replace (preserves existing UI state like selection/expand state).
+        if self.dbdf is not None and self.dbdf.size > 0:
+            self.latestRunId = int(self.dbdf.index.values.max())
+        else:
+            self.latestRunId = -1
 
-            # Incremental refresh: only load runs newer than what we have.
-            start_run_id = self.latestRunId + 1 if self.latestRunId is not None and self.latestRunId > 0 else 1
-            if self.filepath is not None:
-                if not self.loadDBThread.isRunning():
-                    self.loadDBProcess.setPath(self.filepath, start_run_id=start_run_id)
+        # Full re-read so updates to existing runs (records counter, completed
+        # timestamp) are picked up.
+        self.loadDBProcess.setPath(self.filepath, start_run_id=1)
 
     @Slot(float)
     def setMonitorInterval(self, val: float) -> None:
@@ -840,7 +884,12 @@ def plotRun(self, runId: int) -> None:
     @Slot(str)
     def _onBackendChanged(self, backend: str) -> None:
         backends = _get_plot_backends()
-        self._plotWidgetClass = backends.get(backend, self._plotWidgetClass)
+        if backend in backends:
+            self._plotWidgetClass = backends[backend]
+            # Persist the user's choice for next launch
+            _save_backend_choice(backend)
+        else:
+            self._plotWidgetClass = backends.get(backend, self._plotWidgetClass)
 
     def setTag(self, item: QtWidgets.QTreeWidgetItem, tag: str) -> None:
         # set tag in the database
diff --git a/test/pytest/test_qcodes_data.py b/test/pytest/test_qcodes_data.py
index ba9ce9c2..224b5f34 100644
--- a/test/pytest/test_qcodes_data.py
+++ b/test/pytest/test_qcodes_data.py
@@ -453,3 +453,147 @@ def refresh_done():
             return (inspector.dbdf is not None and 2 in inspector.dbdf.index)
         qtbot.waitUntil(refresh_done, timeout=5000)
         assert 2 in inspector.dbdf.index
+
+    def test_inspectr_refresh_updates_records_for_incomplete(
+        self, qtbot, tmp_path
+    ):
+        """refreshDB should update records counter for incomplete datasets.
+
+        This was a regression: incremental refresh only loaded NEW runs,
+        so existing rows' records counter stayed stale.
+        """
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.inspectr import QCodesDBInspector
+        try:
+            from qcodes.parameters import ParamSpecBase
+        except ImportError:
+            from qcodes.dataset.descriptions.param_spec import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+        db_path = str(tmp_path / "test.db")
+        initialise_or_create_database_at(db_path)
+        load_or_create_experiment("exp", sample_name="s")
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+
+        # Start an INCOMPLETE dataset with 5 results
+        ds = qc.new_data_set("incomplete")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        for i in range(5):
+            ds.add_results([{p_x.name: float(i), p_y.name: float(i ** 2)}])
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+        qtbot.waitUntil(
+            lambda: inspector.dbdf is not None and 1 in inspector.dbdf.index,
+            timeout=5000
+        )
+        initial = int(inspector.dbdf.loc[1, 'records'])
+        assert initial == 5
+
+        # Add more results to the same dataset (still incomplete)
+        for i in range(5, 25):
+            ds.add_results([{p_x.name: float(i), p_y.name: float(i ** 2)}])
+
+        # Refresh should pick up the new records count
+        inspector.refreshDB()
+        qtbot.waitUntil(
+            lambda: int(inspector.dbdf.loc[1, 'records']) == 25,
+            timeout=5000
+        )
+        assert int(inspector.dbdf.loc[1, 'records']) == 25
+        ds.mark_completed()
+
+
+class TestBackendPersistence:
+    """Verify that the chosen plot backend is remembered across launches."""
+
+    @pytest.fixture(autouse=True)
+    def _clear_qsettings(self):
+        """Use an isolated QSettings store for each test so we don't pollute
+        the user's real preferences."""
+        from plottr import QtCore
+        from plottr.apps.inspectr import (
+            _QSETTINGS_ORG, _QSETTINGS_APP, _BACKEND_SETTING_KEY,
+        )
+        # Force INI format in a temp scope so the test is isolated
+        QtCore.QCoreApplication.setOrganizationName("plottr-test")
+        QtCore.QCoreApplication.setApplicationName("inspectr-test")
+        settings = QtCore.QSettings(_QSETTINGS_ORG, _QSETTINGS_APP)
+        settings.remove(_BACKEND_SETTING_KEY)
+        yield
+        settings.remove(_BACKEND_SETTING_KEY)
+
+    def test_save_and_load_backend_choice(self):
+        """Saved backend should be returned by _load_saved_backend."""
+        from plottr.apps.inspectr import (
+            _load_saved_backend, _save_backend_choice,
+        )
+        assert _load_saved_backend() is None
+        _save_backend_choice("pyqtgraph")
+        assert _load_saved_backend() == "pyqtgraph"
+        _save_backend_choice("matplotlib")
+        assert _load_saved_backend() == "matplotlib"
+
+    def test_unknown_backend_returns_none(self):
+        """Invalid saved value should not be returned."""
+        from plottr import QtCore
+        from plottr.apps.inspectr import (
+            _load_saved_backend, _QSETTINGS_ORG, _QSETTINGS_APP,
+            _BACKEND_SETTING_KEY,
+        )
+        settings = QtCore.QSettings(_QSETTINGS_ORG, _QSETTINGS_APP)
+        settings.setValue(_BACKEND_SETTING_KEY, "not-a-backend")
+        assert _load_saved_backend() is None
+
+    def test_inspectr_uses_saved_backend_on_launch(self, qtbot, tmp_path):
+        """If no plotWidgetClass is passed, inspectr should pick up the
+        previously saved backend."""
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.inspectr import (
+            QCodesDBInspector, _save_backend_choice, _get_plot_backends,
+        )
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=1)
+
+        # Simulate a previous run having selected pyqtgraph
+        _save_backend_choice("pyqtgraph")
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+        expected_cls = _get_plot_backends()["pyqtgraph"]
+        assert inspector._plotWidgetClass is expected_cls
+        # Wait for background load to complete cleanly before teardown
+        qtbot.waitUntil(
+            lambda: not inspector.loadDBThread.isRunning(), timeout=5000
+        )
+
+    def test_changing_backend_persists_choice(self, qtbot, tmp_path):
+        """Changing the toolbar combo box should save the new choice."""
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.inspectr import (
+            QCodesDBInspector, _load_saved_backend,
+        )
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=1)
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+        # Wait for initial load to settle
+        qtbot.waitUntil(
+            lambda: not inspector.loadDBThread.isRunning(), timeout=5000
+        )
+
+        inspector.plotBackendSelector.setCurrentText("pyqtgraph")
+        assert _load_saved_backend() == "pyqtgraph"
+
+        inspector.plotBackendSelector.setCurrentText("matplotlib")
+        assert _load_saved_backend() == "matplotlib"
+

From a64d8b359bd16ecb4b4c4fba5296a741f7a2a7d7 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 20 May 2026 17:23:07 +0200
Subject: [PATCH 54/64] fix: handle metadata-only datasets gracefully (no
 KeyError)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ds_to_datadicts: Skip dependents whose parameter tree is missing from
the cache instead of raising KeyError. This commonly happens when the
dataset's .nc data file is missing (metadata-only DB downloaded
without the companion data files).

QCAutoPlotMainWindow: Show a clear status bar message ('No data
available for run N ...') when the dataset has no results, instead
of leaving the user with an unexplained empty window.

This is not a regression from our changes — it's a pre-existing issue
that becomes more visible now that users can browse large metadata-only
DBs quickly via the fast SQL overview.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py         | 13 +++++
 plottr/data/qcodes_dataset.py   | 41 ++++++++++-----
 test/pytest/test_qcodes_data.py | 93 +++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 13 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 7779bcba..8852c3d4 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -289,6 +289,19 @@ def __init__(self, fc: Flowchart,
         if self.loaderNode is not None and self.loaderNode.nLoadedRecords > 0:
             self.setDefaults(self.loaderNode.outputValues()['dataOut'])
             self._initialized = True
+        elif self.loaderNode is not None and pathAndId is not None:
+            # Loader ran but produced no data — most commonly because the
+            # dataset's .nc data file is missing (metadata-only DB) or the
+            # dataset really has no results yet.  Show a clear status bar
+            # message instead of leaving the user with an empty window.
+            ds = getattr(self.loaderNode, '_dataset', None)
+            if ds is not None and ds.number_of_results == 0:
+                msg = (
+                    f"No data available for run {pathAndId[1]} "
+                    f"(GUID: {ds.guid}). The dataset's data file may be "
+                    f"missing or the dataset is empty."
+                )
+                self.statusBar().showMessage(msg)
 
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)
diff --git a/plottr/data/qcodes_dataset.py b/plottr/data/qcodes_dataset.py
index 8b6911fd..09c37356 100644
--- a/plottr/data/qcodes_dataset.py
+++ b/plottr/data/qcodes_dataset.py
@@ -298,13 +298,18 @@ def ds_to_datadicts(ds: 'DataSetProtocol') -> Dict[str, DataDict]:
     """
     Make DataDicts from a qcodes DataSet.
 
+    Parameters whose values are not present in the dataset's cache (e.g.,
+    when the underlying ``.nc`` file is missing or the dataset is metadata
+    only) are skipped rather than raising ``KeyError``.  This lets callers
+    handle "no data" gracefully (e.g., showing an empty plot or a status
+    message) instead of crashing.
+
     :param ds: qcodes dataset
-    :returns: dictionary with one item per dependent.
+    :returns: dictionary with one item per dependent that has data.
               key: name of the dependent
-              value: DataDict containing that dependent and its
-                     axes.
+              value: DataDict containing that dependent and its axes.
     """
-    ret = {}
+    ret: Dict[str, DataDict] = {}
     has_cache = hasattr(ds, 'cache')
     if has_cache:
         pdata = ds.cache.data()
@@ -312,15 +317,25 @@ def ds_to_datadicts(ds: 'DataSetProtocol') -> Dict[str, DataDict]:
         # qcodes < 0.17
         pdata = ds.get_parameter_data()
     for p, spec in ds.paramspecs.items():
-        if spec.depends_on != '':
-            axes = spec.depends_on_
-            data = dict()
-            data[p] = dict(unit=spec.unit, label=spec.label, axes=axes, values=pdata[p][p])
-            for ax in axes:
-                axspec = ds.paramspecs[ax]
-                data[ax] = dict(unit=axspec.unit, label=axspec.label, values=pdata[p][ax])
-            ret[p] = DataDict(**data)
-            ret[p].validate()
+        if spec.depends_on == '':
+            continue
+        # Skip dependents whose parameter tree isn't present in the cache
+        # (typically: metadata-only datasets whose .nc data file is missing).
+        if p not in pdata or p not in pdata[p]:
+            continue
+        axes = spec.depends_on_
+        # Skip if any required axis values are also missing
+        if any(ax not in pdata[p] for ax in axes):
+            continue
+        data: Dict[str, Any] = {}
+        data[p] = dict(unit=spec.unit, label=spec.label, axes=axes,
+                       values=pdata[p][p])
+        for ax in axes:
+            axspec = ds.paramspecs[ax]
+            data[ax] = dict(unit=axspec.unit, label=axspec.label,
+                            values=pdata[p][ax])
+        ret[p] = DataDict(**data)
+        ret[p].validate()
 
     return ret
 
diff --git a/test/pytest/test_qcodes_data.py b/test/pytest/test_qcodes_data.py
index 224b5f34..4cefdcd0 100644
--- a/test/pytest/test_qcodes_data.py
+++ b/test/pytest/test_qcodes_data.py
@@ -597,3 +597,96 @@ def test_changing_backend_persists_choice(self, qtbot, tmp_path):
         inspector.plotBackendSelector.setCurrentText("matplotlib")
         assert _load_saved_backend() == "matplotlib"
 
+
+class TestNoDataAvailable:
+    """Verify that opening a dataset with no actual data
+    (e.g., metadata-only DB where the .nc file is missing)
+    shows a clear status message instead of an empty window."""
+
+    def _make_dataset_without_data(self, db_path: str) -> int:
+        """Create a qcodes dataset whose data file is then deleted,
+        leaving a metadata-only entry in the SQLite DB."""
+        import os
+        try:
+            from qcodes.parameters import ParamSpecBase
+        except ImportError:
+            from qcodes.dataset.descriptions.param_spec import ParamSpecBase
+        from qcodes.dataset.descriptions.dependencies import InterDependencies_
+
+        initialise_or_create_database_at(db_path)
+        load_or_create_experiment("metadata_only_exp", sample_name="s")
+        p_x = ParamSpecBase("x", "numeric")
+        p_y = ParamSpecBase("y", "numeric")
+        interdeps = InterDependencies_(dependencies={p_y: (p_x,)})
+
+        ds = qc.new_data_set("metadata_only_run")
+        ds.set_interdependencies(interdeps)
+        ds.mark_started()
+        # Don't add any results, mark as completed
+        ds.mark_completed()
+        run_id = ds.run_id
+
+        # Force a reload — load_dataset_from will see number_of_results == 0
+        return run_id
+
+    def test_ds_to_datadicts_skips_missing_params(self, tmp_path):
+        """ds_to_datadicts should not raise KeyError when cache is empty
+        or missing parameters (e.g., when the .nc data file is missing
+        for a metadata-only DB)."""
+        from plottr.data.qcodes_dataset import ds_to_datadicts
+        from qcodes.dataset.data_set import load_by_id
+        from unittest.mock import patch
+
+        db_path = str(tmp_path / "test.db")
+        run_id = self._make_dataset_without_data(db_path)
+        ds = load_by_id(run_id)
+
+        # Simulate: .nc file missing → cache.data() returns {}.
+        # Should NOT raise KeyError; should return empty dict (no
+        # dependent params have data to report).
+        with patch.object(ds.cache, 'data', return_value={}):
+            result = ds_to_datadicts(ds)
+        assert result == {}
+
+    def test_ds_to_datadicts_skips_partial_data(self, tmp_path):
+        """ds_to_datadicts should skip dependents whose tree is missing
+        from the cache, rather than crashing."""
+        from plottr.data.qcodes_dataset import ds_to_datadicts
+        from qcodes.dataset.data_set import load_by_id
+        from unittest.mock import patch
+        import numpy as np
+
+        db_path = str(tmp_path / "test.db")
+        run_id = self._make_dataset_without_data(db_path)
+        ds = load_by_id(run_id)
+
+        # Cache says we have x (independent) but no 'y' tree → 'y' should
+        # be silently skipped without raising KeyError.
+        partial = {'x': {'x': np.array([1.0, 2.0])}}
+        with patch.object(ds.cache, 'data', return_value=partial):
+            result = ds_to_datadicts(ds)
+        # 'y' is the only dependent and has no data → empty result
+        assert result == {}
+
+    def test_autoplot_shows_status_for_empty_dataset(
+        self, qtbot, tmp_path
+    ):
+        """QCAutoPlotMainWindow should show a status bar message when
+        the dataset has no data, instead of leaving an empty window
+        with no explanation."""
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.autoplot import autoplotQcodesDataset
+
+        db_path = str(tmp_path / "test.db")
+        run_id = self._make_dataset_without_data(db_path)
+
+        fc, win = autoplotQcodesDataset(pathAndId=(db_path, run_id))
+        qtbot.addWidget(win)
+
+        status = win.statusBar().currentMessage()
+        assert "No data available" in status
+        assert str(run_id) in status
+        win.close()
+
+

From 93734da1f89483769028d7d42a3bef295efef74f Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 20 May 2026 17:34:13 +0200
Subject: [PATCH 55/64] fix: show missing-data-file message, clear stale run
 list on reload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

autoplot: When a dataset has no data (number_of_results == 0), check
its export_info for missing .nc files and show a status bar message
with the exact missing file path(s). This replaces the silent empty
window for metadata-only DBs where the .nc companion files are absent.

ds_to_datadicts: Skip dependents whose parameter tree is missing from
the cache instead of raising KeyError — returns an empty dict for
datasets with no loadable data.

inspectr: Clear the run list before reloading a DB so that stale
items from a previous load don't show through the overlay text.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 40 ++++++++++++++++++++++++++++++++++------
 plottr/apps/inspectr.py |  3 +++
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 8852c3d4..e05c64ae 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -36,6 +36,38 @@
 LOGGER = logging.getLogger('plottr.apps.autoplot')
 
 
+def _no_data_message(ds: Any, run_id: int) -> str:
+    """Build a user-facing message explaining why a dataset has no data.
+
+    Checks the dataset's ``export_info`` for missing ``.nc`` files and
+    includes the expected path(s) so the user knows what to look for.
+    """
+    parts = [f"No data available for run {run_id}"]
+    try:
+        parts.append(f"(GUID: {ds.guid})")
+    except Exception:
+        pass
+
+    missing_files: List[str] = []
+    try:
+        ei = ds.export_info
+        if ei is not None and hasattr(ei, 'export_paths'):
+            for _fmt, path in ei.export_paths.items():
+                paths = path if isinstance(path, list) else [path]
+                for p in paths:
+                    if not os.path.exists(p):
+                        missing_files.append(p)
+    except Exception:
+        pass
+
+    if missing_files:
+        parts.append("— data file(s) not found: " + ", ".join(missing_files))
+    else:
+        parts.append("— the dataset may be empty or still running.")
+
+    return " ".join(parts)
+
+
 def autoplot(inputData: Union[None, DataDictBase] = None,
              plotWidgetClass: Optional[Type[PlotWidget]] = None) \
         -> Tuple[Flowchart, 'AutoPlotMainWindow']:
@@ -295,12 +327,8 @@ def __init__(self, fc: Flowchart,
             # dataset really has no results yet.  Show a clear status bar
             # message instead of leaving the user with an empty window.
             ds = getattr(self.loaderNode, '_dataset', None)
-            if ds is not None and ds.number_of_results == 0:
-                msg = (
-                    f"No data available for run {pathAndId[1]} "
-                    f"(GUID: {ds.guid}). The dataset's data file may be "
-                    f"missing or the dataset is empty."
-                )
+            if ds is not None and not ds.number_of_results:
+                msg = _no_data_message(ds, pathAndId[1])
                 self.statusBar().showMessage(msg)
 
     def setDefaults(self, data: DataDictBase) -> None:
diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index e8471141..0d482e2c 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -714,6 +714,9 @@ def loadFullDB(self, path: Optional[str] = None) -> None:
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
+                # Clear stale run list items so the overlay text is not
+                # shown on top of old data from a previous load.
+                self.runList.clear()
                 self.runList.setOverlayText("Loading database...")
                 self.loadDBProcess.setPath(self.filepath, start_run_id=1)
 

From 3ef952a190bde88ecacb74e22e826bc6e8394616 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 20 May 2026 17:44:32 +0200
Subject: [PATCH 56/64] fix: prominent no-data label in plot area, skip reload
 of same DB

autoplot: Show the missing-data-file message as a large centered
label in the plot area (in addition to the status bar) so it is
impossible to miss.

inspectr: When the user opens the same DB file that is already
loaded, skip the reload entirely instead of showing a transient
'Loading database...' overlay.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 11 +++++++++--
 plottr/apps/inspectr.py |  9 ++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index e05c64ae..467fe501 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -324,12 +324,19 @@ def __init__(self, fc: Flowchart,
         elif self.loaderNode is not None and pathAndId is not None:
             # Loader ran but produced no data — most commonly because the
             # dataset's .nc data file is missing (metadata-only DB) or the
-            # dataset really has no results yet.  Show a clear status bar
-            # message instead of leaving the user with an empty window.
+            # dataset really has no results yet.
             ds = getattr(self.loaderNode, '_dataset', None)
             if ds is not None and not ds.number_of_results:
                 msg = _no_data_message(ds, pathAndId[1])
                 self.statusBar().showMessage(msg)
+                # Also show the message prominently in the central plot area
+                lbl = QtWidgets.QLabel(msg)
+                lbl.setAlignment(QtCore.Qt.AlignCenter)
+                lbl.setWordWrap(True)
+                lbl.setStyleSheet(
+                    "color: gray; font-size: 13pt; padding: 40px;"
+                )
+                self.plot.layout().addWidget(lbl)
 
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)
diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 0d482e2c..e1b6d2ef 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -705,10 +705,13 @@ def loadDB(self) -> None:
             self.loadFullDB(path=path)
 
     def loadFullDB(self, path: Optional[str] = None) -> None:
-        if path is not None and path != self.filepath:
-            self.filepath = path
+        if path is not None and path == self.filepath:
+            # Same file already loaded — nothing to do.
+            return
 
-            # makes sure we treat a newly loaded file fresh and not as a
+        if path is not None:
+            self.filepath = path
+            # Makes sure we treat a newly loaded file fresh and not as a
             # refreshed one.
             self.latestRunId = None
 

From c2aa8dc20c6f3b48fe07356137e2043fdc0c8059 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Wed, 20 May 2026 17:48:33 +0200
Subject: [PATCH 57/64] fix: allow initial DB load when path is pre-set from
 cmdline

loadFullDB was skipping when path == self.filepath, but this also
blocked the initial load during __init__ (where filepath is set from
the dbPath constructor argument before loadFullDB is called). Now
only skips when data is already loaded (self.dbdf is not None).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index e1b6d2ef..65ff143b 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -705,15 +705,16 @@ def loadDB(self) -> None:
             self.loadFullDB(path=path)
 
     def loadFullDB(self, path: Optional[str] = None) -> None:
-        if path is not None and path == self.filepath:
-            # Same file already loaded — nothing to do.
+        if path is not None and path == self.filepath and self.dbdf is not None:
+            # Same file already loaded and data present — nothing to do.
             return
 
         if path is not None:
+            if path != self.filepath:
+                # Makes sure we treat a newly loaded file fresh and not as a
+                # refreshed one.
+                self.latestRunId = None
             self.filepath = path
-            # Makes sure we treat a newly loaded file fresh and not as a
-            # refreshed one.
-            self.latestRunId = None
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():

From 81bfe985e90a9c28d5c8026657927c16717c9116 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 08:23:20 +0200
Subject: [PATCH 58/64] fix: always do full reload in loadFullDB, no skip
 optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

loadFullDB now always treats the request as a fresh load — clears
the run list, resets latestRunId, and re-reads the full database.
Same flow whether the file is new or already open.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 65ff143b..7b94aad9 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -705,21 +705,12 @@ def loadDB(self) -> None:
             self.loadFullDB(path=path)
 
     def loadFullDB(self, path: Optional[str] = None) -> None:
-        if path is not None and path == self.filepath and self.dbdf is not None:
-            # Same file already loaded and data present — nothing to do.
-            return
-
         if path is not None:
-            if path != self.filepath:
-                # Makes sure we treat a newly loaded file fresh and not as a
-                # refreshed one.
-                self.latestRunId = None
             self.filepath = path
+            self.latestRunId = None
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
-                # Clear stale run list items so the overlay text is not
-                # shown on top of old data from a previous load.
                 self.runList.clear()
                 self.runList.setOverlayText("Loading database...")
                 self.loadDBProcess.setPath(self.filepath, start_run_id=1)

From bc4dd24b5e949b1958e75f76991f7ba46ed04a5d Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 09:43:23 +0200
Subject: [PATCH 59/64] fix: loadFullDB fully resets UI state (dates, runs,
 dbdf)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

loadFullDB now clears dateList, runList, AND sets dbdf=None before
starting the background load. Previously only runList was cleared,
so the old date selection persisted and immediately repopulated the
run list from stale data — making it look like nothing changed.

Add test that verifies loadFullDB clears all three (dbdf, dateList,
runList) immediately, then repopulates after the load completes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/inspectr.py         |  7 +++++-
 test/pytest/test_qcodes_data.py | 43 +++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 7b94aad9..2c46564b 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -707,10 +707,15 @@ def loadDB(self) -> None:
     def loadFullDB(self, path: Optional[str] = None) -> None:
         if path is not None:
             self.filepath = path
-            self.latestRunId = None
 
         if self.filepath is not None:
             if not self.loadDBThread.isRunning():
+                # Reset to "just opened" state: clear all UI and cached data
+                # so the user sees a clean loading experience regardless of
+                # whether this is a new file or a reload of the same file.
+                self.latestRunId = None
+                self.dbdf = None
+                self.dateList.clear()
                 self.runList.clear()
                 self.runList.setOverlayText("Loading database...")
                 self.loadDBProcess.setPath(self.filepath, start_run_id=1)
diff --git a/test/pytest/test_qcodes_data.py b/test/pytest/test_qcodes_data.py
index 4cefdcd0..df0de7d2 100644
--- a/test/pytest/test_qcodes_data.py
+++ b/test/pytest/test_qcodes_data.py
@@ -454,6 +454,49 @@ def refresh_done():
         qtbot.waitUntil(refresh_done, timeout=5000)
         assert 2 in inspector.dbdf.index
 
+    def test_loadFullDB_resets_ui_on_reload(self, qtbot, tmp_path):
+        """Reloading the same DB file should clear date list, run list,
+        and dbdf — same as loading a fresh file."""
+        import os
+        os.environ.setdefault("QT_QPA_PLATFORM", "offscreen")
+        from plottr.apps.inspectr import QCodesDBInspector
+
+        db_path = str(tmp_path / "test.db")
+        _make_qcodes_db_with_runs(db_path, n_runs=3)
+
+        inspector = QCodesDBInspector(dbPath=db_path)
+        qtbot.addWidget(inspector)
+        qtbot.waitUntil(
+            lambda: inspector.dbdf is not None and inspector.dbdf.size > 0,
+            timeout=5000,
+        )
+        # Select a date so the run list is populated
+        if inspector.dateList.count() > 0:
+            inspector.dateList.item(0).setSelected(True)
+        qtbot.waitUntil(
+            lambda: inspector.runList.topLevelItemCount() > 0,
+            timeout=5000,
+        )
+        assert inspector.runList.topLevelItemCount() > 0
+
+        # Reload the same file
+        inspector.loadFullDB(db_path)
+
+        # Immediately after loadFullDB: UI should be cleared
+        assert inspector.dbdf is None, "dbdf should be reset to None"
+        assert inspector.dateList.count() == 0, "date list should be empty"
+        assert inspector.runList.topLevelItemCount() == 0, \
+            "run list should be empty during reload"
+
+        # Wait for reload to complete
+        qtbot.waitUntil(
+            lambda: inspector.dbdf is not None and inspector.dbdf.size > 0,
+            timeout=5000,
+        )
+        # After reload: data is back but run list shows hint (no date selected)
+        assert inspector.dbdf is not None
+        assert inspector.dateList.count() > 0
+
     def test_inspectr_refresh_updates_records_for_incomplete(
         self, qtbot, tmp_path
     ):

From d4149ce2675121dafa4470af7d1d32dfcabf5b07 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 10:44:01 +0200
Subject: [PATCH 60/64] fix: move no-data banner to top of autoplot window

Display the missing-data-file warning as a yellow banner at the top
of the autoplot window (above the plot area) for maximum visibility.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 467fe501..0370d43d 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -329,14 +329,21 @@ def __init__(self, fc: Flowchart,
             if ds is not None and not ds.number_of_results:
                 msg = _no_data_message(ds, pathAndId[1])
                 self.statusBar().showMessage(msg)
-                # Also show the message prominently in the central plot area
-                lbl = QtWidgets.QLabel(msg)
-                lbl.setAlignment(QtCore.Qt.AlignCenter)
-                lbl.setWordWrap(True)
-                lbl.setStyleSheet(
-                    "color: gray; font-size: 13pt; padding: 40px;"
+                # Show message at the top of the window for visibility
+                banner = QtWidgets.QLabel(msg)
+                banner.setWordWrap(True)
+                banner.setStyleSheet(
+                    "background-color: #fff3cd; color: #856404;"
+                    "border: 1px solid #ffc107; border-radius: 4px;"
+                    "padding: 8px; font-size: 11pt;"
                 )
-                self.plot.layout().addWidget(lbl)
+                wrapper = QtWidgets.QWidget()
+                layout = QtWidgets.QVBoxLayout(wrapper)
+                layout.setContentsMargins(0, 0, 0, 0)
+                layout.setSpacing(0)
+                layout.addWidget(banner)
+                layout.addWidget(self.plot)
+                self.setCentralWidget(wrapper)
 
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)

From 42e68889d34f514af287d487b11dbc08c28ede59 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 10:45:55 +0200
Subject: [PATCH 61/64] fix: make no-data banner text selectable and
 copy-pastable

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 0370d43d..2e2f02ce 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -332,6 +332,11 @@ def __init__(self, fc: Flowchart,
                 # Show message at the top of the window for visibility
                 banner = QtWidgets.QLabel(msg)
                 banner.setWordWrap(True)
+                banner.setTextInteractionFlags(
+                    QtCore.Qt.TextSelectableByMouse
+                    | QtCore.Qt.TextSelectableByKeyboard
+                )
+                banner.setCursor(QtGui.QCursor(QtCore.Qt.IBeamCursor))
                 banner.setStyleSheet(
                     "background-color: #fff3cd; color: #856404;"
                     "border: 1px solid #ffc107; border-radius: 4px;"

From ec7934320c7235efdf0fb47ecea2852606ad5afe Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 12:18:12 +0200
Subject: [PATCH 62/64] fix: remove no-data banner once data arrives on refresh

When a dataset starts empty (no results yet) and the autoplot window
shows the 'No data available' banner, subsequent monitor-triggered
refreshes that find new data now remove the banner and restore the
normal plot area. Uses _removeNoDataBanner in QCAutoPlotMainWindow.
refreshData override.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 68 ++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 21 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 2e2f02ce..18c48767 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -307,6 +307,8 @@ def __init__(self, fc: Flowchart,
                  pathAndId: Optional[Tuple[str, int]] = None, **kw: Any):
 
         super().__init__(fc, parent, **kw)
+        self._noDataBanner: Optional[QtWidgets.QLabel] = None
+        self._noDataWrapper: Optional[QtWidgets.QWidget] = None
 
         windowTitle = "Plottr | QCoDeS autoplot"
         if pathAndId is not None:
@@ -327,32 +329,56 @@ def __init__(self, fc: Flowchart,
             # dataset really has no results yet.
             ds = getattr(self.loaderNode, '_dataset', None)
             if ds is not None and not ds.number_of_results:
-                msg = _no_data_message(ds, pathAndId[1])
-                self.statusBar().showMessage(msg)
-                # Show message at the top of the window for visibility
-                banner = QtWidgets.QLabel(msg)
-                banner.setWordWrap(True)
-                banner.setTextInteractionFlags(
-                    QtCore.Qt.TextSelectableByMouse
-                    | QtCore.Qt.TextSelectableByKeyboard
+                self._showNoDataBanner(
+                    _no_data_message(ds, pathAndId[1])
                 )
-                banner.setCursor(QtGui.QCursor(QtCore.Qt.IBeamCursor))
-                banner.setStyleSheet(
-                    "background-color: #fff3cd; color: #856404;"
-                    "border: 1px solid #ffc107; border-radius: 4px;"
-                    "padding: 8px; font-size: 11pt;"
-                )
-                wrapper = QtWidgets.QWidget()
-                layout = QtWidgets.QVBoxLayout(wrapper)
-                layout.setContentsMargins(0, 0, 0, 0)
-                layout.setSpacing(0)
-                layout.addWidget(banner)
-                layout.addWidget(self.plot)
-                self.setCentralWidget(wrapper)
+
+    def _showNoDataBanner(self, msg: str) -> None:
+        """Show a prominent warning banner at the top of the window."""
+        self.statusBar().showMessage(msg)
+        banner = QtWidgets.QLabel(msg)
+        banner.setWordWrap(True)
+        banner.setTextInteractionFlags(
+            QtCore.Qt.TextSelectableByMouse
+            | QtCore.Qt.TextSelectableByKeyboard
+        )
+        banner.setCursor(QtGui.QCursor(QtCore.Qt.IBeamCursor))
+        banner.setStyleSheet(
+            "background-color: #fff3cd; color: #856404;"
+            "border: 1px solid #ffc107; border-radius: 4px;"
+            "padding: 8px; font-size: 11pt;"
+        )
+        wrapper = QtWidgets.QWidget()
+        layout = QtWidgets.QVBoxLayout(wrapper)
+        layout.setContentsMargins(0, 0, 0, 0)
+        layout.setSpacing(0)
+        layout.addWidget(banner)
+        layout.addWidget(self.plot)
+        self.setCentralWidget(wrapper)
+        self._noDataBanner = banner
+        self._noDataWrapper = wrapper
+
+    def _removeNoDataBanner(self) -> None:
+        """Remove the no-data banner, restoring the plot as central widget."""
+        if self._noDataBanner is not None:
+            self._noDataBanner.deleteLater()
+            self._noDataBanner = None
+            # Reparent plot back to be the direct central widget
+            self.setCentralWidget(self.plot)
+            self._noDataWrapper = None
+            self.statusBar().clearMessage()
 
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)
 
+    def refreshData(self) -> None:
+        super().refreshData()
+        # Once data arrives, remove the "no data" banner if it was shown
+        if (self._noDataBanner is not None
+                and self.loaderNode is not None
+                and self.loaderNode.nLoadedRecords > 0):
+            self._removeNoDataBanner()
+
 
 
 def autoplotQcodesDataset(log: bool = False,

From 87103d635754380cb0956de03b93af11a2b3eb0b Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 12:22:38 +0200
Subject: [PATCH 63/64] refactor: generic showWarningBanner/removeWarningBanner
 on AutoPlotMainWindow

Move the banner logic from QCAutoPlotMainWindow private methods to
the parent AutoPlotMainWindow as public showWarningBanner() and
removeWarningBanner(). Any subclass or future use can now show
warnings/errors about datasets with a single method call.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py | 88 ++++++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 41 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 18c48767..2b34fdcf 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -177,6 +177,10 @@ def __init__(self, fc: Flowchart,
         # is processed
         self._initialized = False
 
+        # Warning/info banner shown above the plot area (e.g. missing data)
+        self._warningBanner: Optional[QtWidgets.QLabel] = None
+        self._warningWrapper: Optional[QtWidgets.QWidget] = None
+
         windowTitle = "Plottr | Autoplot"
         self.setWindowTitle(windowTitle)
 
@@ -235,6 +239,45 @@ def onChangedLoaderData(self) -> None:
         if data is not None:
             self.setDefaults(self.loaderNode.outputValues()['dataOut'])
 
+    def showWarningBanner(self, msg: str) -> None:
+        """Show a prominent warning banner at the top of the window.
+
+        The banner is displayed above the plot area with selectable text.
+        Call :meth:`removeWarningBanner` to dismiss it.
+        """
+        self.removeWarningBanner()
+        self.statusBar().showMessage(msg)
+        banner = QtWidgets.QLabel(msg)
+        banner.setWordWrap(True)
+        banner.setTextInteractionFlags(
+            QtCore.Qt.TextSelectableByMouse
+            | QtCore.Qt.TextSelectableByKeyboard
+        )
+        banner.setCursor(QtGui.QCursor(QtCore.Qt.IBeamCursor))
+        banner.setStyleSheet(
+            "background-color: #fff3cd; color: #856404;"
+            "border: 1px solid #ffc107; border-radius: 4px;"
+            "padding: 8px; font-size: 11pt;"
+        )
+        wrapper = QtWidgets.QWidget()
+        layout = QtWidgets.QVBoxLayout(wrapper)
+        layout.setContentsMargins(0, 0, 0, 0)
+        layout.setSpacing(0)
+        layout.addWidget(banner)
+        layout.addWidget(self.plot)
+        self.setCentralWidget(wrapper)
+        self._warningBanner = banner
+        self._warningWrapper = wrapper
+
+    def removeWarningBanner(self) -> None:
+        """Remove the warning banner, restoring the plot as central widget."""
+        if self._warningBanner is not None:
+            self._warningBanner.deleteLater()
+            self._warningBanner = None
+            self.setCentralWidget(self.plot)
+            self._warningWrapper = None
+            self.statusBar().clearMessage()
+
     @Slot()
     def refreshData(self) -> None:
         """
@@ -307,8 +350,6 @@ def __init__(self, fc: Flowchart,
                  pathAndId: Optional[Tuple[str, int]] = None, **kw: Any):
 
         super().__init__(fc, parent, **kw)
-        self._noDataBanner: Optional[QtWidgets.QLabel] = None
-        self._noDataWrapper: Optional[QtWidgets.QWidget] = None
 
         windowTitle = "Plottr | QCoDeS autoplot"
         if pathAndId is not None:
@@ -329,55 +370,20 @@ def __init__(self, fc: Flowchart,
             # dataset really has no results yet.
             ds = getattr(self.loaderNode, '_dataset', None)
             if ds is not None and not ds.number_of_results:
-                self._showNoDataBanner(
+                self.showWarningBanner(
                     _no_data_message(ds, pathAndId[1])
                 )
 
-    def _showNoDataBanner(self, msg: str) -> None:
-        """Show a prominent warning banner at the top of the window."""
-        self.statusBar().showMessage(msg)
-        banner = QtWidgets.QLabel(msg)
-        banner.setWordWrap(True)
-        banner.setTextInteractionFlags(
-            QtCore.Qt.TextSelectableByMouse
-            | QtCore.Qt.TextSelectableByKeyboard
-        )
-        banner.setCursor(QtGui.QCursor(QtCore.Qt.IBeamCursor))
-        banner.setStyleSheet(
-            "background-color: #fff3cd; color: #856404;"
-            "border: 1px solid #ffc107; border-radius: 4px;"
-            "padding: 8px; font-size: 11pt;"
-        )
-        wrapper = QtWidgets.QWidget()
-        layout = QtWidgets.QVBoxLayout(wrapper)
-        layout.setContentsMargins(0, 0, 0, 0)
-        layout.setSpacing(0)
-        layout.addWidget(banner)
-        layout.addWidget(self.plot)
-        self.setCentralWidget(wrapper)
-        self._noDataBanner = banner
-        self._noDataWrapper = wrapper
-
-    def _removeNoDataBanner(self) -> None:
-        """Remove the no-data banner, restoring the plot as central widget."""
-        if self._noDataBanner is not None:
-            self._noDataBanner.deleteLater()
-            self._noDataBanner = None
-            # Reparent plot back to be the direct central widget
-            self.setCentralWidget(self.plot)
-            self._noDataWrapper = None
-            self.statusBar().clearMessage()
-
     def setDefaults(self, data: DataDictBase) -> None:
         super().setDefaults(data)
 
     def refreshData(self) -> None:
         super().refreshData()
-        # Once data arrives, remove the "no data" banner if it was shown
-        if (self._noDataBanner is not None
+        # Once data arrives, remove the warning banner if it was shown
+        if (self._warningBanner is not None
                 and self.loaderNode is not None
                 and self.loaderNode.nLoadedRecords > 0):
-            self._removeNoDataBanner()
+            self.removeWarningBanner()
 
 
 

From 4967f03ba73046397fa8089e6f28df093ffa67e4 Mon Sep 17 00:00:00 2001
From: Mikhail Astafev <miastafe@microsoft.com>
Date: Thu, 21 May 2026 16:25:09 +0200
Subject: [PATCH 64/64] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94?=
 =?UTF-8?q?=2014=20comments?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

autoplot:
- _no_data_message takes only ds (uses ds.run_id, ds.running)
- State-specific messages: running → 'started but no data yet',
  missing files → shows paths, default → generic message
- export_paths is dict[str, str], removed isinstance list check
- Removed empty setDefaults override from QCAutoPlotMainWindow
- Removed dataset-specific comments from warning banner code

inspectr:
- Removed start_run_id from LoadDBProcess (always 1, was a relic)
- Removed script_pyqtgraph entry point
- Simplified refreshDB docstring
- Merged redundant elif/else in DBLoaded

qcodes_db_overview:
- Completed datasets: prefer shapes from run_description
- Active datasets: prefer results table row count
- Simplified comment on missing tables

qcodes_dataset:
- Use tqdm (when available) for smart progress frequency
- Falls back to every-10th-item without tqdm

data_display:
- blockSignals(False) in finally clause of setBatchSelectedData

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 plottr/apps/autoplot.py           | 44 +++++++++++++------------------
 plottr/apps/inspectr.py           | 40 ++++------------------------
 plottr/data/qcodes_dataset.py     | 28 +++++++++++++++++++-
 plottr/data/qcodes_db_overview.py | 22 +++++++++++-----
 plottr/gui/data_display.py        |  1 +
 5 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/plottr/apps/autoplot.py b/plottr/apps/autoplot.py
index 2b34fdcf..0c9ad343 100644
--- a/plottr/apps/autoplot.py
+++ b/plottr/apps/autoplot.py
@@ -36,36 +36,37 @@
 LOGGER = logging.getLogger('plottr.apps.autoplot')
 
 
-def _no_data_message(ds: Any, run_id: int) -> str:
+def _no_data_message(ds: Any) -> str:
     """Build a user-facing message explaining why a dataset has no data.
 
-    Checks the dataset's ``export_info`` for missing ``.nc`` files and
-    includes the expected path(s) so the user knows what to look for.
+    Uses dataset state (running/pristine) and export_info to provide
+    a specific, actionable message.
     """
-    parts = [f"No data available for run {run_id}"]
-    try:
-        parts.append(f"(GUID: {ds.guid})")
-    except Exception:
-        pass
+    run_id = getattr(ds, 'run_id', '?')
+    guid = getattr(ds, 'guid', '')
+    header = f"No data available for run {run_id}"
+    if guid:
+        header += f" (GUID: {guid})"
+
+    # Check if the dataset is still being filled
+    if getattr(ds, 'running', False):
+        return f"{header} — dataset has started but does not contain data yet."
 
+    # Check for missing exported data files
     missing_files: List[str] = []
     try:
         ei = ds.export_info
         if ei is not None and hasattr(ei, 'export_paths'):
             for _fmt, path in ei.export_paths.items():
-                paths = path if isinstance(path, list) else [path]
-                for p in paths:
-                    if not os.path.exists(p):
-                        missing_files.append(p)
+                if not os.path.exists(path):
+                    missing_files.append(path)
     except Exception:
         pass
 
     if missing_files:
-        parts.append("— data file(s) not found: " + ", ".join(missing_files))
-    else:
-        parts.append("— the dataset may be empty or still running.")
+        return f"{header} — data file(s) not found: " + ", ".join(missing_files)
 
-    return " ".join(parts)
+    return f"{header} — the dataset does not seem to have data to plot."
 
 
 def autoplot(inputData: Union[None, DataDictBase] = None,
@@ -365,21 +366,12 @@ def __init__(self, fc: Flowchart,
             self.setDefaults(self.loaderNode.outputValues()['dataOut'])
             self._initialized = True
         elif self.loaderNode is not None and pathAndId is not None:
-            # Loader ran but produced no data — most commonly because the
-            # dataset's .nc data file is missing (metadata-only DB) or the
-            # dataset really has no results yet.
             ds = getattr(self.loaderNode, '_dataset', None)
             if ds is not None and not ds.number_of_results:
-                self.showWarningBanner(
-                    _no_data_message(ds, pathAndId[1])
-                )
-
-    def setDefaults(self, data: DataDictBase) -> None:
-        super().setDefaults(data)
+                self.showWarningBanner(_no_data_message(ds))
 
     def refreshData(self) -> None:
         super().refreshData()
-        # Once data arrives, remove the warning banner if it was shown
         if (self._warningBanner is not None
                 and self.loaderNode is not None
                 and self.loaderNode.nLoadedRecords > 0):
diff --git a/plottr/apps/inspectr.py b/plottr/apps/inspectr.py
index 2c46564b..add50761 100644
--- a/plottr/apps/inspectr.py
+++ b/plottr/apps/inspectr.py
@@ -441,11 +441,9 @@ class LoadDBProcess(QtCore.QObject):
     def __init__(self) -> None:
         super().__init__()
         self.path: Optional[str] = None
-        self.start_run_id: int = 1
 
-    def setPath(self, path: str, start_run_id: int = 1) -> None:
+    def setPath(self, path: str) -> None:
         self.path = path
-        self.start_run_id = start_run_id
         self.pathSet.emit()
 
     def loadDB(self) -> None:
@@ -454,11 +452,7 @@ def loadDB(self) -> None:
         overview: Optional[Dict[int, Any]] = None
         if self.use_fast_sql:
             try:
-                # start_run_id uses > comparison, so subtract 1 for inclusive
-                overview = get_db_overview(
-                    self.path,
-                    start_run_id=self.start_run_id - 1,
-                )
+                overview = get_db_overview(self.path)
             except Exception as e:
                 LOGGER.warning(f"Fast SQL overview failed, falling back to "
                                f"qcodes API: {e}")
@@ -467,7 +461,6 @@ def loadDB(self) -> None:
         if overview is None:
             overview = get_runs_from_db_fast(
                 self.path,
-                start_run_id=self.start_run_id,
                 progress_callback=self._onProgress,
             )
 
@@ -718,7 +711,7 @@ def loadFullDB(self, path: Optional[str] = None) -> None:
                 self.dateList.clear()
                 self.runList.clear()
                 self.runList.setOverlayText("Loading database...")
-                self.loadDBProcess.setPath(self.filepath, start_run_id=1)
+                self.loadDBProcess.setPath(self.filepath)
 
     @Slot(int, int)
     def onLoadProgress(self, current: int, total: int) -> None:
@@ -742,8 +735,6 @@ def DBLoaded(self, dbdf: pandas.DataFrame) -> None:
             new_rows = dbdf.loc[~existing_mask]
             if not new_rows.empty:
                 self.dbdf = pandas.concat([self.dbdf, new_rows])
-        elif dbdf.size > 0:
-            self.dbdf = dbdf
         else:
             self.dbdf = dbdf
 
@@ -783,11 +774,7 @@ def refreshDB(self) -> None:
         """Re-read the database to pick up new runs AND updates to existing
         runs (e.g., records count growing as an incomplete dataset is filled).
 
-        Always does a full re-read because incremental loading (start_run_id >
-        latestRunId) would miss updates to incomplete datasets whose row
-        already exists in the dbdf. The full re-read is fast (single SQL JOIN
-        ~10ms even for 1500 runs), so the incremental optimization isn't
-        worth the correctness cost.
+        Always does a full re-read which is fast at the moment.
         """
         if self.filepath is None or self.loadDBThread.isRunning():
             return
@@ -799,9 +786,7 @@ def refreshDB(self) -> None:
         else:
             self.latestRunId = -1
 
-        # Full re-read so updates to existing runs (records counter, completed
-        # timestamp) are picked up.
-        self.loadDBProcess.setPath(self.filepath, start_run_id=1)
+        self.loadDBProcess.setPath(self.filepath)
 
     @Slot(float)
     def setMonitorInterval(self, val: float) -> None:
@@ -968,18 +953,3 @@ def script() -> None:
     args = parser.parse_args()
     main(args.dbpath, args.console_log_level)
 
-
-def script_pyqtgraph() -> None:
-    """Entry point for inspectr using the pyqtgraph plotting backend."""
-    from plottr.plot.pyqtgraph.autoplot import AutoPlot as PGAutoPlot
-
-    parser = argparse.ArgumentParser(
-        description='inspectr -- sifting through qcodes data (pyqtgraph backend).'
-    )
-    parser.add_argument('--dbpath', help='path to qcodes .db file',
-                        default=None)
-    parser.add_argument("--console-log-level",
-                        choices=("ERROR", "WARNING", "INFO", "DEBUG"),
-                        default="WARNING")
-    args = parser.parse_args()
-    main(args.dbpath, args.console_log_level, plotWidgetClass=PGAutoPlot)
diff --git a/plottr/data/qcodes_dataset.py b/plottr/data/qcodes_dataset.py
index 09c37356..551aa160 100644
--- a/plottr/data/qcodes_dataset.py
+++ b/plottr/data/qcodes_dataset.py
@@ -279,6 +279,26 @@ def get_runs_from_db_fast(path: str,
             return overview
 
         total = last - start_run_id + 1
+
+        # Use tqdm (if available) for smart progress update frequency.
+        # tqdm's mininterval prevents flooding the callback on fast iterations.
+        _last_callback_n = [0]
+        def _tqdm_callback(pbar: Any) -> None:
+            if progress_callback is not None and pbar.n != _last_callback_n[0]:
+                _last_callback_n[0] = pbar.n
+                progress_callback(pbar.n, pbar.total)
+
+        try:
+            from tqdm import tqdm  # type: ignore[import-untyped]
+            pbar = tqdm(
+                range(start_run_id, last + 1), total=total,
+                desc="Loading datasets", disable=True, mininterval=0.3,
+            )
+            use_tqdm = True
+        except ImportError:
+            pbar = None
+            use_tqdm = False
+
         for i, run_id in enumerate(range(start_run_id, last + 1)):
             try:
                 ds = load_by_id(run_id, conn=conn_)
@@ -286,9 +306,15 @@ def get_runs_from_db_fast(path: str,
             except Exception:
                 pass  # skip missing/corrupt runs
 
-            if progress_callback is not None and (i % 10 == 0 or i == total - 1):
+            if use_tqdm and pbar is not None:
+                pbar.update(1)
+                _tqdm_callback(pbar)
+            elif progress_callback is not None and (i % 10 == 0 or i == total - 1):
                 progress_callback(i + 1, total)
 
+        if use_tqdm and pbar is not None:
+            pbar.close()
+
     return overview
 
 
diff --git a/plottr/data/qcodes_db_overview.py b/plottr/data/qcodes_db_overview.py
index 0b965561..6e421a25 100644
--- a/plottr/data/qcodes_db_overview.py
+++ b/plottr/data/qcodes_db_overview.py
@@ -155,7 +155,7 @@ def get_db_overview(db_path: str,
                 ).fetchone()
                 row_counts[tbl] = cnt[0] if cnt else 0
             except Exception:
-                pass  # table may not exist (e.g., qdwsdk downloads)
+                pass  # table may not exist
 
         tag_col_idx = 10 if has_inspectr_tag else -1
         for row in rows:
@@ -164,12 +164,22 @@ def get_db_overview(db_path: str,
             completed_date, completed_time = _format_timestamp(row[5])
             tag = row[tag_col_idx] if tag_col_idx > 0 and len(row) > tag_col_idx and row[tag_col_idx] else ''
             result_table = row[8] or ''
-
-            # Determine record count: prefer results table row count,
-            # then try shape info from run_description, then result_counter.
-            records = row_counts.get(result_table, 0)
-            if records == 0:
+            is_completed = row[5] is not None and row[5] != 0
+
+            # Determine record count.
+            # For completed datasets: prefer shape metadata (authoritative
+            # final count) over results table rows.
+            # For active (incomplete) datasets: prefer results table rows
+            # (live count that grows as data is added).
+            # Fall back to result_counter if nothing else is available.
+            if is_completed:
                 records = _records_from_run_description(row[9])
+                if records == 0:
+                    records = row_counts.get(result_table, 0)
+            else:
+                records = row_counts.get(result_table, 0)
+                if records == 0:
+                    records = _records_from_run_description(row[9])
             if records == 0:
                 records = row[6] or 0
 
diff --git a/plottr/gui/data_display.py b/plottr/gui/data_display.py
index b56cf21e..b5117896 100644
--- a/plottr/gui/data_display.py
+++ b/plottr/gui/data_display.py
@@ -132,6 +132,7 @@ def setBatchSelectedData(self, vals: List[str]) -> None:
             self.dataSelectionMade.emit(self.getSelectedData())
         finally:
             self._batchUpdate = False
+            self.blockSignals(False)
 
     def selectAll(self) -> None:
         """Select all enabled dependent fields. Single signal emission."""