LeMaterial · Ramlaoui · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/Makefile b/Makefile
@@ -17,7 +17,7 @@ help:
 	@echo "  make fmt             Format Rust + Python"
 	@echo "  make fmt-check        Check formatting"
 	@echo "  make lint             Run linters"
-	@echo "  make test             Run tests (Rust + Python)"
+	@echo "  make test             Run default tests (Rust + Python core suite)"
 	@echo "  make ci               Run all CI checks"
 	@echo ""
 	@echo "Language-specific:"
@@ -27,7 +27,7 @@ help:
 	@echo "  make py-fmt           uv ruff format python (atompack-py)"
 	@echo "  make py-lint          uv ruff check python (atompack-py)"
 	@echo "  make py-test          uv pytest core suite (atompack-py/tests without benchmark tooling)"
-	@echo "  make py-test-benchmarks  uv pytest benchmark tooling suite (atompack-py/tests/benchmarks)"
+	@echo "  make py-test-benchmarks  uv pytest benchmark tooling suite (manual only)"
 	@echo "  make py-dev           uv maturin develop (atompack-py)"
 	@echo "  make py-dev-release   uv maturin develop -r (atompack-py)"
 	@echo ""
@@ -107,10 +107,10 @@ fmt-check: rust-fmt-check py-fmt-check
 
 lint: rust-lint py-lint
 
-test: rust-test py-test py-test-benchmarks
+test: rust-test py-test
 
 ci-rust: rust-fmt-check rust-lint rust-test
 
-ci-py: py-fmt-check py-lint py-test py-test-benchmarks
+ci-py: py-fmt-check py-lint py-test
 
 ci: ci-rust ci-py
diff --git a/atompack-py/benchmarks/README.md b/atompack-py/benchmarks/README.md
@@ -105,14 +105,15 @@ uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_bench
 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --codec zstd:3
 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 2 --sizes 50000 500000 5000000
 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 3 --batch-scale-atoms 64 256 --batch-scale-sizes 256 512 1024 2048 4096 10000
-uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --scratch-dir /ogre/atompack-v2/tmp
+uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --scratch-dir /tmp/atompack-bench
 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --out atompack-py/benchmarks/write_results.json
 ```
 
 Notes:
 
-- Temporary benchmark datasets default to `/ogre/tmp`; override with
-  `--scratch-dir ...` when you want a different filesystem.
+- Benchmark datasets default to an `atompack-benchmarks` directory under the
+  system temp directory; override with `--scratch-dir ...` or the
+  `ATOMPACK_BENCHMARK_SCRATCH` environment variable to use a different filesystem.
 - This script defaults to `--codec none` so raw write throughput is measured unless you explicitly opt into compression.
 - Pass `--codec lz4` or `--codec zstd:3` when you want compressed-write numbers.
 - Atompack now auto-sizes its write batch by atom count unless you pass

diff --git a/atompack-py/benchmarks/atompack_batch_benchmark.py b/atompack-py/benchmarks/atompack_batch_benchmark.py
@@ -36,7 +36,7 @@
 
 import atompack
 
-from benchmark import _n_mols_for_atoms, _read_sample, bench, create_atompack_db
+from benchmark import DEFAULT_SCRATCH, _n_mols_for_atoms, _read_sample, bench, create_atompack_db
 
 DEFAULT_ATOMS = [64, 256, 512]
 DEFAULT_BATCH_SIZES = [32, 128, 512, 2048]
@@ -241,7 +241,7 @@ def main(argv: list[str] | None = None) -> int:
     parser.add_argument("--trials", type=int, default=5)
     parser.add_argument("--batch-sizes", type=int, nargs="+", default=DEFAULT_BATCH_SIZES)
     parser.add_argument("--threads", nargs="+", default=DEFAULT_THREADS)
-    parser.add_argument("--scratch-dir", type=Path, default=Path("/ogre/atompack-v2/benchmarks"))
+    parser.add_argument("--scratch-dir", type=Path, default=DEFAULT_SCRATCH)
     parser.add_argument("--compression", type=str, default=DEFAULT_CODEC, choices=["none", "lz4", "zstd"])
     parser.add_argument("--level", type=int, default=DEFAULT_LEVEL)
     parser.add_argument("--seed", type=int, default=1234)

diff --git a/atompack-py/benchmarks/benchmark.py b/atompack-py/benchmarks/benchmark.py
@@ -216,12 +216,17 @@ def _ensure_scratch_has_space(path: Path, *, context: str, min_free_bytes: int =
         "Free space on that filesystem or use --scratch-dir on a different disk."
     )
 
-DEFAULT_SCRATCH = Path("/ogre/atompack-v2/benchmarks")
-DEFAULT_OMAT_ATOMPACK = Path(
-    "/ogre/atompack-v2/omat/train_50m_atompack_single_v3_soa/part_0000.atp"
-)
-DEFAULT_OMAT_LMDB_PACKED = Path("/ogre/atompack-v2/omat/train_50m_lmdb_single_v3")
-DEFAULT_OMAT_LMDB_PICKLE = Path("/ogre/atompack-v2/omat/train_50m_lmdb_pickle_style_v1")
+DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH"
+
+
+def _default_scratch_dir() -> Path:
+    """Return the env-configured scratch dir, else ``<tempdir>/atompack-benchmarks``."""
+    if configured := os.environ.get(DEFAULT_SCRATCH_ENV):
+        return Path(configured).expanduser()
+    return Path(tempfile.gettempdir()) / "atompack-benchmarks"
+
+
+DEFAULT_SCRATCH = _default_scratch_dir()
 
 # Default molecule counts per atom count — sized so datasets are large enough
 # to exceed page cache and stress real I/O.

diff --git a/atompack-py/benchmarks/write_benchmark.py b/atompack-py/benchmarks/write_benchmark.py
@@ -68,7 +68,11 @@
 HDF5_SOA_CHUNK_SIZE = 256
 ASE_WRITE_MAX = 5_000
 DEFAULT_WRITE_CODEC = "none"
-DEFAULT_SCRATCH_DIR = "/ogre/tmp"
+DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH"
+DEFAULT_SCRATCH_DIR = os.path.expanduser(  # override is used as-is; empty means unset
+    os.environ.get(DEFAULT_SCRATCH_ENV)
+    or os.path.join(tempfile.gettempdir(), "atompack-benchmarks")
+)
 DEFAULT_ATOMPACK_TARGET_BATCH_MIB = 16.0
 DEFAULT_BATCH_SWEEP_SIZES = [256, 512, 1024, 2048, 4096, WRITE_BATCH_SIZE]
 DEFAULT_WARMUP_TRIALS = 1
@@ -1252,7 +1256,10 @@ def main(argv: list[str] | None = None) -> int:
     )
     parser.add_argument(
         "--scratch-dir", type=str, default=DEFAULT_SCRATCH_DIR,
-        help=f"Directory for temporary datasets (default: {DEFAULT_SCRATCH_DIR}).",
+        help=(
+            "Directory for temporary datasets "
+            f"(default: {DEFAULT_SCRATCH_DIR}; override via {DEFAULT_SCRATCH_ENV})."
+        ),
     )
     parser.add_argument(
         "--out", type=Path, default=None,

diff --git a/atompack-py/tests/benchmarks/test_publication_surface.py b/atompack-py/tests/benchmarks/test_publication_surface.py
diff --git a/atompack-py/tests/test_stub_surface.py b/atompack-py/tests/test_stub_surface.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import ast
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]
+PRIVATE_STUB = ROOT / "python" / "atompack" / "_atompack_rs.pyi"
+PUBLIC_STUB = ROOT / "python" / "atompack" / "__init__.pyi"
+HUB_STUB = ROOT / "python" / "atompack" / "hub.pyi"
+
+
+def _class_method_names(path: Path, class_name: str) -> set[str]:
+    """Return names of ``def``/``async def`` members declared directly on the class.
+
+    Raises ``AssertionError`` when ``path`` has no top-level class ``class_name``.
+    """
+    method_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
+    for stmt in ast.parse(path.read_text(encoding="utf-8")).body:
+        if isinstance(stmt, ast.ClassDef) and stmt.name == class_name:
+            return {member.name for member in stmt.body if isinstance(member, method_nodes)}
+    raise AssertionError(f"Class {class_name!r} not found in {path}")
+
+
+def _class_docstring(path: Path, class_name: str) -> str | None:
+    """Return the docstring of top-level class ``class_name`` (``None`` if absent)."""
+    for stmt in ast.parse(path.read_text(encoding="utf-8")).body:
+        if isinstance(stmt, ast.ClassDef) and stmt.name == class_name:
+            return ast.get_docstring(stmt)
+    raise AssertionError(f"Class {class_name!r} not found in {path}")
+
+
+def _function_docstring(path: Path, function_name: str) -> str | None:
+    """Return the docstring (or ``None``) of top-level plain ``def`` ``function_name``."""
+    for stmt in ast.parse(path.read_text(encoding="utf-8")).body:
+        if isinstance(stmt, ast.FunctionDef) and stmt.name == function_name:
+            return ast.get_docstring(stmt)
+    raise AssertionError(f"Function {function_name!r} not found in {path}")
+
+
+def _function_arg_names(path: Path, function_name: str) -> list[str]:
+    """Return positional-or-keyword, then keyword-only, parameter names of the function."""
+    for stmt in ast.parse(path.read_text(encoding="utf-8")).body:
+        if not isinstance(stmt, ast.FunctionDef) or stmt.name != function_name:
+            continue
+        signature = stmt.args
+        return [param.arg for param in (*signature.args, *signature.kwonlyargs)]
+    raise AssertionError(f"Function {function_name!r} not found in {path}")
+
+
+def test_private_stub_tracks_low_level_surface() -> None:
+    """Private stub keeps the low-level methods, default values, and class docstrings."""
+    molecule_required = {
+        "__init__",
+        "from_arrays",
+        "to_owned",
+        "_ase_builtin_tuple_fast",
+        "_ase_payload",
+        "__getitem__",
+    }
+    assert molecule_required.issubset(_class_method_names(PRIVATE_STUB, "PyMolecule"))
+    database_required = {"add_arrays_batch", "get_molecules_flat"}
+    assert database_required.issubset(_class_method_names(PRIVATE_STUB, "PyAtomDatabase"))
+
+    stub_source = PRIVATE_STUB.read_text(encoding="utf-8")
+    assert 'compression: str = "none"' in stub_source
+    assert "overwrite: bool = False" in stub_source
+    assert "Parameters" in (_class_docstring(PRIVATE_STUB, "PyAtom") or "")
+    assert "Atomic positions" in (_class_docstring(PRIVATE_STUB, "PyMolecule") or "")
+    assert "Compression type" in (_class_docstring(PRIVATE_STUB, "PyAtomDatabase") or "")
+
+
+def test_public_stub_exposes_flat_batch_reader() -> None:
+    """The public ``Database`` stub must declare ``get_molecules_flat``."""
+    assert "get_molecules_flat" in _class_method_names(PUBLIC_STUB, "Database")
+
+
+def test_hub_stub_has_public_docstrings() -> None:
+    """Hub stub docstrings and the ``upload`` signature keep their key phrases."""
+    reader_text = _class_docstring(HUB_STUB, "AtompackReader")
+    assert "lexicographically ordered shard set" in (reader_text or "")
+
+    download_text = _function_docstring(HUB_STUB, "download")
+    assert "shard directory" in (download_text or "")
+
+    upload_text = _function_docstring(HUB_STUB, "upload")
+    assert "Xet" in (upload_text or "")
+    assert "use_xet" in _function_arg_names(HUB_STUB, "upload")
+
+    assert "download" in (_function_docstring(HUB_STUB, "open") or "").lower()
+
+    open_path_text = _function_docstring(HUB_STUB, "open_path")
+    assert "Directories are scanned recursively" in (open_path_text or "")