diff --git a/Makefile b/Makefile index c6d36c9..1f58a9d 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ help: @echo " make fmt Format Rust + Python" @echo " make fmt-check Check formatting" @echo " make lint Run linters" - @echo " make test Run tests (Rust + Python)" + @echo " make test Run default tests (Rust + Python core suite)" @echo " make ci Run all CI checks" @echo "" @echo "Language-specific:" @@ -27,7 +27,7 @@ help: @echo " make py-fmt uv ruff format python (atompack-py)" @echo " make py-lint uv ruff check python (atompack-py)" @echo " make py-test uv pytest core suite (atompack-py/tests without benchmark tooling)" - @echo " make py-test-benchmarks uv pytest benchmark tooling suite (atompack-py/tests/benchmarks)" + @echo " make py-test-benchmarks uv pytest benchmark tooling suite (manual only)" @echo " make py-dev uv maturin develop (atompack-py)" @echo " make py-dev-release uv maturin develop -r (atompack-py)" @echo "" @@ -107,10 +107,10 @@ fmt-check: rust-fmt-check py-fmt-check lint: rust-lint py-lint -test: rust-test py-test py-test-benchmarks +test: rust-test py-test ci-rust: rust-fmt-check rust-lint rust-test -ci-py: py-fmt-check py-lint py-test py-test-benchmarks +ci-py: py-fmt-check py-lint py-test ci: ci-rust ci-py diff --git a/atompack-py/benchmarks/README.md b/atompack-py/benchmarks/README.md index 846d6a1..035765b 100644 --- a/atompack-py/benchmarks/README.md +++ b/atompack-py/benchmarks/README.md @@ -105,14 +105,15 @@ uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_bench uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --codec zstd:3 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 2 --sizes 50000 500000 5000000 uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 3 --batch-scale-atoms 64 256 --batch-scale-sizes 256 512 1024 2048 4096 10000 -uv run --no-sync --project atompack-py 
python atompack-py/benchmarks/write_benchmark.py --scratch-dir /ogre/atompack-v2/tmp +uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --scratch-dir /tmp/atompack-bench uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --out atompack-py/benchmarks/write_results.json ``` Notes: -- Temporary benchmark datasets default to `/ogre/tmp`; override with - `--scratch-dir ...` when you want a different filesystem. +- Benchmark datasets default to a temp-backed `atompack-benchmarks` directory; + override with `--scratch-dir ...` or `ATOMPACK_BENCHMARK_SCRATCH` when you + want a different filesystem. - This script defaults to `--codec none` so raw write throughput is measured unless you explicitly opt into compression. - Pass `--codec lz4` or `--codec zstd:3` when you want compressed-write numbers. - Atompack now auto-sizes its write batch by atom count unless you pass diff --git a/atompack-py/benchmarks/atompack_batch_benchmark.py b/atompack-py/benchmarks/atompack_batch_benchmark.py index a0d54e3..7bd4417 100644 --- a/atompack-py/benchmarks/atompack_batch_benchmark.py +++ b/atompack-py/benchmarks/atompack_batch_benchmark.py @@ -36,7 +36,7 @@ import atompack -from benchmark import _n_mols_for_atoms, _read_sample, bench, create_atompack_db +from benchmark import DEFAULT_SCRATCH, _n_mols_for_atoms, _read_sample, bench, create_atompack_db DEFAULT_ATOMS = [64, 256, 512] DEFAULT_BATCH_SIZES = [32, 128, 512, 2048] @@ -241,7 +241,7 @@ def main(argv: list[str] | None = None) -> int: parser.add_argument("--trials", type=int, default=5) parser.add_argument("--batch-sizes", type=int, nargs="+", default=DEFAULT_BATCH_SIZES) parser.add_argument("--threads", nargs="+", default=DEFAULT_THREADS) - parser.add_argument("--scratch-dir", type=Path, default=Path("/ogre/atompack-v2/benchmarks")) + parser.add_argument("--scratch-dir", type=Path, default=DEFAULT_SCRATCH) parser.add_argument("--compression", type=str, 
default=DEFAULT_CODEC, choices=["none", "lz4", "zstd"]) parser.add_argument("--level", type=int, default=DEFAULT_LEVEL) parser.add_argument("--seed", type=int, default=1234) diff --git a/atompack-py/benchmarks/benchmark.py b/atompack-py/benchmarks/benchmark.py index d583d61..7f02d2a 100644 --- a/atompack-py/benchmarks/benchmark.py +++ b/atompack-py/benchmarks/benchmark.py @@ -216,12 +216,17 @@ def _ensure_scratch_has_space(path: Path, *, context: str, min_free_bytes: int = "Free space on that filesystem or use --scratch-dir on a different disk." ) -DEFAULT_SCRATCH = Path("/ogre/atompack-v2/benchmarks") -DEFAULT_OMAT_ATOMPACK = Path( - "/ogre/atompack-v2/omat/train_50m_atompack_single_v3_soa/part_0000.atp" -) -DEFAULT_OMAT_LMDB_PACKED = Path("/ogre/atompack-v2/omat/train_50m_lmdb_single_v3") -DEFAULT_OMAT_LMDB_PICKLE = Path("/ogre/atompack-v2/omat/train_50m_lmdb_pickle_style_v1") +DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH" + + +def _default_scratch_dir() -> Path: + override = os.environ.get(DEFAULT_SCRATCH_ENV) + if override: + return Path(override).expanduser() + return Path(tempfile.gettempdir()) / "atompack-benchmarks" + + +DEFAULT_SCRATCH = _default_scratch_dir() # Default molecule counts per atom count — sized so datasets are large enough # to exceed page cache and stress real I/O. 
diff --git a/atompack-py/benchmarks/write_benchmark.py b/atompack-py/benchmarks/write_benchmark.py index c7253a0..672c229 100644 --- a/atompack-py/benchmarks/write_benchmark.py +++ b/atompack-py/benchmarks/write_benchmark.py @@ -68,7 +68,13 @@ HDF5_SOA_CHUNK_SIZE = 256 ASE_WRITE_MAX = 5_000 DEFAULT_WRITE_CODEC = "none" -DEFAULT_SCRATCH_DIR = "/ogre/tmp" +DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH" +# Resolve exactly like benchmark.py: a non-empty override is the scratch dir itself. +DEFAULT_SCRATCH_DIR = str( + Path(os.environ[DEFAULT_SCRATCH_ENV]).expanduser() + if os.environ.get(DEFAULT_SCRATCH_ENV) + else Path(tempfile.gettempdir()) / "atompack-benchmarks" +) DEFAULT_ATOMPACK_TARGET_BATCH_MIB = 16.0 DEFAULT_BATCH_SWEEP_SIZES = [256, 512, 1024, 2048, 4096, WRITE_BATCH_SIZE] DEFAULT_WARMUP_TRIALS = 1 @@ -1252,7 +1258,10 @@ def main(argv: list[str] | None = None) -> int: ) parser.add_argument( "--scratch-dir", type=str, default=DEFAULT_SCRATCH_DIR, - help=f"Directory for temporary datasets (default: {DEFAULT_SCRATCH_DIR}).", + help=( + "Directory for temporary datasets " + f"(default: {DEFAULT_SCRATCH_DIR}; override via {DEFAULT_SCRATCH_ENV})." 
+ ), ) parser.add_argument( "--out", type=Path, default=None, diff --git a/atompack-py/tests/benchmarks/test_publication_surface.py b/atompack-py/tests/benchmarks/test_publication_surface.py deleted file mode 100644 index 2e9e4ab..0000000 --- a/atompack-py/tests/benchmarks/test_publication_surface.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2026 Entalpic -from __future__ import annotations - -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[3] -def test_maintained_benchmark_entrypoints_import(stub_tqdm, load_benchmark_module) -> None: - for file_name in ( - "benchmark.py", - "write_benchmark.py", - "scaling_benchmark.py", - "memory_benchmark.py", - "atompack_bestcase_read_benchmark.py", - ): - module = load_benchmark_module(file_name.replace(".py", ""), file_name) - assert hasattr(module, "main") - - -def test_public_docs_reference_current_api_and_scripts() -> None: - files = [ - ROOT / "README.md", - ROOT / "docs" / "source" / "getting-started.rst", - ROOT / "docs" / "source" / "performance.rst", - ROOT / "docs" / "source" / "blog" / "atompack-release.md", - ] - combined = "\n".join(path.read_text(encoding="utf-8") for path in files) - - assert "scripts/benchmark.py" not in combined - assert "compare_real_omat_backends.py" not in combined - assert "compare_lmdb_layouts.py" not in combined - assert "atompack.Molecule(atoms)" not in combined - assert "Database.open_mmap" not in combined - assert "Database Format (v1)" not in combined - - assert "atompack-py/benchmarks/benchmark.py" in combined - assert "Molecule.from_arrays" in combined or "atompack.Molecule(" in combined diff --git a/atompack-py/tests/test_stub_surface.py b/atompack-py/tests/test_stub_surface.py new file mode 100644 index 0000000..b883fcc --- /dev/null +++ b/atompack-py/tests/test_stub_surface.py @@ -0,0 +1,100 @@ +from __future__ import annotations + +import ast +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +PRIVATE_STUB = ROOT / "python" / 
"atompack" / "_atompack_rs.pyi" +PUBLIC_STUB = ROOT / "python" / "atompack" / "__init__.pyi" +HUB_STUB = ROOT / "python" / "atompack" / "hub.pyi" + +# AST node types that count as function or method definitions in a stub. +_FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef) + + +def _class_method_names(path: Path, class_name: str) -> set[str]: + """Return the method names defined directly in top-level class *class_name*.""" + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in tree.body: + if isinstance(node, ast.ClassDef) and node.name == class_name: + return { + child.name + for child in node.body + if isinstance(child, _FUNCTION_NODES) + } + raise AssertionError(f"Class {class_name!r} not found in {path}") + + +def _class_docstring(path: Path, class_name: str) -> str | None: + """Return the docstring of top-level class *class_name*, or None if it has none.""" + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in tree.body: + if isinstance(node, ast.ClassDef) and node.name == class_name: + return ast.get_docstring(node) + raise AssertionError(f"Class {class_name!r} not found in {path}") + + +def _function_docstring(path: Path, function_name: str) -> str | None: + """Return the docstring of top-level function *function_name*, or None if it has none.""" + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in tree.body: + if isinstance(node, _FUNCTION_NODES) and node.name == function_name: + return ast.get_docstring(node) + raise AssertionError(f"Function {function_name!r} not found in {path}") + + +def _function_arg_names(path: Path, function_name: str) -> list[str]: + """Return positional-only, positional, then keyword-only parameter names.""" + tree = ast.parse(path.read_text(encoding="utf-8")) + for node in tree.body: + if isinstance(node, _FUNCTION_NODES) and node.name == function_name: + args = [arg.arg for arg in (*node.args.posonlyargs, *node.args.args)] + args.extend(arg.arg for arg in node.args.kwonlyargs) + return args + raise AssertionError(f"Function {function_name!r} not found in {path}") + + +def test_private_stub_tracks_low_level_surface() -> None: + molecule_methods = _class_method_names(PRIVATE_STUB, "PyMolecule") + assert { + "__init__", + "from_arrays", + "to_owned", + "_ase_builtin_tuple_fast", + "_ase_payload", + "__getitem__", + } <= molecule_methods + + database_methods = _class_method_names(PRIVATE_STUB, "PyAtomDatabase") + assert {"add_arrays_batch", 
"get_molecules_flat"} <= database_methods + + text = PRIVATE_STUB.read_text(encoding="utf-8") + assert 'compression: str = "none"' in text + assert "overwrite: bool = False" in text + assert "Parameters" in (_class_docstring(PRIVATE_STUB, "PyAtom") or "") + assert "Atomic positions" in (_class_docstring(PRIVATE_STUB, "PyMolecule") or "") + assert "Compression type" in (_class_docstring(PRIVATE_STUB, "PyAtomDatabase") or "") + + +def test_public_stub_exposes_flat_batch_reader() -> None: + database_methods = _class_method_names(PUBLIC_STUB, "Database") + assert "get_molecules_flat" in database_methods + + +def test_hub_stub_has_public_docstrings() -> None: + reader_doc = _class_docstring(HUB_STUB, "AtompackReader") or "" + assert "lexicographically ordered shard set" in reader_doc + + download_doc = _function_docstring(HUB_STUB, "download") or "" + assert "shard directory" in download_doc + + upload_doc = _function_docstring(HUB_STUB, "upload") or "" + assert "Xet" in upload_doc + assert "use_xet" in _function_arg_names(HUB_STUB, "upload") + + open_doc = _function_docstring(HUB_STUB, "open") or "" + assert "download" in open_doc.lower() + + open_path_doc = _function_docstring(HUB_STUB, "open_path") or "" + assert "Directories are scanned recursively" in open_path_doc