Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ help:
@echo " make fmt Format Rust + Python"
@echo " make fmt-check Check formatting"
@echo " make lint Run linters"
@echo " make test Run tests (Rust + Python)"
@echo " make test Run default tests (Rust + Python core suite)"
@echo " make ci Run all CI checks"
@echo ""
@echo "Language-specific:"
Expand All @@ -27,7 +27,7 @@ help:
@echo " make py-fmt uv ruff format python (atompack-py)"
@echo " make py-lint uv ruff check python (atompack-py)"
@echo " make py-test uv pytest core suite (atompack-py/tests without benchmark tooling)"
@echo " make py-test-benchmarks uv pytest benchmark tooling suite (atompack-py/tests/benchmarks)"
@echo " make py-test-benchmarks uv pytest benchmark tooling suite (manual only)"
@echo " make py-dev uv maturin develop (atompack-py)"
@echo " make py-dev-release uv maturin develop -r (atompack-py)"
@echo ""
Expand Down Expand Up @@ -107,10 +107,10 @@ fmt-check: rust-fmt-check py-fmt-check

lint: rust-lint py-lint

test: rust-test py-test py-test-benchmarks
test: rust-test py-test

ci-rust: rust-fmt-check rust-lint rust-test

ci-py: py-fmt-check py-lint py-test py-test-benchmarks
ci-py: py-fmt-check py-lint py-test

ci: ci-rust ci-py
7 changes: 4 additions & 3 deletions atompack-py/benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,15 @@ uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_bench
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --codec zstd:3
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 2 --sizes 50000 500000 5000000
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --bench 3 --batch-scale-atoms 64 256 --batch-scale-sizes 256 512 1024 2048 4096 10000
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --scratch-dir /ogre/atompack-v2/tmp
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --scratch-dir /tmp/atompack-bench
uv run --no-sync --project atompack-py python atompack-py/benchmarks/write_benchmark.py --out atompack-py/benchmarks/write_results.json
```

Notes:

- Temporary benchmark datasets default to `/ogre/tmp`; override with
`--scratch-dir ...` when you want a different filesystem.
- Benchmark datasets default to an `atompack-benchmarks` directory under the
system temporary directory; override with `--scratch-dir ...` or the
`ATOMPACK_BENCHMARK_SCRATCH` environment variable when you want a different filesystem.
- This script defaults to `--codec none` so raw write throughput is measured unless you explicitly opt into compression.
- Pass `--codec lz4` or `--codec zstd:3` when you want compressed-write numbers.
- Atompack now auto-sizes its write batch by atom count unless you pass
Expand Down
4 changes: 2 additions & 2 deletions atompack-py/benchmarks/atompack_batch_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

import atompack

from benchmark import _n_mols_for_atoms, _read_sample, bench, create_atompack_db
from benchmark import DEFAULT_SCRATCH, _n_mols_for_atoms, _read_sample, bench, create_atompack_db

DEFAULT_ATOMS = [64, 256, 512]
DEFAULT_BATCH_SIZES = [32, 128, 512, 2048]
Expand Down Expand Up @@ -241,7 +241,7 @@ def main(argv: list[str] | None = None) -> int:
parser.add_argument("--trials", type=int, default=5)
parser.add_argument("--batch-sizes", type=int, nargs="+", default=DEFAULT_BATCH_SIZES)
parser.add_argument("--threads", nargs="+", default=DEFAULT_THREADS)
parser.add_argument("--scratch-dir", type=Path, default=Path("/ogre/atompack-v2/benchmarks"))
parser.add_argument("--scratch-dir", type=Path, default=DEFAULT_SCRATCH)
parser.add_argument("--compression", type=str, default=DEFAULT_CODEC, choices=["none", "lz4", "zstd"])
parser.add_argument("--level", type=int, default=DEFAULT_LEVEL)
parser.add_argument("--seed", type=int, default=1234)
Expand Down
17 changes: 11 additions & 6 deletions atompack-py/benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,12 +216,17 @@ def _ensure_scratch_has_space(path: Path, *, context: str, min_free_bytes: int =
"Free space on that filesystem or use --scratch-dir on a different disk."
)

DEFAULT_SCRATCH = Path("/ogre/atompack-v2/benchmarks")
DEFAULT_OMAT_ATOMPACK = Path(
"/ogre/atompack-v2/omat/train_50m_atompack_single_v3_soa/part_0000.atp"
)
DEFAULT_OMAT_LMDB_PACKED = Path("/ogre/atompack-v2/omat/train_50m_lmdb_single_v3")
DEFAULT_OMAT_LMDB_PICKLE = Path("/ogre/atompack-v2/omat/train_50m_lmdb_pickle_style_v1")
# Name of the environment variable that, when non-empty, selects the scratch directory.
DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH"


def _default_scratch_dir() -> Path:
    """Resolve the default directory for benchmark scratch data.

    Returns:
        The path named by the ``ATOMPACK_BENCHMARK_SCRATCH`` environment
        variable (with a leading ``~`` expanded) when that variable is set to
        a non-empty value; otherwise ``atompack-benchmarks`` inside the
        directory reported by ``tempfile.gettempdir()``.
    """
    configured = os.environ.get(DEFAULT_SCRATCH_ENV, "")
    if not configured:
        # Unset and empty are treated alike: fall back to the temp directory.
        return Path(tempfile.gettempdir(), "atompack-benchmarks")
    return Path(configured).expanduser()


# Resolved once, when this module is imported.
DEFAULT_SCRATCH = _default_scratch_dir()

# Default molecule counts per atom count — sized so datasets are large enough
# to exceed page cache and stress real I/O.
Expand Down
11 changes: 9 additions & 2 deletions atompack-py/benchmarks/write_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@
HDF5_SOA_CHUNK_SIZE = 256
ASE_WRITE_MAX = 5_000
DEFAULT_WRITE_CODEC = "none"
DEFAULT_SCRATCH_DIR = "/ogre/tmp"
# Environment variable that overrides the default scratch directory.
DEFAULT_SCRATCH_ENV = "ATOMPACK_BENCHMARK_SCRATCH"
# A non-empty override is used as the scratch directory itself (``~`` expanded),
# matching ``_default_scratch_dir`` in benchmark.py.  When the variable is unset
# or empty, fall back to ``<system temp dir>/atompack-benchmarks`` rather than a
# path relative to the current working directory.
DEFAULT_SCRATCH_DIR = str(
    Path(
        os.environ.get(DEFAULT_SCRATCH_ENV)
        or Path(tempfile.gettempdir()) / "atompack-benchmarks"
    ).expanduser()
)
DEFAULT_ATOMPACK_TARGET_BATCH_MIB = 16.0
DEFAULT_BATCH_SWEEP_SIZES = [256, 512, 1024, 2048, 4096, WRITE_BATCH_SIZE]
DEFAULT_WARMUP_TRIALS = 1
Expand Down Expand Up @@ -1252,7 +1256,10 @@ def main(argv: list[str] | None = None) -> int:
)
parser.add_argument(
"--scratch-dir", type=str, default=DEFAULT_SCRATCH_DIR,
help=f"Directory for temporary datasets (default: {DEFAULT_SCRATCH_DIR}).",
help=(
"Directory for temporary datasets "
f"(default: {DEFAULT_SCRATCH_DIR}; override via {DEFAULT_SCRATCH_ENV})."
),
)
parser.add_argument(
"--out", type=Path, default=None,
Expand Down
37 changes: 0 additions & 37 deletions atompack-py/tests/benchmarks/test_publication_surface.py

This file was deleted.

93 changes: 93 additions & 0 deletions atompack-py/tests/test_stub_surface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import ast
from pathlib import Path


# Parent of the directory that contains this test module.
ROOT = Path(__file__).resolve().parents[1]
# The stub files checked by these tests all sit in the same package directory.
_STUB_DIR = ROOT.joinpath("python", "atompack")
PRIVATE_STUB = _STUB_DIR / "_atompack_rs.pyi"
PUBLIC_STUB = _STUB_DIR / "__init__.pyi"
HUB_STUB = _STUB_DIR / "hub.pyi"


def _class_method_names(path: Path, class_name: str) -> set[str]:
    """Collect the method names declared directly in a top-level class.

    Args:
        path: Python source or stub file to parse.
        class_name: Name of a class defined at module level in ``path``.

    Returns:
        Names of every ``def`` / ``async def`` found directly in the body of
        the first matching class.

    Raises:
        AssertionError: If no module-level class has that name.
    """
    module = ast.parse(path.read_text(encoding="utf-8"))
    candidates = (
        stmt
        for stmt in module.body
        if isinstance(stmt, ast.ClassDef) and stmt.name == class_name
    )
    target = next(candidates, None)
    if target is None:
        raise AssertionError(f"Class {class_name!r} not found in {path}")

    method_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    return {member.name for member in target.body if isinstance(member, method_types)}


def _class_docstring(path: Path, class_name: str) -> str | None:
tree = ast.parse(path.read_text(encoding="utf-8"))
for node in tree.body:
if isinstance(node, ast.ClassDef) and node.name == class_name:
return ast.get_docstring(node)
raise AssertionError(f"Class {class_name!r} not found in {path}")


def _function_docstring(path: Path, function_name: str) -> str | None:
tree = ast.parse(path.read_text(encoding="utf-8"))
for node in tree.body:
if isinstance(node, ast.FunctionDef) and node.name == function_name:
return ast.get_docstring(node)
raise AssertionError(f"Function {function_name!r} not found in {path}")


def _function_arg_names(path: Path, function_name: str) -> list[str]:
    """List the named parameters of a module-level function in a source or stub file.

    Both ``def`` and ``async def`` definitions are matched.

    Args:
        path: Python source or stub file to parse.
        function_name: Name of a function defined at module level in ``path``.

    Returns:
        Parameter names in declaration order: positional-only parameters,
        then regular positional-or-keyword parameters, then keyword-only
        parameters.  ``*args`` / ``**kwargs`` collectors are not included.

    Raises:
        AssertionError: If no module-level function has that name.
    """
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    tree = ast.parse(path.read_text(encoding="utf-8"))
    for node in tree.body:
        if isinstance(node, function_types) and node.name == function_name:
            signature = node.args
            # Positional-only parameters are stored separately from ``args``;
            # include them so the list covers every named parameter.
            named = (*signature.posonlyargs, *signature.args, *signature.kwonlyargs)
            return [param.arg for param in named]
    raise AssertionError(f"Function {function_name!r} not found in {path}")


def test_private_stub_tracks_low_level_surface() -> None:
    """Check that the private extension stub declares the expected low-level API.

    Verifies required method names on ``PyMolecule`` and ``PyAtomDatabase``,
    two default-argument spellings in the stub text, and key phrases in the
    class docstrings.
    """
    required_molecule_methods = {
        "__init__",
        "from_arrays",
        "to_owned",
        "_ase_builtin_tuple_fast",
        "_ase_payload",
        "__getitem__",
    }
    assert required_molecule_methods.issubset(
        _class_method_names(PRIVATE_STUB, "PyMolecule")
    )

    required_database_methods = {"add_arrays_batch", "get_molecules_flat"}
    assert required_database_methods.issubset(
        _class_method_names(PRIVATE_STUB, "PyAtomDatabase")
    )

    # Default values are checked against the raw stub text.
    stub_source = PRIVATE_STUB.read_text(encoding="utf-8")
    for snippet in ('compression: str = "none"', "overwrite: bool = False"):
        assert snippet in stub_source

    # A missing docstring is treated as empty so the check fails on content.
    expected_doc_fragments = (
        ("PyAtom", "Parameters"),
        ("PyMolecule", "Atomic positions"),
        ("PyAtomDatabase", "Compression type"),
    )
    for class_name, fragment in expected_doc_fragments:
        assert fragment in (_class_docstring(PRIVATE_STUB, class_name) or "")


def test_public_stub_exposes_flat_batch_reader() -> None:
    """Check that the public ``Database`` stub declares ``get_molecules_flat``."""
    assert "get_molecules_flat" in _class_method_names(PUBLIC_STUB, "Database")


def test_hub_stub_has_public_docstrings() -> None:
    """Check that the hub stub documents its reader class and helper functions.

    A missing docstring is treated as an empty string so each check fails on
    the expected phrase rather than on ``None``.
    """
    assert "lexicographically ordered shard set" in (
        _class_docstring(HUB_STUB, "AtompackReader") or ""
    )
    assert "shard directory" in (_function_docstring(HUB_STUB, "download") or "")

    # ``upload`` must mention Xet in its docstring and declare a ``use_xet`` argument.
    assert "Xet" in (_function_docstring(HUB_STUB, "upload") or "")
    assert "use_xet" in _function_arg_names(HUB_STUB, "upload")

    # The ``open`` check is case-insensitive.
    assert "download" in (_function_docstring(HUB_STUB, "open") or "").lower()
    assert "Directories are scanned recursively" in (
        _function_docstring(HUB_STUB, "open_path") or ""
    )
Loading