Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,13 @@ mlflow:
registry_uri: https://mlflow-staging.computing.als.lbl.gov

hpc_submission_settings832:
# ── RECON + MULTIRES SETTINGS ───────────────────────────────────────────────
nersc_reconstruction:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: realtime
account: als
reservation: ""
num_nodes: 4
reservation: "_CAP_TOMO_MOON_CPU"
num_nodes: 16
cpus-per-task: 128
walltime: "0:30:00"
nersc_multiresolution:
Expand All @@ -190,6 +191,8 @@ hpc_submission_settings832:
reservation: ""
cpus-per-task: 128
walltime: "0:15:00"

# ── PETIOLE SEGMENTATION SETTINGS ───────────────────────────────────────────
nersc_segmentation_sam3:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
Expand Down Expand Up @@ -258,3 +261,26 @@ hpc_submission_settings832:
cfs_path: /global/cfs/cdirs/als/data_mover/8.3.2
conda_env_path: /global/cfs/cdirs/als/data_mover/8.3.2/envs/dino_demo
seg_scripts_dir: /global/cfs/cdirs/als/data_mover/8.3.2/tomography_segmentation_scripts/inference_latest/forge_feb_seg_model_demo

# ── MOON SEGMENTATION SETTINGS ───────────────────────────────────────────
nersc_segmentation_dinov3_moon:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
account: als
constraint: gpu
reservation: "_CAP_TOMO_MOON_GPU"
num_nodes: 4
ntasks-per-node: 1
nproc_per_node: 4
gpus-per-node: 4
cpus-per-task: 128
walltime: "00:59:00"
# ── Inference parameters ──────────────────────────────────────────────────
script_name: "src.inference_dino_v2"
project: "moon"
batch_size: 4
# ── Paths ─────────────────────────────────────────────────────────────────
cfs_path: /global/cfs/cdirs/als/data_mover/8.3.2
conda_env_path: /global/cfs/cdirs/als/data_mover/8.3.2/envs/dino_demo
seg_scripts_dir: /global/cfs/cdirs/als/data_mover/8.3.2/tomography_segmentation_scripts/moon_seg/forge_feb_seg_model_demo/
dino_checkpoint_path: /global/cfs/cdirs/als/data_mover/8.3.2/tomography_segmentation_scripts/dino/best_moon.ckpt
90 changes: 89 additions & 1 deletion orchestration/_tests/test_bl832/test_nersc.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def test_nersc_segmentation_dinov3_task_success(mocker, mock_config832):
config=mock_config832
)

mock_controller.segmentation_dinov3.assert_called_once_with(recon_folder_path="folder/recfile")
mock_controller.segmentation_dinov3.assert_called_once_with(recon_folder_path="folder/recfile", project="petiole")
assert result is True


Expand Down Expand Up @@ -706,3 +706,91 @@ def test_petiole_segment_flow_recon_failure(mocker, mock_config832):

with pytest.raises(ValueError, match="Reconstruction at NERSC Failed"):
nersc_petiole_segment_flow(file_path="folder/file.h5", num_nodes=4, config=None)

# ──────────────────────────────────────────────────────────────────────────────
# nersc_moon_segment_flow (recon + DINOv3-moon only, no SAM3, no combine)
# ──────────────────────────────────────────────────────────────────────────────


def test_moon_segment_flow_succeeds(mocker, mock_config832, mock_recon_success):
    """Moon flow returns True when reconstruction and DINOv3 segmentation both report success.

    Besides the return value, this checks that ``reconstruct`` is called exactly
    once and that the DINOv3 task is submitted exactly once with
    ``recon_folder_path="folder/recfile"``, the ``mock_config832`` fixture as
    ``config``, and ``project="moon"``.
    """
    from orchestration.flows.bl832.nersc import nersc_moon_segment_flow

    # Collaborators that are only patched out and never inspected.
    mocker.patch("orchestration.flows.bl832.nersc.get_prune_controller", return_value=mocker.MagicMock())

    # Controller stub: reconstruct() hands back the successful-recon fixture.
    controller = mocker.MagicMock()
    controller.reconstruct.return_value = mock_recon_success
    mocker.patch("orchestration.flows.bl832.nersc.get_controller", return_value=controller)

    # Transfer and segmentation tasks: submit() yields a stub built with True.
    transfer_task = mocker.patch("orchestration.flows.bl832.nersc.globus_transfer_task")
    transfer_task.submit.return_value = _make_future(mocker, True)

    dinov3_task = mocker.patch("orchestration.flows.bl832.nersc.nersc_segmentation_dinov3_task")
    dinov3_task.submit.return_value = _make_future(mocker, True)

    result = nersc_moon_segment_flow(file_path="folder/file.h5", num_nodes=4, config=None)

    assert result is True
    controller.reconstruct.assert_called_once()

    # The flow is invoked with config=None, yet the task is expected to receive
    # the mock_config832 fixture object.
    expected_submit_kwargs = {
        "recon_folder_path": "folder/recfile",
        "config": mock_config832,
        "project": "moon",
    }
    dinov3_task.submit.assert_called_once_with(**expected_submit_kwargs)


def test_moon_segment_flow_seg_failure(mocker, mock_config832, mock_recon_success):
    """Moon flow returns False when reconstruction succeeds but the DINOv3 task stub is built with False."""
    from orchestration.flows.bl832.nersc import nersc_moon_segment_flow

    # Collaborator that is only patched out and never inspected.
    mocker.patch("orchestration.flows.bl832.nersc.get_prune_controller", return_value=mocker.MagicMock())

    # Reconstruction itself succeeds.
    controller = mocker.MagicMock()
    controller.reconstruct.return_value = mock_recon_success
    mocker.patch("orchestration.flows.bl832.nersc.get_controller", return_value=controller)

    # NOTE(review): the transfer stub is also built with False, so this test does
    # not by itself show whether the segmentation result or the transfer result
    # drives the flow's return value -- confirm against the flow implementation.
    transfer_task = mocker.patch("orchestration.flows.bl832.nersc.globus_transfer_task")
    transfer_task.submit.return_value = _make_future(mocker, False)

    # Segmentation reports failure.
    dinov3_task = mocker.patch("orchestration.flows.bl832.nersc.nersc_segmentation_dinov3_task")
    dinov3_task.submit.return_value = _make_future(mocker, False)

    outcome = nersc_moon_segment_flow(file_path="folder/file.h5", num_nodes=4, config=None)

    assert outcome is False


def test_moon_segment_flow_recon_failure(mocker, mock_config832):
    """Moon flow raises ValueError when ``reconstruct`` reports ``success: False``."""
    from orchestration.flows.bl832.nersc import nersc_moon_segment_flow

    # Result payload of a reconstruction that did not succeed.
    failed_recon = {"success": False, "job_id": None, "timing": None}

    controller = mocker.MagicMock()
    controller.reconstruct.return_value = failed_recon
    mocker.patch("orchestration.flows.bl832.nersc.get_controller", return_value=controller)

    # Patched out so the real collaborators are not used; no behaviour is configured.
    mocker.patch("orchestration.flows.bl832.nersc.get_prune_controller", return_value=mocker.MagicMock())
    mocker.patch("orchestration.flows.bl832.nersc.globus_transfer_task")

    # NOTE(review): `match` is a case-sensitive regex search. The petiole flow
    # test in this file matches "Reconstruction at NERSC Failed" (capital F);
    # confirm the moon flow really raises with a lowercase "failed".
    expected_error = pytest.raises(ValueError, match="Reconstruction at NERSC failed")
    with expected_error:
        nersc_moon_segment_flow(file_path="folder/file.h5", num_nodes=4, config=None)


def test_moon_segment_flow_no_sam3_no_combine(mocker, mock_config832, mock_recon_success):
    """Running the moon flow must not submit the SAM3 task or the combine-segmentations task."""
    from orchestration.flows.bl832.nersc import nersc_moon_segment_flow

    # Tasks that the moon flow is expected to leave untouched.
    sam3_task = mocker.patch("orchestration.flows.bl832.nersc.nersc_segmentation_sam3_task")
    combine_task = mocker.patch("orchestration.flows.bl832.nersc.nersc_combine_segmentations_task")

    # Happy-path stubs for everything else the flow is patched against.
    controller = mocker.MagicMock()
    controller.reconstruct.return_value = mock_recon_success
    mocker.patch("orchestration.flows.bl832.nersc.get_controller", return_value=controller)
    mocker.patch("orchestration.flows.bl832.nersc.get_prune_controller", return_value=mocker.MagicMock())

    transfer_task = mocker.patch("orchestration.flows.bl832.nersc.globus_transfer_task")
    transfer_task.submit.return_value = _make_future(mocker, True)

    dinov3_task = mocker.patch("orchestration.flows.bl832.nersc.nersc_segmentation_dinov3_task")
    dinov3_task.submit.return_value = _make_future(mocker, True)

    # The flow's return value is deliberately not checked in this test.
    nersc_moon_segment_flow(file_path="folder/file.h5", num_nodes=4, config=None)

    for untouched_task in (sam3_task, combine_task):
        untouched_task.submit.assert_not_called()
1 change: 1 addition & 0 deletions orchestration/flows/bl832/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ def _beam_specific_config(self) -> None:
self.nersc_segment_sam3_settings = self.config["hpc_submission_settings832"]["nersc_segmentation_sam3"]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was wondering whether we should add `petiole` or `synaps-i` to the names of `nersc_segment_sam3_settings`, `nersc_segment_dinov3_settings`, and `nersc_combine_segmentation_settings`.

self.nersc_segment_dinov3_settings = self.config["hpc_submission_settings832"]["nersc_segmentation_dinov3"]
self.nersc_combine_segmentation_settings = self.config["hpc_submission_settings832"]["nersc_combine_segmentations"]
self.nersc_segment_dinov3_moon_settings = self.config["hpc_submission_settings832"]["nersc_segmentation_dinov3_moon"]
14 changes: 14 additions & 0 deletions orchestration/flows/bl832/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ class FlowParameterMapper:
"num_nodes",
"config"],
"nersc_petiole_segment_flow/nersc_petiole_segment_flow": [
"file_path",
"num_nodes",
"config"],
"nersc_moon_segment_flow/nersc_moon_segment_flow": [
"file_path",
"num_nodes",
"config"]
Expand Down Expand Up @@ -65,6 +69,7 @@ def setup_decision_settings(
alcf_recon: bool,
nersc_recon: bool,
nersc_petiole_segment: bool,
nersc_moon_segment: bool,
new_file_832: bool
) -> dict:
"""
Expand All @@ -73,6 +78,7 @@ def setup_decision_settings(
:param alcf_recon: Boolean indicating whether to run the ALCF reconstruction flow.
:param nersc_recon: Boolean indicating whether to run the NERSC reconstruction flow.
:param nersc_petiole_segment: Boolean indicating whether to run the NERSC petiole segmentation flow.
:param nersc_moon_segment: Boolean indicating whether to run the NERSC moon segmentation flow.
:param new_file_832: Boolean indicating whether to move files to NERSC.
:return: A dictionary containing the settings for each flow.
"""
Expand All @@ -81,12 +87,14 @@ def setup_decision_settings(
logger.info(f"Setting up decision settings: alcf_recon={alcf_recon}, "
f"nersc_recon={nersc_recon}, "
f"nersc_petiole_segment={nersc_petiole_segment}, "
f"nersc_moon_segment={nersc_moon_segment}, "
f"new_file_832={new_file_832}")
# Define which flows to run based on the input settings
settings = {
"alcf_recon_flow/alcf_recon_flow": alcf_recon,
"nersc_recon_flow/nersc_recon_flow": nersc_recon,
"nersc_petiole_segment_flow/nersc_petiole_segment_flow": nersc_petiole_segment,
"nersc_moon_segment_flow/nersc_moon_segment_flow": nersc_moon_segment,
"new_832_file_flow/new_file_832": new_file_832
}
# Save the settings in a JSON block for later retrieval by other flows
Expand Down Expand Up @@ -172,6 +180,12 @@ async def dispatcher(
run_recon_flow_async("nersc_petiole_segment_flow/nersc_petiole_segment_flow", nersc_petiole_segment_params)
)

if decision_settings.get("nersc_moon_segment_flow/nersc_moon_segment_flow"):
moon_params = FlowParameterMapper.get_flow_parameters(
"nersc_moon_segment_flow/nersc_moon_segment_flow", available_params
)
tasks.append(run_recon_flow_async("nersc_moon_segment_flow/nersc_moon_segment_flow", moon_params))

# Run ALCF and NERSC flows in parallel, if any
if tasks:
try:
Expand Down
Loading
Loading