Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
986470a
Initial commit for adding NERSC IRI-API support alongside SFAPI for j…
davramov Mar 16, 2026
0512e58
Adding an abstraction for _submit_job() and _wait_for_job() that use …
davramov Mar 16, 2026
fe27519
moving NERSCLoginMethod(Enum) to the job_controller.py module
davramov Mar 17, 2026
eaf02fe
Removed NERSCLoginMethod(Enum) from nersc.py. Created a temporary tes…
davramov Mar 17, 2026
be2c571
Updating pytests
davramov Mar 17, 2026
cf15c20
Updating multires() method to use the generic _submit_job() and _wait…
davramov Mar 17, 2026
d0e8068
successfully ran reconstruction using the IRI-API
davramov Mar 30, 2026
6b8c843
removing token.py and moving the logic to get_globus_token.py
davramov Apr 1, 2026
27ea5b2
moving get_globus_token.py to orchestration/globus/ to be used as a m…
davramov Apr 1, 2026
bad1db5
Cleaning up nersc.py
davramov Apr 1, 2026
da16341
cleaning up old commented code
davramov Apr 1, 2026
d1d65ad
Updating unit tests
davramov Apr 1, 2026
dda78c5
updating login script
davramov Apr 7, 2026
596106a
Rebasing and including segmentation flows as part of iri/sfapi abstra…
davramov Apr 7, 2026
9da5e6e
commenting out petiole segmentation prune block for now, while testing
davramov Apr 13, 2026
ef227af
Making reconstruction run as a task
davramov Apr 13, 2026
b4558be
Making IRIAPI the default login method for now
davramov Apr 13, 2026
241c889
adjusting queue name and account
davramov Apr 14, 2026
c9e7b14
Making the IRI job submission read sbatch settings
davramov Apr 14, 2026
698d243
Switching to debug queue/2 nodes for the IRI demo
davramov Apr 14, 2026
6e01f8f
check globus token expiration before minting a new one. avoids race c…
davramov Apr 14, 2026
f4388e8
Fixing IRIAPI bugs, also commenting out Globus transfers for now
davramov Apr 14, 2026
a490bfe
removing IRIAPI client ID from nersc.py, since it is only used in glo…
davramov Apr 15, 2026
041f336
Updating logger comments
davramov Apr 23, 2026
863b24e
connecting to AmSC MLflow service
davramov Apr 24, 2026
0144f52
removing old commented code
davramov Apr 24, 2026
0ad03ac
updating pytest
davramov Apr 24, 2026
9d8e2c1
linting
davramov Apr 24, 2026
e4f4e08
adjusting import in pytest to avoid error on github that did not occu…
davramov Apr 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
BEAMLINE=8.3.2
GLOBUS_CLIENT_ID=<globus_client_id>
GLOBUS_CLIENT_SECRET=<globus_client_secret>
PREFECT_API_URL=<url_of_prefect_server>
PREFECT_API_KEY=<prefect_client_secret>
PUSHGATEWAY_URL=<url_of_pushgateway_server>
JOB_NAME=<jobname_for_pushgateway>
INSTANCE_LABEL=<label_for_pushgateway>
INSTANCE_LABEL=<label_for_pushgateway>
PATH_NERSC_CLIENT_ID=<path_to_nersc_client_id>
PATH_NERSC_PRI_KEY=<path_to_nersc_priv_key>
NERSC_USERNAME=<nersc_username>
AMSC_API_KEY=<amsc_api_key> # found here: https://profile.american-science-cloud.org/
30 changes: 17 additions & 13 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,20 +173,24 @@ mlflow:
staging:
tracking_uri: https://mlflow-staging.computing.als.lbl.gov
registry_uri: https://mlflow-staging.computing.als.lbl.gov
amsc:
tracking_uri: https://mlflow.american-science-cloud.org/
registry_uri: https://mlflow.american-science-cloud.org/
experiment_name: als-bl832-models

hpc_submission_settings832:
# ── RECON + MULTIRES SETTINGS ───────────────────────────────────────────────
nersc_reconstruction:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: realtime
qos: debug
account: als
reservation: "_CAP_TOMO_MOON_CPU"
num_nodes: 16
reservation: ""
num_nodes: 2
cpus-per-task: 128
walltime: "0:30:00"
nersc_multiresolution:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: realtime
qos: debug
account: als
reservation: ""
cpus-per-task: 128
Expand All @@ -195,15 +199,15 @@ hpc_submission_settings832:
# ── PETIOLE SEGMENTATION SETTINGS ───────────────────────────────────────────
nersc_segmentation_sam3:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
qos: debug
account: als
constraint: gpu
reservation: ""
num_nodes: 4
num_nodes: 2
ntasks-per-node: 1
gpus-per-node: 4
cpus-per-task: 128
walltime: "00:59:00"
walltime: "00:30:00"
# ── Inference parameters ──────────────────────────────────────────────────
script_name: "src/inference_v6.py"
batch_size: 1
Expand All @@ -226,16 +230,16 @@ hpc_submission_settings832:
finetuned_checkpoint_path: /global/cfs/cdirs/als/data_mover/8.3.2/tomography_segmentation_scripts/sam3_finetune/sam3/checkpoint_v6.pt
nersc_segmentation_dinov3:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
qos: debug
account: als
constraint: gpu
reservation: ""
num_nodes: 4
num_nodes: 2
ntasks-per-node: 1
nproc_per_node: 4
gpus-per-node: 4
cpus-per-task: 128
walltime: "00:59:00"
walltime: "00:30:00"
# ── Inference parameters ──────────────────────────────────────────────────
script_name: "src.inference_dino_v1"
batch_size: 4
Expand All @@ -246,14 +250,14 @@ hpc_submission_settings832:
dino_checkpoint_path: /global/cfs/cdirs/als/data_mover/8.3.2/tomography_segmentation_scripts/dino/best.ckpt
nersc_combine_segmentations:
# ── SLURM resource allocation ─────────────────────────────────────────────
qos: regular
qos: debug
account: als
constraint: cpu
reservation: ""
num_nodes: 4
num_nodes: 2
ntasks: 128
cpus-per-task: 1
walltime: "01:00:00"
walltime: "00:30:00"
# ── Combination parameters ────────────────────────────────────────────────
script_name: "src.combine_sam_dino_v3"
dilate_px: 5
Expand Down
Loading
Loading