Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cd7a881
fix(jobs): Separate compute and executor payload shapes
matthewgrossman Jun 15, 2026
665e334
self review
matthewgrossman Jun 15, 2026
af07f83
fixes
matthewgrossman Jun 15, 2026
0b59a34
Merge branch 'main' into AIRCORE-513-align-executorkind-with-original…
matthewgrossman Jun 15, 2026
78191cb
make update-sdk
matthewgrossman Jun 15, 2026
03120ab
add profiles
matthewgrossman Jun 15, 2026
91e6e65
fixes
matthewgrossman Jun 15, 2026
47c2516
self code review
matthewgrossman Jun 15, 2026
3b920b8
lint
matthewgrossman Jun 15, 2026
ca3bebc
fix(jobs): update config files and clean up PR review feedback
matthewgrossman Jun 15, 2026
aed8e5d
fix(jobs): fix lint errors and re-export FilesetMetadata from types.f…
matthewgrossman Jun 15, 2026
6552ef1
style: format test files
matthewgrossman Jun 15, 2026
ba8ec99
fix(lint): add unused-type-ignore-comment to ty ignore list
matthewgrossman Jun 15, 2026
a4f2afc
fix(tests): add kind to e2e executor dicts, suppress ty invalid-key i…
matthewgrossman Jun 15, 2026
ebee99c
fix(tests): convert e2e tests to subprocess executors, add kind to in…
matthewgrossman Jun 16, 2026
1cbe6a5
style: format test_job_search.py
matthewgrossman Jun 16, 2026
4eae0b1
Merge branch 'main' into AIRCORE-513-align-executorkind-with-original…
matthewgrossman Jun 16, 2026
0ff5d89
feat(jobs): thread resolved kind+profile through compile pipeline
matthewgrossman Jun 16, 2026
ae1b784
fix: resolve merge conflict in e2e/test_jobs.py
matthewgrossman Jun 16, 2026
85c29de
fix(jobs): default kind to "container" in compile signatures
matthewgrossman Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 49 additions & 65 deletions e2e/test_jobs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
"""E2E tests for platform jobs.

These tests submit jobs with CPUExecutionProviderSpec (container + command).
The container image is omitted so that:
- On subprocess mode, the cpu→subprocess translation discards it anyway.
- On Kubernetes/Docker, the execution profile's default_task_image is used.
These tests submit jobs with SubprocessExecutionProviderSpec (host command).
The e2e test environment runs against the subprocess backend.
Comment on lines +3 to +4

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Subprocess-profile precondition is unmet in CI, so these tests fail at create-job time.

This suite now hard-depends on the cpu/subprocess execution profile, but CI currently rejects job creation with a 422 error ("The execution profile 'cpu/subprocess' ... does not exist"). Either provision that profile in the e2e runtime config, or add a preflight check that skips the subprocess-only tests when the profile is absent.

Also applies to: 57-60

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@e2e/test_jobs.py` around lines 3 - 4, The tests in test_jobs.py that use
SubprocessExecutionProviderSpec depend on the cpu/subprocess execution profile
which does not exist in the CI environment, causing create-job operations to
fail with a 422 error. Fix this by adding a preflight check before the
subprocess-dependent tests (including those at lines 57-60) that verifies the
cpu/subprocess profile is available in the execution environment, and skip those
tests with an appropriate skip marker if the profile is absent. Alternatively,
provision the cpu/subprocess profile in the e2e runtime configuration so that it
is available when tests execute.

Source: Pipeline failures


Ported from Platform-Deploy e2e/test_jobs.py, adapted for the SDK's TypedDict
param types and filtered to tests that work without Docker.
Expand Down Expand Up @@ -56,10 +54,9 @@ def test_basic_platform_job_lifecycle(sdk: NeMoPlatform, workspace: str):
{
"name": "echo-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["echo", "Hello from e2e test!"],
},
"command": ["echo", "Hello from e2e test!"],
},
},
],
Expand Down Expand Up @@ -99,10 +96,9 @@ def test_job_logs_across_multiple_batches(sdk: NeMoPlatform, workspace: str):
{
"name": "multi-log-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", log_command],
},
"command": ["sh", "-c", log_command],
},
},
],
Expand Down Expand Up @@ -138,10 +134,9 @@ def test_job_config_is_readable(sdk: NeMoPlatform, workspace: str):
{
"name": "config-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "echo 'Step config:'; cat $NEMO_JOB_STEP_CONFIG_FILE_PATH;"],
},
"command": ["sh", "-c", "echo 'Step config:'; cat $NEMO_JOB_STEP_CONFIG_FILE_PATH;"],
},
"config": {
"message": "Hello from job config!",
Expand Down Expand Up @@ -172,27 +167,25 @@ def test_job_passing_data_between_steps(sdk: NeMoPlatform, workspace: str):
{
"name": "generate-data-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": [
"sh",
"-c",
"echo 'Data from first step' > $NEMO_JOB_PERSISTENT_JOB_STORAGE_PATH/data.txt",
],
},
"command": [
"sh",
"-c",
"echo 'Data from first step' > $NEMO_JOB_PERSISTENT_JOB_STORAGE_PATH/data.txt",
],
},
},
{
"name": "consume-data-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": [
"sh",
"-c",
"echo 'Consuming data:'; cat $NEMO_JOB_PERSISTENT_JOB_STORAGE_PATH/data.txt",
],
},
"command": [
"sh",
"-c",
"echo 'Consuming data:'; cat $NEMO_JOB_PERSISTENT_JOB_STORAGE_PATH/data.txt",
],
},
},
],
Expand Down Expand Up @@ -228,10 +221,9 @@ def test_job_using_secret_environment_variable(sdk: NeMoPlatform, workspace: str
{
"name": "secret-envvar-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", 'echo "Secret value is: $SECRET_ENV_VAR"'],
},
"command": ["sh", "-c", 'echo "Secret value is: $SECRET_ENV_VAR"'],
},
"environment": [
{
Expand Down Expand Up @@ -276,10 +268,9 @@ def test_job_with_expected_failure(sdk: NeMoPlatform, workspace: str):
{
"name": "failing-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "echo 'This step will fail'; exit 1;"],
},
"command": ["sh", "-c", "echo 'This step will fail'; exit 1;"],
},
},
],
Expand All @@ -305,10 +296,9 @@ def test_job_cancel_immediately(sdk: NeMoPlatform, workspace: str):
{
"name": "long-running-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "sleep 60"],
},
"command": ["sh", "-c", "sleep 60"],
},
},
],
Expand All @@ -334,10 +324,9 @@ def test_job_cancel_once_active(sdk: NeMoPlatform, workspace: str):
{
"name": "long-running-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "sleep 300"],
},
"command": ["sh", "-c", "sleep 300"],
},
},
],
Expand Down Expand Up @@ -374,10 +363,9 @@ def test_job_pause_resume(sdk: NeMoPlatform, workspace: str):
{
"name": "long-running-step-pause-resume",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "sleep 300"],
},
"command": ["sh", "-c", "sleep 300"],
},
},
],
Expand Down Expand Up @@ -415,10 +403,9 @@ def test_job_pause_and_cancel(sdk: NeMoPlatform, workspace: str):
{
"name": "long-running-step-pause-cancel",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": ["sh", "-c", "sleep 300"],
},
"command": ["sh", "-c", "sleep 300"],
},
},
],
Expand Down Expand Up @@ -451,29 +438,27 @@ def test_job_using_additional_volume(sdk: NeMoPlatform, workspace: str):
{
"name": "write-data",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": [
"sh",
"-c",
"echo 'Hello, World!' > /mnt/additional_storage/shared_data.txt; "
"echo 'Successfully wrote data to persistent storage';",
],
},
"command": [
"sh",
"-c",
"echo 'Hello, World!' > /mnt/additional_storage/shared_data.txt; "
"echo 'Successfully wrote data to persistent storage';",
],
},
},
{
"name": "read-data",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"command": [
"sh",
"-c",
"cat /mnt/additional_storage/shared_data.txt; "
"echo 'Successfully read data from persistent storage';",
],
},
"command": [
"sh",
"-c",
"cat /mnt/additional_storage/shared_data.txt; "
"echo 'Successfully read data from persistent storage';",
],
},
},
],
Expand Down Expand Up @@ -506,11 +491,10 @@ def test_job_invalid_image_format(sdk: NeMoPlatform, workspace: str, bad_image:
{
"name": "bad-image-step",
"executor": {
"kind": "subprocess",
"provider": "cpu",
"container": {
"image": bad_image,
"command": ["echo", "This should not run"],
},
"image": bad_image,
"command": ["echo", "This should not run"],
},
},
],
Expand Down
Loading
Loading