Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e89279f
feat: add agent-context CLI introspection
johnnygreco Feb 13, 2026
e0ef33d
fix: correct agent-context field descriptions in column configs
johnnygreco Feb 15, 2026
914f3c4
feat: enhance pydantic and method inspectors with richer field details
johnnygreco Feb 16, 2026
ac91dc5
feat: add Field descriptions and docstrings to config models
johnnygreco Feb 16, 2026
311201c
feat: enhance formatters with rich field display, dedup, and new form…
johnnygreco Feb 16, 2026
55bb914
feat: add discovery for namespace tree, interface classes, and imports
johnnygreco Feb 16, 2026
de03cdb
refactor: rename agent-context CLI to introspect and add new subcommands
johnnygreco Feb 16, 2026
f18506c
test: add CLI usage scenario integration tests
johnnygreco Feb 16, 2026
fe2d87e
refactor: replace introspect command with types and reference command…
johnnygreco Feb 16, 2026
37ae075
refactor: update formatters and tests for new types/reference CLI str…
johnnygreco Feb 16, 2026
78950d9
drop stale review
johnnygreco Feb 16, 2026
52fd3aa
refactor: replace hardcoded discovery functions with introspection-ba…
johnnygreco Feb 16, 2026
35d10e2
fix: improve introspection defaults and depth checks
johnnygreco Feb 16, 2026
f7fa98d
fix: align enum output across text/json and remove dead try/except
johnnygreco Feb 16, 2026
93e0a61
fix: surface namespace import failures in debug logs
johnnygreco Feb 16, 2026
2ae47e7
sort
johnnygreco Feb 16, 2026
37a3c6c
refactor introspection discovery and normalize typed schema output
johnnygreco Feb 16, 2026
7496c8c
feat: add data-designer list-assets agent-helper command
johnnygreco Feb 17, 2026
221e1cc
refactor: replace types/reference commands with inspect agent-helper
johnnygreco Feb 18, 2026
b1778a7
feat: add list agent-helper command group
johnnygreco Feb 18, 2026
03db803
docs: clarify that constraints apply only to sampler columns
johnnygreco Feb 18, 2026
ea03168
refactor: rename inspect "builder" subcommand to "config_builder"
johnnygreco Feb 18, 2026
98288c6
docs: improve agent-helper CLI help descriptions for agent consumption
johnnygreco Feb 18, 2026
d11aa41
fix: use hyphenated config-builder for CLI subcommand name
johnnygreco Feb 18, 2026
7520939
docs: tighten agent-helper CLI help descriptions
johnnygreco Feb 18, 2026
4d19e90
docs: use column header names in list command tips for clarity
johnnygreco Feb 18, 2026
45e06a8
docs: sharpen inspect and list group-level help descriptions
johnnygreco Feb 18, 2026
e8d3708
refactor: remove related_inspect_tip from inspect command output
johnnygreco Feb 18, 2026
122346f
refactor: remove dead code from introspection services
johnnygreco Feb 18, 2026
9ad0399
fix: harden introspection service layer
johnnygreco Feb 18, 2026
fe9ebf6
refactor: clean up IntrospectionController
johnnygreco Feb 18, 2026
f1e3593
fix: harden ListController and eliminate DRY violation
johnnygreco Feb 18, 2026
8ca35c4
docs: polish help text and field description consistency
johnnygreco Feb 18, 2026
589aafa
test: add coverage for introspection edge cases and crash paths
johnnygreco Feb 18, 2026
50fffa6
refactor: simplify introspection inspectors without changing output
johnnygreco Feb 18, 2026
ad8da02
refactor: lazy-load inspect CLI commands
johnnygreco Feb 19, 2026
83ccb30
fix: restore agent-helper list CLI commands
johnnygreco Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions packages/data-designer-config/src/data_designer/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@ class SingleColumnConfig(ConfigBase, ABC):
name: Unique name of the column to be generated.
drop: If True, the column will be generated but removed from the final dataset.
Useful for intermediate columns that are dependencies for other columns.
allow_resize: If True, the column is allowed to be resized during generation.
column_type: Discriminator field that identifies the specific column type.
Subclasses must override this field to specify the column type with a `Literal` value.
"""

name: str
drop: bool = False
allow_resize: bool = False
column_type: str
name: str = Field(description="Unique name of the column to be generated")
drop: bool = Field(
default=False, description="If True, the column will be generated but removed from the final dataset"
)
allow_resize: bool = Field(
default=False, description="If True, the column is allowed to be resized during generation"
)
column_type: str = Field(description="Discriminator field that identifies the specific column type")

@staticmethod
def get_column_emoji() -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,22 @@ class SamplerColumnConfig(SingleColumnConfig):
```
"""

sampler_type: SamplerType
params: Annotated[SamplerParamsT, Discriminator("sampler_type")]
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = {}
convert_to: str | None = None
column_type: Literal["sampler"] = "sampler"
sampler_type: SamplerType = Field(
description="Type of sampler to use (e.g., uuid, category, uniform, gaussian, person, datetime)"
)
params: Annotated[SamplerParamsT, Discriminator("sampler_type")] = Field(
description="Parameters specific to the chosen sampler type"
)
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = Field(
default_factory=dict,
description="Optional dictionary for conditional parameters; keys are conditions, values are params to use when met",
)
convert_to: str | None = Field(
default=None, description="Optional type conversion after sampling: 'float', 'int', or 'str'"
)
column_type: Literal["sampler"] = Field(
default="sampler", description="Discriminator field, always 'sampler' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -136,14 +147,28 @@ class LLMTextColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "llm-text" for this configuration type.
"""

prompt: str
model_alias: str
system_prompt: str | None = None
multi_modal_context: list[ImageContext] | None = None
tool_alias: str | None = None
with_trace: TraceType = TraceType.NONE
extract_reasoning_content: bool = False
column_type: Literal["llm-text"] = "llm-text"
prompt: str = Field(
description="Jinja2 template for the LLM prompt; can reference other columns via {{ column_name }}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth warning here or somewhere else about providing f-strings that could mess up the Jinja template? I've found that Cursor likes to auto-convert this to f""

)
model_alias: str = Field(description="Alias of the model configuration to use for generation")
system_prompt: str | None = Field(
default=None, description="Optional system prompt to set model behavior and constraints"
)
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for vision model inputs"
)
tool_alias: str | None = Field(
default=None, description="Optional alias of the tool configuration to use for MCP tool calls"
)
with_trace: TraceType = Field(
default=TraceType.NONE, description="Trace capture mode: NONE, LAST_MESSAGE, or ALL_MESSAGES"
)
extract_reasoning_content: bool = Field(
default=False, description="If True, capture chain-of-thought in {name}__reasoning_content column"
)
column_type: Literal["llm-text"] = Field(
default="llm-text", description="Discriminator field, always 'llm-text' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -219,8 +244,12 @@ class LLMCodeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

code_lang: CodeLang
column_type: Literal["llm-code"] = "llm-code"
code_lang: CodeLang = Field(
description="Target programming language or SQL dialect for code extraction from LLM response"
)
column_type: Literal["llm-code"] = Field(
default="llm-code", description="Discriminator field, always 'llm-code' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -252,8 +281,12 @@ class LLMStructuredColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

output_format: dict | type[BaseModel]
column_type: Literal["llm-structured"] = "llm-structured"
output_format: dict | type[BaseModel] = Field(
description="Pydantic model or JSON schema dict defining the expected structured output shape"
)
column_type: Literal["llm-structured"] = Field(
default="llm-structured", description="Discriminator field, always 'llm-structured' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -317,8 +350,12 @@ class LLMJudgeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

scores: list[Score] = Field(..., min_length=1)
column_type: Literal["llm-judge"] = "llm-judge"
scores: list[Score] = Field(
..., min_length=1, description="List of Score objects defining rubric criteria for LLM judge evaluation"
)
column_type: Literal["llm-judge"] = Field(
default="llm-judge", description="Discriminator field, always 'llm-judge' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -341,10 +378,13 @@ class ExpressionColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "expression" for this configuration type.
"""

name: str
expr: str
dtype: Literal["int", "float", "str", "bool"] = "str"
column_type: Literal["expression"] = "expression"
expr: str = Field(description="Jinja2 expression to compute the column value from other columns")
dtype: Literal["int", "float", "str", "bool"] = Field(
default="str", description="Data type for expression result: 'int', 'float', 'str', or 'bool'"
)
column_type: Literal["expression"] = Field(
default="expression", description="Discriminator field, always 'expression' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -410,11 +450,13 @@ class ValidationColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "validation" for this configuration type.
"""

target_columns: list[str]
validator_type: ValidatorType
validator_params: ValidatorParamsT
target_columns: list[str] = Field(description="List of column names to validate")
validator_type: ValidatorType = Field(description="Validation method: 'code', 'local_callable', or 'remote'")
Copy link
Contributor

@nabinchha nabinchha Feb 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is 'code', 'local_callable', or 'remote' necessary since it's already strongly typed? Same comment for other similar changes.

validator_params: ValidatorParamsT = Field(description="Validator-specific parameters (e.g., CodeValidatorParams)")
batch_size: int = Field(default=10, ge=1, description="Number of records to process in each batch")
column_type: Literal["validation"] = "validation"
column_type: Literal["validation"] = Field(
default="validation", description="Discriminator field, always 'validation' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -441,7 +483,9 @@ class SeedDatasetColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "seed-dataset" for this configuration type.
"""

column_type: Literal["seed-dataset"] = "seed-dataset"
column_type: Literal["seed-dataset"] = Field(
default="seed-dataset", description="Discriminator field, always 'seed-dataset' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -468,9 +512,11 @@ class EmbeddingColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "embedding" for this configuration type.
"""

target_column: str
model_alias: str
column_type: Literal["embedding"] = "embedding"
target_column: str = Field(description="Name of the text column to generate embeddings for")
model_alias: str = Field(description="Alias of the model to use for embedding generation")
column_type: Literal["embedding"] = Field(
default="embedding", description="Discriminator field, always 'embedding' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -502,10 +548,16 @@ class ImageColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "image" for this configuration type.
"""

prompt: str
model_alias: str
multi_modal_context: list[ImageContext] | None = None
column_type: Literal["image"] = "image"
prompt: str = Field(
description="Jinja2 template for the image generation prompt; can reference other columns via {{ column_name }}"
)
model_alias: str = Field(description="Alias of the model to use for image generation")
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for image-to-image generation inputs"
)
column_type: Literal["image"] = Field(
default="image", description="Discriminator field, always 'image' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -562,7 +614,9 @@ class CustomColumnConfig(SingleColumnConfig):
default=None,
description="Optional typed configuration object passed as second argument to generator function",
)
column_type: Literal["custom"] = "custom"
column_type: Literal["custom"] = Field(
default="custom", description="Discriminator field, always 'custom' for this configuration type"
)

@field_validator("generator_function")
@classmethod
Expand Down
38 changes: 24 additions & 14 deletions packages/data-designer-config/src/data_designer/config/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ class MCPProvider(ConfigBase):
... )
"""

provider_type: Literal["sse"] = "sse"
name: str
endpoint: str
api_key: str | None = None
provider_type: Literal["sse"] = Field(
default="sse", description="Transport type discriminator, always 'sse' for remote MCP providers"
)
name: str = Field(description="Unique name used to reference this MCP provider")
endpoint: str = Field(description="SSE endpoint URL for connecting to the remote MCP server")
api_key: str | None = Field(default=None, description="Optional API key for authentication")


class LocalStdioMCPProvider(ConfigBase):
Expand All @@ -63,11 +65,15 @@ class LocalStdioMCPProvider(ConfigBase):
... )
"""

provider_type: Literal["stdio"] = "stdio"
name: str
command: str
args: list[str] = Field(default_factory=list)
env: dict[str, str] = Field(default_factory=dict)
provider_type: Literal["stdio"] = Field(
default="stdio", description="Transport type discriminator, always 'stdio' for local subprocess MCP providers"
)
name: str = Field(description="Unique name used to reference this MCP provider")
command: str = Field(description="Executable to launch the MCP server via stdio transport")
args: list[str] = Field(default_factory=list, description="Arguments passed to the MCP server executable")
env: dict[str, str] = Field(
default_factory=dict, description="Environment variables passed to the MCP server subprocess"
)


MCPProviderT: TypeAlias = Annotated[MCPProvider | LocalStdioMCPProvider, Field(discriminator="provider_type")]
Expand Down Expand Up @@ -102,8 +108,12 @@ class ToolConfig(ConfigBase):
... )
"""

tool_alias: str
providers: list[str]
allow_tools: list[str] | None = None
max_tool_call_turns: int = Field(default=5, ge=1)
timeout_sec: float | None = Field(default=None, gt=0)
tool_alias: str = Field(description="User-defined alias to reference this tool configuration in column configs")
providers: list[str] = Field(description="Names of the MCP providers to use for tool calls")
allow_tools: list[str] | None = Field(
default=None, description="Optional allowlist of tool names that restricts which tools are permitted"
)
max_tool_call_turns: int = Field(
default=5, ge=1, description="Maximum number of tool-calling turns permitted in a single generation"
)
timeout_sec: float | None = Field(default=None, gt=0, description="Timeout in seconds for MCP tool calls")
Loading