diff --git a/README.md b/README.md index 023e488..1777a5c 100644 --- a/README.md +++ b/README.md @@ -8,17 +8,19 @@ This is not part of the core Starburst product and is not covered by Starburst support agreements. It is a community developed set of scripts to make your life easier when managing Starburst Enterprise Data Products. +This is a fork from https://github.com/ottensa/sepdpc adding options to filter Data Products on a domain and/or a catalog. + ## Introduction This Python package is a CLI for managing Starburst Enterprise Data Products in a local repository. -The motivation behind this project comes from the demand I see at customers to manage Starburst Enterprise Data Products in git. +The motivation behind this project comes from the demand from customers to manage Starburst Enterprise Data Products in git, or to move/update Data Products between different clusters. ## Installation Releases are not yet available on PyPI, but you can install using pip nonetheless: ```shell python -m pip install -U pip -python -m pip install -U pip install git+https://github.com/ottensa/sepdpc.git +python -m pip install -U pip install git+https://github.com/SaravananPrasadh-SP/sepdpc.git ``` ## Usage @@ -62,9 +64,27 @@ $ sepdpc configure --help │ * --user TEXT The username you are authenticating with │ │ [default: None] │ │ [required] │ -│ * --token TEXT The token used for authentication │ +│ * --token TEXT The token used for authentication, after "Basic " │ │ [default: None] │ │ [required] │ +│ --catalog TEXT The catalog to filter on, "none" for no filter │ +│ [default: None] │ +│ [optional] │ +│ --domain TEXT The domain to filter on, "none" for no filter │ +│ [default: None] │ +│ [optional] │ +│ --product TEXT The data product to filter on,"none" for no filter│ +│ [default: None] │ +│ [optional] │ +│ --roles TEXT The role to assume, "*" for all allowed roles │ +│ [default: None] │ +│ [optional] │ +│ --secure BOOL Allow insecure access, disable "Verify" │ +│ [default: False] │ +│ [optional] │ +│ --includedrafts BOOL Allow draft products to be exported/included │ +│ [default: False] │ +│ [optional] │ │ --help Show this message and exit. │ ╰───────────────────────────────────────────────────────────────────────────╯ ``` @@ -91,7 +111,7 @@ It will download the Data Products and persist them into the given path. ```shell sepdpc generate --help - Usage: sepdpc diff [OPTIONS] PATH + Usage: sepdpc generate [OPTIONS] PATH ╭─ Arguments ───────────────────────────────────────────────────────────────╮ │ * path TEXT [default: None] [required] │ @@ -106,7 +126,7 @@ It will delete Data Products that are not reflected in the repository, create ne ```shell sepdpc publish --help - Usage: sepdpc diff [OPTIONS] PATH + Usage: sepdpc publish [OPTIONS] PATH ╭─ Arguments ───────────────────────────────────────────────────────────────╮ │ * path TEXT [default: None] [required] │ @@ -121,7 +141,7 @@ The validation will also be implicitly called before you try to publish your rep ```shell sepdpc validate --help - Usage: sepdpc diff [OPTIONS] PATH + Usage: sepdpc validate [OPTIONS] PATH ╭─ Arguments ───────────────────────────────────────────────────────────────╮ │ * path TEXT [default: None] [required] │ diff --git a/pyproject.toml b/pyproject.toml index 08f6927..a501dec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "sepdpc" version = "0.1.0" description = "" -authors = ["Arne Ottens "] +authors = ["Arne Ottens ","Saravanan Prasadh "] readme = "README.md" packages = [{include = "sepdpc", from = "src"}] @@ -14,7 +14,7 @@ pyyaml = "^6.0.1" deepdiff = "^6.3.1" python-dotenv = "^1.0.0" rich = "^13.5.2" -adastra = {git = "https://github.com/ottensa/adastra.git"} +adastra = {git = "https://github.com/SaravananPrasadh-SP/adastra.git"} [tool.poetry.scripts] sepdpc = 'sepdpc.__main__:main' diff --git a/src/sepdpc/__main__.py b/src/sepdpc/__main__.py index e2aa1c2..809ad38 100644 --- a/src/sepdpc/__main__.py +++ b/src/sepdpc/__main__.py @@ -13,13 +13,19 @@ help_text = { 'host': 'The host of your Starburst Enterprise instance, e.g. https://sep.example.com:8443', 'user': 'The username you are authenticating with', - 'token': 'The token used for authentication' + 'token': 'The token used for authentication, the part after "Basic" and "-" if no authentication (no password)', + 'domain': 'The domain to filter data products, "none" for no domain filter', + 'catalog': 'The catalog to filter data products, "none" for no catalog filter', + 'product': 'The product to filter data products, "none" for no product filter', + 'insecure': 'Allow insecure connections', + 'roles': 'Roles to filter data products, "*" for all roles', + 'includeDrafts': 'Boolean toggle to include draft data products' } config_path = Path.home() / '.sepdpc' -def _opt(help, env=None): +def _opt(help, env=None, inputType=str): config = { 'help': help, 'prompt': env is None @@ -28,34 +34,50 @@ def _opt(help, env=None): config['envvar'] = env option = typer.Option(**config) - return Annotated[str, option] + return Annotated[inputType, option] HostOpt = _opt(help_text['host'], 'SEPDPC_HOST') UserOpt = _opt(help_text['user'], 'SEPDPC_USER') TokenOpt = _opt(help_text['token'], 'SEPDPC_TOKEN') +DomainOpt = _opt(help_text['domain'], 'SEPDPC_DOMAIN') +CatalogOpt = _opt(help_text['catalog'], 'SEPDPC_CATALOG') +productOpt = _opt(help_text['product'], 'SEPDPC_PRODUCT') +rolesOpt = _opt(help_text['roles'], 'SEPDPC_ROLES') +secureOpt = _opt(help_text['insecure'], 'SEPDPC_SECURE', bool) +includeDraftsOpt = _opt(help_text['includeDrafts'], 'SEPDPC_INCLUDE_DRAFTS',bool) app = typer.Typer() @app.command() -def configure(host: _opt(help_text['host']), user: _opt(help_text['user']), token: _opt(help_text['token'])): +def configure(host: _opt(help_text['host']), user: _opt(help_text['user']), token: _opt(help_text['token']), \ + domain: _opt(help_text['domain']), catalog: _opt(help_text['catalog']), \ + secure: _opt(help_text['insecure']), roles: _opt(help_text['roles']), \ + product: _opt(help_text['product']), includeDrafts: _opt(help_text['includeDrafts'])): config_path.touch(mode=0o600, exist_ok=False) set_key(dotenv_path=config_path, key_to_set="SEPDPC_HOST", value_to_set=host) set_key(dotenv_path=config_path, key_to_set="SEPDPC_USER", value_to_set=user) set_key(dotenv_path=config_path, key_to_set="SEPDPC_TOKEN", value_to_set=token) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_DOMAIN", value_to_set=domain) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_CATALOG", value_to_set=catalog) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_PRODUCT", value_to_set=product) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_INSECURE", value_to_set=secure) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_ROLES", value_to_set=roles) + set_key(dotenv_path=config_path, key_to_set="SEPDPC_INCLUDE_DRAFTS", value_to_set=includeDrafts) print('Configured sepdpc') @app.command() -def generate(host: HostOpt, user: UserOpt, token: TokenOpt, path: str): - client = SepClient(host=host, user=user, token=token) - repo = repository.from_server(client) +def generate(host: HostOpt, user: UserOpt, token: TokenOpt, path: str, domain: DomainOpt = "none", catalog: CatalogOpt = "none", \ + secure: secureOpt = True, includeDrafts: includeDraftsOpt = False, roles: rolesOpt = "*", product: productOpt = "none"): + if token != '-': token='Basic '+token + client = SepClient(host=host, user=user, token=token, verify=secure, roles=roles) + repo = repository.from_server(client, domain, catalog, product, includeDrafts) repository.persist(repo, path) print('Remote repository persisted into:', path) - @app.command() def validate(path: str): repo = repository.from_local(path) @@ -64,9 +86,11 @@ def validate(path: str): @app.command() -def diff(host: HostOpt, user: UserOpt, token: TokenOpt, path: str): - client = SepClient(host=host, user=user, token=token) - remote_repo = repository.from_server(client) +def diff(host: HostOpt, user: UserOpt, token: TokenOpt, path: str, domain: DomainOpt = "none", catalog: CatalogOpt = "none", \ + secure: secureOpt = True, includeDrafts: includeDraftsOpt = False, roles: rolesOpt = "*", product: productOpt = "none"): + if token != '-': token='Basic '+token + client = SepClient(host=host, user=user, token=token, verify=secure, roles=roles) + remote_repo = repository.from_server(client, domain, catalog, product, includeDrafts) local_repo = repository.from_local(path) delta = repository.diff(remote_repo, local_repo) @@ -93,10 +117,12 @@ def diff(host: HostOpt, user: UserOpt, token: TokenOpt, path: str): @app.command() -def publish(host: HostOpt, user: UserOpt, token: TokenOpt, path: str): - client = SepClient(host=host, user=user, token=token) +def publish(host: HostOpt, user: UserOpt, token: TokenOpt, path: str, domain: DomainOpt = "none", catalog: CatalogOpt = "none", \ + secure: secureOpt = True, includeDrafts: includeDraftsOpt = False, roles: rolesOpt = "*", product: productOpt = "none"): + if token != '-': token='Basic '+token + client = SepClient(host=host, user=user, token=token, verify=secure, roles=roles) local_repo = repository.from_local(path) - repository.publish(client, local_repo) + repository.publish(client, domain, catalog, product, includeDrafts, local_repo) print('Published repository from:', path) diff --git a/src/sepdpc/repository.py b/src/sepdpc/repository.py index d76b189..584e293 100644 --- a/src/sepdpc/repository.py +++ b/src/sepdpc/repository.py @@ -58,7 +58,7 @@ class ProductStruct(InterstellarBaseModel): tags: Optional[list[str]] = None samples: Optional[list[SampleQuery]] = None datasets: list[DatasetStruct] = None - + status: Optional[str] = None class RepositoryDiff(BaseModel): deleted_domains: List[DomainStruct] @@ -75,7 +75,7 @@ class Repository(BaseModel): products: List[ProductStruct] -def from_server(server_client: SepClient) -> Repository: +def from_server(server_client: SepClient, domain_filter: str, catalog_filter: str, product_filter: str, include_drafts: bool ) -> Repository: """Laden der Domains und der Produkte von SEP und in die Repository Struktur bringen""" dpc = server_client.data_product_service() dc = server_client.domain_service() @@ -86,45 +86,52 @@ def from_server(server_client: SepClient) -> Repository: domains = [DomainStruct(**domain.model_dump()) for domain in sep_domains] products = [] + if domain_filter!="none": domainID=next(d.id for d in domains if d.name == domain_filter) + for dp in sep_products: - tags = dpc.get_tags(dp.id) - samples = dpc.get_samples(dp.id) - datasets = [] - for view in dp.views: - datasets.append( - DatasetStruct( - name=view.name, - query=view.definitionQuery, - summary=view.description, - columns=view.columns + + if ((catalog_filter == "none" or dp.catalogName == catalog_filter) and + (domain_filter == "none" or dp.dataDomainId == domainID) and + (product_filter == "none" or dp.name == product_filter) and + (dp.status == "PUBLISHED" or include_drafts)): + tags = dpc.get_tags(dp.id) + samples = dpc.get_samples(dp.id) + datasets = [] + for view in dp.views: + datasets.append( + DatasetStruct( + name=view.name, + query=view.definitionQuery, + summary=view.description, + columns=view.columns + ) ) - ) - for view in dp.materializedViews: - datasets.append( - DatasetStruct( - name=view.name, - query=view.definitionQuery, - summary=view.description, - columns=view.columns, - materialization=view.definitionProperties + for view in dp.materializedViews: + datasets.append( + DatasetStruct( + name=view.name, + query=view.definitionQuery, + summary=view.description, + columns=view.columns, + materialization=view.definitionProperties + ) ) - ) - product = ProductStruct( - id=dp.id, - name=dp.name, - desc=dp.description, - summary=dp.summary, - catalog=dp.catalogName, - domain=next(d.name for d in domains if d.id == dp.dataDomainId), - owner=dp.owners, - links=dp.relevantLinks, - tags=[tag.value for tag in tags], - samples=samples, - datasets=datasets - ) - products.append(product) + product = ProductStruct( + id=dp.id, + name=dp.name, + desc=dp.description, + summary=dp.summary, + catalog=dp.catalogName, + domain=next(d.name for d in domains if d.id == dp.dataDomainId), + owner=dp.owners, + links=dp.relevantLinks, + tags=[tag.value for tag in tags], + samples=samples, + datasets=datasets + ) + products.append(product) return Repository(domains=domains, products=products) @@ -317,7 +324,10 @@ def _persist_data_product(path: Path, data_product: ProductStruct): yaml.dump(metadata, f, sort_keys=False) readme_path = dp_path / "readme.md" - readme_path.write_text(data_product.desc, encoding='utf-8') + if not isinstance(data_product.desc, str): + readme_path.write_text("") + else: + readme_path.write_text(data_product.desc, encoding='utf-8') _persist_datasets(dp_path, data_product.datasets) _persist_samples(dp_path, data_product.samples) @@ -383,10 +393,10 @@ def _upsert_data_product(dpc: DataProductService, product_struct: ProductStruct, dpc.publish(data_product.id) -def publish(server_client: SepClient, repo: Repository): +def publish(server_client: SepClient, domain_filter: str, catalog_filter: str, product_filter:str, include_drafts:bool, repo: Repository): """Auf den Server schieben""" validate(repo) - remote_repo = from_server(server_client) + remote_repo = from_server(server_client, domain_filter, catalog_filter, product_filter,include_drafts) # mapping von domain name auf domain id domain_mapping = dict([(domain.name, domain.id) for domain in remote_repo.domains]) @@ -395,23 +405,24 @@ def publish(server_client: SepClient, repo: Repository): dc = server_client.domain_service() delta = diff(remote_repo, repo) + # 1. delete products for dp in delta.deleted_products: dpc.delete(dp.id) # 2. create domains for d in delta.created_domains: - domain_to_create = Domain(name=d.name, description=d.desc, schemaLocation=d.path) - new_domain = dc.create(domain_to_create) - domain_mapping[new_domain.name] = new_domain.id + domain_to_create = Domain(name=d.name, description=d.desc, schemaLocation=d.path) + new_domain = dc.create(domain_to_create) + domain_mapping[new_domain.name] = new_domain.id # 3. reassign products for dp in delta.reassigned_products: - dpc.reassign(dp.id, domain_mapping[dp.domain]) + dpc.reassign(dp.id, domain_mapping[dp.domain]) # 4. delete domains for d in delta.deleted_domains: - dc.delete(d.id) + dc.delete(d.id) # 5. update products for dp in delta.updated_products: @@ -419,7 +430,7 @@ def publish(server_client: SepClient, repo: Repository): # 6. update domains for d in delta.updated_domains: - dc.update(Domain(id=d.id, name=d.name, description=d.desc, schemaLocation=d.path)) + dc.update(Domain(id=d.id, name=d.name, description=d.desc, schemaLocation=d.path)) # 7. create products for dp in delta.created_products: