diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 6d5dcc7..070a0f3 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -185,6 +185,9 @@ jobs: - name: Validate lockfile run: uv lock --check + - name: Scan for Unicode characters + run: uv run --frozen python tests/unicode_scan.py + - name: Lint Python run: uv run --frozen ruff check . diff --git a/AGENTS.md b/AGENTS.md index af578f5..3ae950a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,9 +14,8 @@ actionlint ### Markdown files npx --yes -p markdownlint-cli2@0.22.1 -p markdownlint-rule-relative-links@5.1.0 markdownlint-cli2 -### Confusable Unicode characters in non-Python files -# (ruff RUF001-RUF003 covers Python strings/comments/docstrings) -uv run python tests/confusables.py +### Non-ASCII characters in tracked text files +uv run python tests/unicode_scan.py ### Python files @@ -41,7 +40,7 @@ own checks. ```bash # Fast, no network (also what the pre-commit hook runs): -# lint, format, pyright, confusable-character scan, unit + fixture +# lint, format, pyright, non-ASCII character scan, unit + fixture # tests, CLI rejection matrix, # randomized permission invariants uv run tests/run.py @@ -64,7 +63,7 @@ uv run tests/run.py --performance --baseline-command "uvx src-auth-perms-sync@la uv run tests/run.py --update-golden ``` -- Fixture cases live in `tests/e2e/fixtures//` — see the README there +- Fixture cases live in `tests/e2e/fixtures//` - see the README there for the format. Add cases there to cover new mapping behaviors. - For manual verification against a real instance, dry-run first (no `--apply`), read the planned changes, then `--apply` on a scratch instance @@ -165,7 +164,7 @@ gh run watch "${RUN_ID}" --repo "${GH_REPO}" --exit-status gh release view "v${VERSION}" --repo "${GH_REPO}" ``` -## Hard invariants — do not break +## Hard invariants - do not break Violating these can silently grant the wrong users access to the wrong repos. 
@@ -206,31 +205,31 @@ organization sync maps SAML groups to Sourcegraph org membership. Read CLI lives in `src/src_auth_perms_sync/`; invoke with `uv run src-auth-perms-sync`. Strict pyright covers the package. Root modules are entrypoints only: -- `cli.py` — `main()`, arg parsing, owns the CLI description. Module +- `cli.py` - `main()`, arg parsing, owns the CLI description. Module wrappers (`Get`/`Set`/`Restore`/`SyncSamlOrgs`) return result dataclasses and never install logging handlers; only `main()` runs CLI-mode logging. -- `shared/` — cross-workflow helpers: Sourcegraph auth-provider/user list +- `shared/` - cross-workflow helpers: Sourcegraph auth-provider/user list helpers, shared GraphQL operations and TypedDicts, site-config validation, and SAML group parsing. `shared/backups.py` defines `RunPaths`: every filesystem path for one run, resolved once at the edge - (`resolve_run_paths`) and threaded explicitly — never recompute paths + (`resolve_run_paths`) and threaded explicitly - never recompute paths from cwd or globals below the edge, and honor `run_paths.write_files` (False under `--no-files`) before any disk write. Business workflows live in packages: -- `permissions/` — repo permission sync (`command.py`, `maps.py`, +- `permissions/` - repo permission sync (`command.py`, `maps.py`, `mapping.py`, `sourcegraph.py`, `snapshot.py`, `apply.py`, `queries.py`, `types.py`). Add new mapping filters in `permissions/types.py` and `permissions/mapping.py`. -- `orgs/` — SAML group → Sourcegraph organization sync (`command.py`, +- `orgs/` - SAML group -> Sourcegraph organization sync (`command.py`, `queries.py`, `types.py`). ## Toolchain - Python 3.11 + [uv](https://docs.astral.sh/uv/). Never invoke `python` directly; always `uv run ...`. -- `uv run pyright` must be clean. No `# type: ignore` to silence — +- `uv run pyright` must be clean. No `# type: ignore` to silence - fix the underlying type. 
- Local tests use stdlib `unittest`: `uv run python -m unittest discover -s tests`. - For Sourcegraph mutation-path changes, also verify by dry-running `--get` / @@ -268,7 +267,7 @@ Short names that ARE acceptable (don't rewrite these on sight): `key`, `value`, `name`, `kind`. - **Stdlib idioms**: `ctx` for `contextvars.copy_context()`. - **Loop / comprehension variables when the type is obvious from one - line of context** is still discouraged — prefer `user`, `repo`, + line of context** is still discouraged - prefer `user`, `repo`, `provider`, `service`, `permission`, `node`, `entry`, `match`, `account`, `future`, `executor`, `exception`, `event`, `connection`, `response`, `timestamp`, `current`, `outcome`, `index`, `field_name`. diff --git a/README.md b/README.md index 5cccb72..656eaf7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # src-auth-perms-sync - + src-auth-perms-sync automates Sourcegraph's Explicit Permissions GraphQL API, setting user-to-repo permissions based on mapping rules, for example: @@ -147,7 +147,7 @@ for provider in get_result.auth_providers: for code_host in get_result.code_hosts: ... -# Mapping rules can be passed in memory instead of a maps YAML file — +# Mapping rules can be passed in memory instead of a maps YAML file - # same structure and validation as maps.yaml entries: rules: list[src.MappingRule] = [ { @@ -167,11 +167,11 @@ result = src.Set(config, mapping_rules=rules) ``` The import API does not read environment variables or `.env` files on its -own — those apply to the CLI only. Pass every value explicitly to +own - those apply to the CLI only. Pass every value explicitly to `src.Config(...)` (read `os.environ` yourself if you want env-driven configuration, as the example above does). -Module mode never touches your `logging` handlers or the root logger — your +Module mode never touches your `logging` handlers or the root logger - your application's logging config stays in charge. 
To see progress messages: ```python @@ -297,7 +297,7 @@ snapshots that make `--apply` reversible. src-auth-perms-sync sync-saml-orgs --users-without-explicit-perms ``` - - Same user filters as `get` and `set`; a mode flag is required — there + - Same user filters as `get` and `set`; a mode flag is required - there is no bare `sync-saml-orgs` ### Org sync behavior @@ -305,14 +305,14 @@ snapshots that make `--apply` reversible. - Org names are `synced--` (non-alphanumeric characters become `-`). The `synced-` prefix marks tool ownership: the sync only ever modifies orgs whose name carries it, so manually created orgs are never touched. -- The org sync mode is always explicit — no surprises: +- The org sync mode is always explicit - no surprises: - **Full** (`sync-saml-orgs --full`, or `set --full` / `--repos*` `--sync-saml-orgs`): converges every synced org against all users. A synced org whose SAML group disappeared has all members removed, but the org itself is kept (its settings survive in case the group comes back). - **Scoped** (user filters on `sync-saml-orgs`, or `set --users` / `--users-without-explicit-perms` / `--created-after` with - `--sync-saml-orgs`): syncs org membership for exactly the selected users — + `--sync-saml-orgs`): syncs org membership for exactly the selected users - per-user additions AND removals, computed from each user's own SAML assertion and org list. 
Other users' memberships never change, and no full user scan or org member listing is needed, so API traffic stays @@ -326,16 +326,16 @@ Run `src-auth-perms-sync --help` for options ```text src-auth-perms-sync-runs// -├── auth-providers.yaml -├── code-hosts.yaml -├── maps.yaml -└── runs - └── timestamp-command - ├── before.json - ├── after.json - ├── diff.json - ├── log.json - └── maps.yaml +|-- auth-providers.yaml +|-- code-hosts.yaml +|-- maps.yaml +`-- runs + `-- timestamp-command + |-- before.json + |-- after.json + |-- diff.json + |-- log.json + `-- maps.yaml ``` - The `src-auth-perms-sync-runs` dir is created under your current working directory @@ -353,4 +353,4 @@ src-auth-perms-sync-runs// - An `after.json` file, capturing the new state - A `diff.json` file, a shorter, reviewable file containing the diffs between before and after - + diff --git a/dev/TODO.md b/dev/TODO.md index 350ac16..e0f1887 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -32,7 +32,7 @@ - `get --repos ` still scans every user's explicit grants to find one repo's holders (~400 s at 10k users). A repo-centric read (`repository.permissionsInfo.users` + site-admin disambiguation, as the - test harness already does) would make it seconds — see the repo-centric + test harness already does) would make it seconds - see the repo-centric section below. ## Low priority: Repo-centric path, when users > repos, or for cross-checking @@ -50,10 +50,10 @@ Reasons we might want to bring it back later: - **Targeted snapshots.** A "planned-scope" capture (only the repos the mapping rules touch) is faster than a full instance scan when the user-centric path is the long pole AND the planned set is small. - Would need either a server-side `source` filter on the repo→users + Would need either a server-side `source` filter on the repo->users connection, or a follow-up user-centric `source: API` query per ambiguous (site-admin) user to disambiguate. 
-- **Adaptive capture path after SG adds `source` to repo→users.** Once +- **Adaptive capture path after SG adds `source` to repo->users.** Once `RepositoryPermissionsInfo.users(source: PermissionSource)` exists, compute the expected request count both ways before snapshotting: sum `userCount` across all auth providers and sum `repoCount` across @@ -70,7 +70,7 @@ If/when we revisit: ambiguous-user follow-up to be correct). 2. Restore `QUERY_REPO_EXPLICIT_USERS` from git history; implement `list_repo_explicit_users` returning `(definitely, ambiguous)` and - actually consume both buckets — the previous code did neither. + actually consume both buckets - the previous code did neither. 3. Add a CLI flag (e.g. `--cross-check-capture`) gated behind a clear "this doubles capture cost" warning. @@ -89,7 +89,7 @@ SAML actually persists the group list. Recovery options for each: - OIDC has no `allowGroups` field on `OpenIDConnectAuthProvider`. `UserClaims` stores only name/email fields; the `groups` claim is never parsed. Recovery needs an upstream change to persist the claim. -- GitHub OAuth has `allowOrgs`, `allowOrgsMap` (org→teams), and +- GitHub OAuth has `allowOrgs`, `allowOrgsMap` (org->teams), and `requiredSsoOrgs`. Org/team checks happen live in `verifyUserOrgs` / `verifyUserTeams` and are discarded. Recovery needs an upstream change to persist the claim. diff --git a/dev/engineering-requests.md b/dev/engineering-requests.md index e39d8e0..299df62 100644 --- a/dev/engineering-requests.md +++ b/dev/engineering-requests.md @@ -88,15 +88,15 @@ Observed during the concurrent captures: - `pgsql-0` CPU (`kubectl top`): 7,636-7,683 millicores of 8,000 (saturated). - `frontend` / `gitserver` CPU: 124-138m / 2-3m (idle bystanders). - `pg_stat_activity`: 29 active statements, all - `permsStore.ListUserPermissions`, **zero wait events** — pure CPU, no lock + `permsStore.ListUserPermissions`, **zero wait events** - pure CPU, no lock contention. 
- `pg_stat_statements`: `permsStore.ListUserPermissions` at 24,026 calls, 27,635.6s total, 1,150ms mean. -- Per-client capture throughput: 23 users/sec solo → 2-4 users/sec at 4-way +- Per-client capture throughput: 23 users/sec solo -> 2-4 users/sec at 4-way concurrency. -- Aggregate throughput: 8-16 users/sec at 4-way — **below the 23 users/sec a +- Aggregate throughput: 8-16 users/sec at 4-way - **below the 23 users/sec a single client achieves alone** (negative scaling). -- ALB (CloudWatch): no 5xx, no rejected connections — the edge and frontend +- ALB (CloudWatch): no 5xx, no rejected connections - the edge and frontend are not the bottleneck. - Collateral failure: the fifth client's queries exceeded the 60s read timeout under this load; 5 retry attempts exhausted; its run failed with exit 1. @@ -144,7 +144,7 @@ from `github.com/sourcegraph/sourcegraph`: Measured on the 10k-user / 50k-repo test instance, the presence probe `User.permissionsInfo.repositories(source: API, first: 1)` costs 225-350ms of server work per user, and alias batching barely helps (21,004 single-user -probes averaged 351.6ms; 25-user batches averaged 5,616ms ≈ 224.7ms/user). A +probes averaged 351.6ms; 25-user batches averaged 5,616ms ~ 224.7ms/user). A single `set --users-without-explicit-perms` run probing all 10,002 users at batch size 1 spent 4,269s of its 5,210s total in these probes. @@ -158,9 +158,9 @@ Reading the resolver code in `github.com/sourcegraph/sourcegraph` explains why `updatedAt` fields. The rows are discarded afterward. 
- `userPermissionsInfoResolver.Repositories` (permissions_info.go) builds a CTE (`reposPermissionsInfoQueryFmt` in perms_store.go) that **materializes - every repo accessible to the user** — a full `repo` ⋈ - `external_service_repos` ⋈ `external_services` join with the correlated - authz `EXISTS` predicate and an `ORDER BY` — before the outer query applies + every repo accessible to the user** - a full `repo` join + `external_service_repos` join `external_services` join with the correlated + authz `EXISTS` predicate and an `ORDER BY` - before the outer query applies `urp.source = 'API'` and the LIMIT. `first: 1` becomes `LIMIT 2` on the outer query only; the CTE is not short-circuited. - Requesting `totalCount` adds a second independent execution of the same CTE @@ -180,7 +180,7 @@ Client-side mitigation shipped in `src-auth-perms-sync` (2026-06-12): locally BEFORE probing, so probes scale with the rule-matched user count instead of the instance's user count, and user hydration runs as aliased 25-user batches instead of one `UserByID` request per user. The remaining -inherent cost — ~225ms x probed user — is exactly what the +inherent cost - ~225ms x probed user - is exactly what the presence/filter API requested below would remove, and `get --users-without-explicit-perms` still has to probe every active user because its semantics are instance-wide. 
diff --git a/dev/memory-analysis/memory-efficiency-analyze.py b/dev/memory-analysis/memory-efficiency-analyze.py index f9c024a..7903f23 100755 --- a/dev/memory-analysis/memory-efficiency-analyze.py +++ b/dev/memory-analysis/memory-efficiency-analyze.py @@ -470,7 +470,7 @@ def write_text_report( ) r_squared = "n/a" if model.r_squared is None else f"{model.r_squared:.4f}" print("\nFit quality:") - print(f" R²: {r_squared}") + print(f" R^2: {r_squared}") print(f" mean absolute error: {model.mean_absolute_error_megabytes:.2f} MiB") print(f" p95 absolute error: {model.p95_absolute_error_megabytes:.2f} MiB") print(f" max absolute error: {model.max_absolute_error_megabytes:.2f} MiB") diff --git a/examples/maps.yaml b/examples/maps.yaml index 49afbd6..92249ec 100644 --- a/examples/maps.yaml +++ b/examples/maps.yaml @@ -1,4 +1,4 @@ -# User → Repo permission mapping rules +# User -> Repo permission mapping rules # Maintain your maps.yaml file, using the values from auth-providers.yaml and code-hosts.yaml, # which are created by the --get command, under `src-auth-perms-sync-runs//` diff --git a/pyproject.toml b/pyproject.toml index 3465bd4..258ec28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,9 @@ classifiers = [ "Typing :: Typed", ] dependencies = [ - "json5>=0.14.0", + "json5>=0.15.0", "pyyaml>=6.0.3", - "src-py-lib[otel]==0.3.0", + "src-py-lib[otel]==0.3.1", ] keywords = [ "Sourcegraph" @@ -63,9 +63,7 @@ line-length = 100 extend-exclude = ["src-auth-perms-sync-runs"] [tool.ruff.lint] -# RUF001-RUF003 ban confusable Unicode (en dash, multiplication sign, -# lookalike letters) in Python strings, comments, and docstrings; -# tests/confusables.py applies the same policy to non-Python files. +# tests/unicode_scan.py bans non-ASCII characters in tracked text files. 
select = ["E", "F", "I", "B", "UP", "SIM", "RUF001", "RUF002", "RUF003"] [tool.uv] diff --git a/src/src_auth_perms_sync/orgs/sync.py b/src/src_auth_perms_sync/orgs/sync.py index dada8fb..a37a876 100644 --- a/src/src_auth_perms_sync/orgs/sync.py +++ b/src/src_auth_perms_sync/orgs/sync.py @@ -85,7 +85,7 @@ def _load_organization_sync_state( if provider["serviceType"] == saml_groups.SAML_SERVICE_TYPE ] if not saml_providers: - log.warning("No SAML auth providers found — nothing to sync.") + log.warning("No SAML auth providers found - nothing to sync.") return None attribute_names_by_provider = saml_groups.attribute_names_by_provider_key( providers, saml_groups_attribute_name_by_config_id @@ -104,7 +104,7 @@ def _load_organization_sync_state( # Truncated discovery: resolve target names individually and skip # orphaned-org cleanup this run. if not targets: - log.warning("No SAML group memberships found in user accountData — nothing to sync.") + log.warning("No SAML group memberships found in user accountData - nothing to sync.") return None current_user, current_states = _load_current_organization_states( client, @@ -131,7 +131,7 @@ def _load_organization_sync_state( if not targets: log.warning( "No SAML group memberships found in user accountData " - "and no synced orgs exist — nothing to sync." + "and no synced orgs exist - nothing to sync." ) return None current_states = { @@ -179,7 +179,7 @@ def _load_scoped_organization_sync_state( Each user's `accountData` is the complete truth for that user's SAML groups, and their own org list is the complete truth for their current - synced-org memberships — so additions AND removals are both safe per + synced-org memberships - so additions AND removals are both safe per user. Users outside the scope are never touched, and neither full user streams nor org member pages are loaded. 
""" @@ -217,7 +217,7 @@ def _load_scoped_organization_sync_state( len(target.desired_members_by_id) for target in targets.values() ) if not targets: - log.info("Selected user(s) hold no SAML group or synced org memberships — nothing to sync.") + log.info("Selected user(s) hold no SAML group or synced org memberships - nothing to sync.") return None # Org IDs for orgs the scoped users belong to come from their own org @@ -307,7 +307,7 @@ def _log_organization_sync_plan(sync_state: _OrganizationSyncState) -> None: len(sync_state.plan["removals"]), ) log.info( - "Diff (current → desired):\n%s", + "Diff (current -> desired):\n%s", _render_organization_diff(sync_state.before_snapshot, sync_state.expected_snapshot), ) @@ -448,7 +448,7 @@ def _finish_organization_apply_with_backup( ) if current_user_after["id"] != sync_state.current_user["id"]: log.warning( - "Current user changed during org sync (%s → %s); validation still uses org members.", + "Current user changed during org sync (%s -> %s); validation still uses org members.", sync_state.current_user["username"], current_user_after["username"], ) diff --git a/src/src_auth_perms_sync/permissions/apply.py b/src/src_auth_perms_sync/permissions/apply.py index 9992d41..65896e7 100644 --- a/src/src_auth_perms_sync/permissions/apply.py +++ b/src/src_auth_perms_sync/permissions/apply.py @@ -34,7 +34,7 @@ class CircuitBreaker: failure). Once `failure_rate` over that window exceeds `failure_threshold` AND we have at least `min_samples` outcomes recorded, the breaker opens and `is_open()` returns True for the rest - of the run (no half-open / reset logic — once we decide the backend + of the run (no half-open / reset logic - once we decide the backend is too unhealthy, we stay tripped). Designed to bail out of a hopeless run (e.g., backend down, severely @@ -130,13 +130,13 @@ def set_repo_permissions( """Overwrite a repo's explicit permissions with `user_perms` in one call. 
`user_perms` is a list of `{"bindID": , "permission": "READ"}`. - bindID is always the Sourcegraph username — validate_site_config() + bindID is always the Sourcegraph username - validate_site_config() enforces that the site is configured with `bindID: "username"`. Retries on transient transport failures (network errors, HTTP 408/429/5xx) - happen inside the shared Sourcegraph client — every GraphQL call goes through the + happen inside the shared Sourcegraph client - every GraphQL call goes through the same retry plumbing. Application-level GraphQL errors (auth, validation, - schema) are NOT retried — they propagate on the first attempt. + schema) are NOT retried - they propagate on the first attempt. """ with src.span( "set_repo_perms", @@ -233,7 +233,7 @@ def record_result(result: run_context.ParallelResult[PermissionChange, None]) -> succeeded += 1 breaker.record(success=True) log.debug( - " OK %s %s → %s (id=%d).", + " OK %s %s -> %s (id=%d).", action, change.username, change.repo_name, @@ -246,7 +246,7 @@ def record_result(result: run_context.ParallelResult[PermissionChange, None]) -> if is_missing_mutation_resource_error(exception): skipped += 1 log.warning( - " SKIP %s %s → %s (id=%d): repo/user no longer exists: %s", + " SKIP %s %s -> %s (id=%d): repo/user no longer exists: %s", action, change.username, change.repo_name, @@ -257,7 +257,7 @@ def record_result(result: run_context.ParallelResult[PermissionChange, None]) -> failed += 1 breaker.record(success=False) log.error( - " FAIL %s %s → %s (id=%d): %s", + " FAIL %s %s -> %s (id=%d): %s", action, change.username, change.repo_name, @@ -369,7 +369,7 @@ def record_result( succeeded += 1 breaker.record(success=True) log.info( - " OK %s (id=%d) — %d users.", + " OK %s (id=%d) - %d users.", overwrite.repository_name, src.decode_repository_id(overwrite.repository_id), len(overwrite.usernames), diff --git a/src/src_auth_perms_sync/permissions/command.py b/src/src_auth_perms_sync/permissions/command.py index 
d0aa5a1..4ce1d06 100644 --- a/src/src_auth_perms_sync/permissions/command.py +++ b/src/src_auth_perms_sync/permissions/command.py @@ -304,13 +304,13 @@ def cmd_get( ), # SAML providers always get the field (possibly empty) so # operators can see at a glance whether the IdP is releasing - # a groups claim. Non-SAML providers get None → field omitted. + # a groups claim. Non-SAML providers get None -> field omitted. saml_group_user_counts=( saml_group_counts.get((provider["serviceID"], provider["clientID"]), {}) if provider["serviceType"] == saml_groups.SAML_SERVICE_TYPE else None ), - # Match by explicit `configID` only — Sourcegraph + # Match by explicit `configID` only - Sourcegraph # synthesizes one for entries that omit it, but the synth # is a content-addressed hash we can't safely replicate. # Such providers get only the GraphQL-derived view. @@ -428,7 +428,7 @@ def load_selected_users( selected_users.append(user) continue log.info( - "User %s was not created on or after %s — no user metadata selected.", + "User %s was not created on or after %s - no user metadata selected.", user["username"], user_created_after, ) @@ -606,8 +606,8 @@ def _additive_command_data( When `retain_saml_group_users` is set, the selected users are compacted into `scoped_saml_group_users` so a subsequent `--sync-saml-orgs` phase - syncs org membership for exactly these users — per-user additions and - removals — without streaming all users again. An empty selection yields + syncs org membership for exactly these users - per-user additions and + removals - without streaming all users again. An empty selection yields an empty scope (org sync no-ops), never a full-instance sync. 
""" providers = context.providers if context is not None else None @@ -793,7 +793,7 @@ def cmd_set_additive_users( ): mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) + log.warning("No maps defined in %s - nothing to do.", run_paths.maps_path) return _additive_command_data(None, [], retain_saml_group_users) include_user_emails = permissions_mapping.mapping_rules_need_user_emails(mapping_rules) include_user_account_data = ( @@ -820,7 +820,7 @@ def cmd_set_additive_users( selected_users.append(user) continue log.info( - "User %s was not created on or after %s — nothing to do.", + "User %s was not created on or after %s - nothing to do.", user["username"], user_created_after, ) @@ -924,7 +924,7 @@ def cmd_set_additive_users_without_explicit_perms( ): mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) + log.warning("No maps defined in %s - nothing to do.", run_paths.maps_path) return _additive_command_data(None, [], retain_saml_group_users) context = load_mapping_context_discovery( client, @@ -1101,7 +1101,7 @@ def cmd_set_additive_created_after( ): mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) + log.warning("No maps defined in %s - nothing to do.", run_paths.maps_path) return _additive_command_data(None, [], retain_saml_group_users) context = load_mapping_context_discovery( client, @@ -1403,7 +1403,7 @@ def _write_additive_initial_artifacts( else: log.info("Skipping additive snapshot and maps backup files because --no-files is set.") log.info( - "Diff (before → planned after):\n%s", + "Diff (before -> planned after):\n%s", permission_snapshot.render_user_scoped_diff(before_snapshot, 
after_planned_snapshot), ) return before_snapshot @@ -1493,7 +1493,7 @@ def _finish_additive_apply_with_backup( else: log.info("Skipping scoped after-snapshot files because --no-files is set.") log.info( - "Diff (before → after):\n%s", + "Diff (before -> after):\n%s", permission_snapshot.render_user_scoped_diff(before_snapshot, after_snapshot), ) _validate_additive_after(after_snapshot, additions) @@ -1525,7 +1525,7 @@ def _run_additive_apply( ) -> None: """Snapshot, dry-run, apply, and validate an additive permission plan.""" if not users: - log.info("No users selected — nothing to do.") + log.info("No users selected - nothing to do.") return snapshot_users = _snapshot_users_from_users(users) @@ -1549,7 +1549,7 @@ def _run_additive_apply( return if not additions: - log.info("All selected users already have the mapped explicit grants — nothing to apply.") + log.info("All selected users already have the mapped explicit grants - nothing to apply.") return mutations = _apply_additive_permissions(client, additions, parallelism, worker_pool) @@ -1654,7 +1654,7 @@ def _validate_additive_after( log.warning("VALIDATION: %d requested additive grant(s) are missing.", len(missing)) for addition in missing[:20]: log.warning( - " missing %s → %s (id=%d)", + " missing %s -> %s (id=%d)", addition.username, addition.repo_name, src.decode_repository_id(addition.repo_id), diff --git a/src/src_auth_perms_sync/permissions/full_set.py b/src/src_auth_perms_sync/permissions/full_set.py index 6248fa5..2faa4c3 100644 --- a/src/src_auth_perms_sync/permissions/full_set.py +++ b/src/src_auth_perms_sync/permissions/full_set.py @@ -306,7 +306,7 @@ def plan_full_set_permissions( ) log.info(" Matched %d user(s).", len(matched_users)) if not matched_users: - log.warning(" No users matched — skipping rule.") + log.warning(" No users matched - skipping rule.") continue matched_repos = permissions_mapping.resolve_repos( @@ -317,7 +317,7 @@ def plan_full_set_permissions( ) log.info(" Matched %d 
repo(s).", len(matched_repos)) if not matched_repos: - log.warning(" No repos matched — skipping rule.") + log.warning(" No repos matched - skipping rule.") continue matched_usernames = tuple(sorted({user["username"] for user in matched_users})) @@ -404,7 +404,7 @@ def _finish_full_set_dry_run( after_snapshot = projected_snapshot_shell(before_snapshot, plan.expected_users) log.info("Skipping dry-run snapshot files because --no-files is set.") log.info( - "Diff (before → dry-run after):\n%s", + "Diff (before -> dry-run after):\n%s", render_projected_snapshot_diff( before_snapshot, after_snapshot, @@ -453,7 +453,7 @@ def _filter_full_set_plans( if skipped_repo_ids: log.info( "Short-circuit: %d / %d planned repo(s) already at the " - "desired explicit-permissions state — skipping their " + "desired explicit-permissions state - skipping their " "setRepositoryPermissionsForUsers calls.", len(skipped_repo_ids), len(overwrites), @@ -472,7 +472,7 @@ def _overwrites_with_preserved_pending( """Resend each repo's pending bindIDs so overwrites don't delete them. `setRepositoryPermissionsForUsers` replaces a repo's whole explicit - list — real grants AND pending ones. Appending the repo's current + list - real grants AND pending ones. Appending the repo's current pending bindIDs to the payload re-creates the same pending rows in the same transaction, so the script neither creates nor loses them. 
""" @@ -541,7 +541,7 @@ def _apply_full_set_plans( full_short_circuit = bool(skipped_repo_ids) and not overwrites if full_short_circuit: log.info( - "All %d planned repo(s) already at the desired state — nothing to apply.", + "All %d planned repo(s) already at the desired state - nothing to apply.", len(skipped_repo_ids), ) return _FullSetApplyResult( @@ -635,7 +635,7 @@ def _finish_full_set_apply_with_backup( else: log.info("Skipping after-snapshot and diff files because --no-files is set.") log.info( - "Diff (before → after):\n%s", + "Diff (before -> after):\n%s", permission_snapshot.render_snapshot_diff(before_snapshot, after_snapshot), ) @@ -714,7 +714,7 @@ def _finish_empty_full_set_mapping_rules( command_event: dict[str, Any], worker_pool: ThreadPoolExecutor | None = None, ) -> None: - log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) + log.warning("No maps defined in %s - nothing to do.", run_paths.maps_path) if not (dry_run or do_backup): return @@ -879,7 +879,7 @@ def _finish_empty_full_set_plan( do_backup: bool, command_event: dict[str, Any], ) -> None: - log.warning("No repos resolved across any mapping — nothing to do.") + log.warning("No repos resolved across any mapping - nothing to do.") if dry_run or do_backup: _write_noop_full_set_artifacts( run_paths, diff --git a/src/src_auth_perms_sync/permissions/mapping.py b/src/src_auth_perms_sync/permissions/mapping.py index e2c8a59..1aefc98 100644 --- a/src/src_auth_perms_sync/permissions/mapping.py +++ b/src/src_auth_perms_sync/permissions/mapping.py @@ -3,7 +3,7 @@ Each mapping rule has a `users:` section and a `repos:` section. Top-level selectors under each section AND together to keep each rule restrictive. Values inside each supplied selector list OR together. Across mapping rules, -`cmd_set` unions the per-repo user sets at apply time — see +`cmd_set` unions the per-repo user sets at apply time - see `src/src_auth_perms_sync/permissions/types.py` for the rationale. 
Adding a new matcher type: @@ -36,10 +36,10 @@ # validators to reject typos. The mapping from matcher key to # discovered-entry key is hard-coded inside `_providers_matching` / # `_services_matching` (only `authProvider.type` differs: -# matcher `type` ↔ AuthProvider `serviceType`). +# matcher `type` <-> AuthProvider `serviceType`). # Discovered-provider fields that AND together inside `_providers_matching`. # `samlGroup` is allowed under `authProvider:` too but is not a provider -# field — it filters within the matched provider's users (see +# field - it filters within the matched provider's users (see # `_users_matching_auth_provider`). AUTH_PROVIDER_MATCHER_FIELDS: set[str] = { "type", @@ -84,7 +84,7 @@ def validate_mapping_rules(rules: Sequence[object]) -> None: Semantic warnings (e.g. an authProvider matcher with no fields set, which would match every provider on the instance) are logged at - apply time by the resolver, not raised here — they're not always + apply time by the resolver, not raised here - they're not always bugs. """ errors: list[str] = [] @@ -348,12 +348,12 @@ def resolve_users( """Return users matching ALL top-level selectors under `users:`. `saml_groups_attribute_names` overrides the default `"groups"` SAML - assertion attribute name per (serviceID, clientID) — see + assertion attribute name per (serviceID, clientID) - see `src/src_auth_perms_sync/shared/saml_groups.py`. When `None`, every SAML provider falls back to the default. Only consulted by the `authProvider.samlGroup` sub-field. - Empty sections return an empty user set — `validate_mapping_rules` + Empty sections return an empty user set - `validate_mapping_rules` rejects this at config-load time, so this branch only fires for programmatic callers. 
""" @@ -456,7 +456,7 @@ def _users_matching_email_values( exact_values = set(emails) matched = [user for user in all_users if _user_matches_email(user, exact_values, [])] log.info( - " emails → %d user(s) matched %d email selector(s)", + " emails -> %d user(s) matched %d email selector(s)", len(matched), len(exact_values), ) @@ -470,7 +470,7 @@ def _users_matching_email_regexes( patterns = _compiled_regexes(email_regexes) matched = [user for user in all_users if _user_matches_email(user, set(), patterns)] log.info( - " emailRegexes → %d user(s) matched %d email regex selector(s)", + " emailRegexes -> %d user(s) matched %d email regex selector(s)", len(matched), len(set(email_regexes)), ) @@ -494,7 +494,7 @@ def _users_matching_username_values( exact_values = set(usernames) matched = [user for user in all_users if _text_matches(user["username"], exact_values, [])] log.info( - " usernames → %d user(s) matched %d username selector(s)", + " usernames -> %d user(s) matched %d username selector(s)", len(matched), len(exact_values), ) @@ -508,7 +508,7 @@ def _users_matching_username_regexes( patterns = _compiled_regexes(username_regexes) matched = [user for user in all_users if _text_matches(user["username"], set(), patterns)] log.info( - " usernameRegexes → %d user(s) matched %d username regex selector(s)", + " usernameRegexes -> %d user(s) matched %d username regex selector(s)", len(matched), len(set(username_regexes)), ) @@ -557,7 +557,7 @@ def _users_matching_auth_provider( return [] for provider in matching_providers: log.info( - " authProvider → %s (type=%s serviceID=%s clientID=%s)", + " authProvider -> %s (type=%s serviceID=%s clientID=%s)", provider["displayName"], provider["serviceType"], provider["serviceID"], @@ -581,7 +581,7 @@ def _users_matching_auth_provider( matched[user["id"]] = user if saml_group: log.info( - " samlGroup → %d user(s) in group %r", + " samlGroup -> %d user(s) in group %r", len(matched), saml_group, ) @@ -734,7 +734,7 @@ def 
_repo_ids_matching_names( exact_values = set(names) matched = {repo["id"] for repo in repos if _repo_name_matches(repo["name"], exact_values, [])} log.info( - " names → %d repo(s) matched %d name selector(s)", + " names -> %d repo(s) matched %d name selector(s)", len(matched), len(exact_values), ) @@ -748,7 +748,7 @@ def _repo_ids_matching_name_regexes( patterns = _compiled_regexes(name_regexes) matched = {repo["id"] for repo in repos if _repo_name_matches(repo["name"], set(), patterns)} log.info( - " nameRegexes → %d repo(s) matched %d name regex selector(s)", + " nameRegexes -> %d repo(s) matched %d name regex selector(s)", len(matched), len(set(name_regexes)), ) @@ -770,7 +770,7 @@ def _repos_matching_code_host_connection( matched_repos: dict[str, permission_types.Repository] = {} for service in matching_services: log.info( - " codeHostConnection → %s (id=%d kind=%s)", + " codeHostConnection -> %s (id=%d kind=%s)", service["displayName"], src.decode_external_service_id(service["id"]), service["kind"], @@ -846,7 +846,7 @@ def _parsed_service_config(service: permission_types.ExternalService) -> dict[st if not raw_config: return {} try: - parsed = cast(Any, json5.loads(raw_config)) + parsed = json5.loads(raw_config) except ValueError: return {} if not isinstance(parsed, dict): diff --git a/src/src_auth_perms_sync/permissions/maps.py b/src/src_auth_perms_sync/permissions/maps.py index fc0a0c3..3289e0b 100644 --- a/src/src_auth_perms_sync/permissions/maps.py +++ b/src/src_auth_perms_sync/permissions/maps.py @@ -20,8 +20,8 @@ def _strip_redacted(value: Any) -> Any: Sourcegraph's `ExternalService.config` resolver replaces secrets with that literal sentinel before returning the JSONC blob (see internal/types/secret.go in sourcegraph/sourcegraph). Some redactions - live in nested arrays — e.g. GitHub `webhooks[].secret`, - `gitSSHCredential.privateKey` — so the strip is recursive. + live in nested arrays - e.g. 
GitHub `webhooks[].secret`, + `gitSSHCredential.privateKey` - so the strip is recursive. Lists / scalars pass through unchanged. The redaction sentinel itself, if it appears as a top-level scalar (it shouldn't, but defensively), @@ -56,22 +56,22 @@ def auth_provider_to_yaml( `src/src_auth_perms_sync/shared/site_config.py`). Any fields it carries that aren't already emitted from GraphQL are surfaced verbatim, so operators see the full provider config in the - YAML — e.g. `identityProviderMetadataURL`, `serviceProviderIssuer`, + YAML - e.g. `identityProviderMetadataURL`, `serviceProviderIssuer`, `requireEmailDomain`, `allowSignup`. Order: GraphQL-derived identity keys first, then site-config extras, then observation-derived metadata. - For SAML providers, `saml_group_user_counts` (group name → distinct + For SAML providers, `saml_group_user_counts` (group name -> distinct user count) is ALWAYS surfaced under `samlGroupUserCounts:`, even when the mapping is empty. The empty case (`{}`) tells the operator the feature is supported but the IdP didn't release any `groupsAttributeName` (default `groups`) claim in this provider's - assertions — typically because the IdP hasn't been configured to do + assertions - typically because the IdP hasn't been configured to do so. Operators authoring `authProvider.samlGroup` mapping rules can use this field to size groups before writing rules, or to learn that they need to fix their IdP first. Pass `None` (the default for non-SAML providers) to omit the field entirely. - Empty-string fields are omitted — the builtin provider has no + Empty-string fields are omitted - the builtin provider has no serviceID / clientID / configID, so those keys would just be noise. """ rendered: dict[str, Any] = {"type": provider["serviceType"]} @@ -118,7 +118,7 @@ def count_users_per_provider( (they have a password set on the builtin provider). A user can therefore be counted under multiple providers (e.g. 
SAML + - builtin) — this matches reality: such a user can sign in either way. + builtin) - this matches reality: such a user can sign in either way. """ seen: dict[tuple[str, str, str], set[str]] = {} for user in users: @@ -176,7 +176,7 @@ def external_service_to_yaml(service: permission_types.ExternalService) -> dict[ raw_config = service.get("config") if raw_config: try: - parsed_config = cast(Any, json5.loads(raw_config)) + parsed_config = json5.loads(raw_config) except ValueError: pass else: @@ -242,7 +242,7 @@ def _dump_readonly_discovery_yaml( def create_maps_yaml_if_missing(path: Path) -> bool: """Create the operator-edited maps file once, preserving existing files.""" content = ( - "# Auth provider → code host connection mapping rules\n" + "# Auth provider -> code host connection mapping rules\n" "# Maintain this file, using values from auth-providers.yaml " "and code-hosts.yaml as references\n" "\n" diff --git a/src/src_auth_perms_sync/permissions/queries.py b/src/src_auth_perms_sync/permissions/queries.py index ae5b64e..b4cbc71 100644 --- a/src/src_auth_perms_sync/permissions/queries.py +++ b/src/src_auth_perms_sync/permissions/queries.py @@ -300,8 +300,8 @@ def users_by_ids_batch_query( } """ -# Server-side filtered to PermissionSource.API — explicit grants only, never -# code-host-synced. We always invert (user→repos) here because +# Server-side filtered to PermissionSource.API - explicit grants only, never +# code-host-synced. We always invert (user->repos) here because # Repository.permissionsInfo.users does NOT accept a `source` filter on this # SG version, so the repo-centric direction can't cleanly distinguish # explicit-API grants from sync/site-admin grants. @@ -334,7 +334,7 @@ def users_by_ids_batch_query( # For a bindID with no matching user, this resolver falls back to the # pending-permissions store and returns the repos the bindID is pending -# on ("late binding" — see the GraphQL schema comment). 
That fallback is +# on ("late binding" - see the GraphQL schema comment). That fallback is # the only API that exposes WHICH repos a pending bindID has. QUERY_PENDING_USER_REPOS = """ query PendingUserRepos($bindID: String!, $first: Int!, $after: String) { diff --git a/src/src_auth_perms_sync/permissions/restore.py b/src/src_auth_perms_sync/permissions/restore.py index 5ff7405..6672a1e 100644 --- a/src/src_auth_perms_sync/permissions/restore.py +++ b/src/src_auth_perms_sync/permissions/restore.py @@ -205,14 +205,14 @@ def _validate_user_scoped_restore_snapshot_context( """Warn when a user-scoped restore target differs from the current context.""" if target_snapshot["bindID_mode"] != bind_id_mode: log.warning( - "Snapshot bindID_mode=%s differs from live bindID_mode=%s — " + "Snapshot bindID_mode=%s differs from live bindID_mode=%s - " "captured usernames may not resolve to the same users.", target_snapshot["bindID_mode"], bind_id_mode, ) if target_snapshot["endpoint"] != client.endpoint: log.warning( - "Snapshot endpoint=%s differs from live endpoint=%s — restoring " + "Snapshot endpoint=%s differs from live endpoint=%s - restoring " "across instances. 
Proceeding anyway; review the plan diff.", target_snapshot["endpoint"], client.endpoint, @@ -254,7 +254,7 @@ def _log_user_scoped_restore_plan( len(plan.removals), ) log.info( - "Diff (current → scoped snapshot):\n%s", + "Diff (current -> scoped snapshot):\n%s", permission_snapshot.render_user_scoped_diff( snapshot_state.current_snapshot, snapshot_state.target_snapshot, @@ -293,7 +293,7 @@ def _finish_empty_user_scoped_restore_plan( current_snapshot: permission_snapshot.UserScopedSnapshot, do_backup: bool, ) -> None: - log.info("Scoped restore target already matches current state — nothing to apply.") + log.info("Scoped restore target already matches current state - nothing to apply.") if not do_backup: return if not run_paths.write_files: @@ -467,7 +467,7 @@ def _validate_restore_snapshot_context( ) if target_snapshot["bindID_mode"] != bind_id_mode: log.warning( - "Snapshot bindID_mode=%s differs from live bindID_mode=%s — " + "Snapshot bindID_mode=%s differs from live bindID_mode=%s - " "captured bindIDs may not resolve to the same users. Proceeding " "anyway; review the plan diff carefully.", target_snapshot["bindID_mode"], @@ -475,7 +475,7 @@ def _validate_restore_snapshot_context( ) if target_snapshot["endpoint"] != client.endpoint: log.warning( - "Snapshot endpoint=%s differs from live endpoint=%s — restoring " + "Snapshot endpoint=%s differs from live endpoint=%s - restoring " "across instances. Proceeding anyway; review the plan diff.", target_snapshot["endpoint"], client.endpoint, @@ -534,7 +534,7 @@ def plan_full_restore(snapshot_state: RestoreSnapshotState) -> RestorePlan: Each overwrite carries the target's real usernames PLUS the target's pending bindIDs for that repo: `setRepositoryPermissionsForUsers` replaces both kinds in one transaction, and unresolved bindIDs become - pending rows again — restoring pending grants exactly as captured. + pending rows again - restoring pending grants exactly as captured. 
""" target_snapshot = snapshot_state.target_snapshot current_snapshot = snapshot_state.current_snapshot @@ -633,7 +633,7 @@ def _log_full_restore_plan(snapshot_state: RestoreSnapshotState, plan: RestorePl plan.extra_repo_count, ) log.info( - "Diff (current → snapshot):\n%s", + "Diff (current -> snapshot):\n%s", permission_snapshot.render_snapshot_diff( snapshot_state.current_snapshot, snapshot_state.target_snapshot, @@ -773,7 +773,7 @@ def _finish_restore_apply_with_backup( if residual != "No changes.": log.warning( "VALIDATION: post-restore state does NOT match the target " - "snapshot exactly. Residual diff (post-restore → snapshot):\n%s", + "snapshot exactly. Residual diff (post-restore -> snapshot):\n%s", residual, ) else: diff --git a/src/src_auth_perms_sync/permissions/snapshot.py b/src/src_auth_perms_sync/permissions/snapshot.py index 42b8911..bbadf8c 100644 --- a/src/src_auth_perms_sync/permissions/snapshot.py +++ b/src/src_auth_perms_sync/permissions/snapshot.py @@ -62,7 +62,7 @@ class Snapshot(TypedDict): config_file: str | None # absolute path of the YAML, if known config_sha256: str | None # sha256 of the YAML at capture time # Explicit-API grants whose bindID has not resolved to a real user yet - # ("grant before first login"): bindID → repos it is pending on. + # ("grant before first login"): bindID -> repos it is pending on. 
pending_users: dict[str, list[permission_types.Repository]] stats: SnapshotStats repos: dict[str, RepoSnapshot] @@ -184,7 +184,7 @@ class UserScopedSnapshotDiff(TypedDict): def pending_bind_ids_by_repository_id( pending_users: dict[str, list[permission_types.Repository]], ) -> dict[str, list[str]]: - """Invert bindID → repos into repo ID → sorted pending bindIDs.""" + """Invert bindID -> repos into repo ID -> sorted pending bindIDs.""" bind_ids_by_repository_id: dict[str, list[str]] = {} for bind_id, repositories in pending_users.items(): for repository in repositories: @@ -197,7 +197,7 @@ def pending_bind_ids_by_repository_id( def pending_repository_names_by_id( pending_users: dict[str, list[permission_types.Repository]], ) -> dict[str, str]: - """Return repo ID → name for every repo referenced by pending grants.""" + """Return repo ID -> name for every repo referenced by pending grants.""" return { repository["id"]: repository["name"] for repositories in pending_users.values() @@ -234,9 +234,9 @@ def capture_explicit_grants( """Build the per-repo inverse index of explicit-API grants. Fetches `user.permissionsInfo.repositories(source: API)` for batches of - users in parallel via a thread pool, then inverts to `repo_id → RepoSnapshot`. + users in parallel via a thread pool, then inverts to `repo_id -> RepoSnapshot`. - Accepts any `Iterable[User]` — including a streaming generator from + Accepts any `Iterable[User]` - including a streaming generator from `list_users_streaming`. When passed a streaming source, this function submits batched UserExplicitRepos calls **while** iterating, so the submission loop blocking on the next ListUsers page overlaps with @@ -261,9 +261,9 @@ def capture_explicit_grants( if selected_repository_ids is not None and not selected_repository_ids: # No repos selected (e.g. --repos-created-after matched nothing): no # per-user permission lookup could contribute anything, so skip them. 
- # Still drain the users iterable — callers pass recording streams + # Still drain the users iterable - callers pass recording streams # whose side effects feed later phases (mapping, SAML extraction). - log.info("No repositories selected — skipping the explicit-permissions lookups.") + log.info("No repositories selected - skipping the explicit-permissions lookups.") return {}, sum(1 for _ in users) # Invert directly as each per-user fetch completes. Store only repo IDs @@ -359,7 +359,7 @@ def _fetch_one_user_at_a_time( progress_step = max(1, expected_user_count // 10) if expected_user_count else 1000 # Start the timer BEFORE submission. Iterating `users` may block on # ListUsers pagination, but workers process already-submitted tasks - # during those blocks — so progress reflects real wall-clock work. + # during those blocks - so progress reflects real wall-clock work. progress_started = time.perf_counter() completed = 0 next_progress_report = progress_step @@ -944,7 +944,7 @@ def write_snapshot(path: Path, snapshot: Snapshot) -> None: Repo IDs are decoded from their opaque GraphQL Node form (`Repository:` base64) to plain integer DB primary keys before - write — they're far easier to grep, diff, and read by eye. + write - they're far easier to grep, diff, and read by eye. `read_snapshot` re-encodes them on load so the in-memory shape (and every consumer of `Snapshot`) keeps using opaque IDs unchanged. 
""" diff --git a/src/src_auth_perms_sync/permissions/workflow.py b/src/src_auth_perms_sync/permissions/workflow.py index 662c70b..db1fa4e 100644 --- a/src/src_auth_perms_sync/permissions/workflow.py +++ b/src/src_auth_perms_sync/permissions/workflow.py @@ -130,7 +130,7 @@ def load_mapping_context( """Load maps, providers, services, and repos for permission planning.""" mapping_rules = load_mapping_rules(input_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", input_path) + log.warning("No maps defined in %s - nothing to do.", input_path) return None return load_mapping_context_for_rules( @@ -520,8 +520,8 @@ def validate_post_apply( 1. Pending bindIDs: any username we just wrote that didn't resolve to a real User now appears in `usersWithPendingPermissions`. In our use - case this should never happen — we enumerate users via the users - query before mutating — but it's a cheap safety net. + case this should never happen - we enumerate users via the users + query before mutating - but it's a cheap safety net. 2. Per-repo expected vs. actual: for every repo we touched, the after-snapshot's explicit-user list must equal the union we asked diff --git a/src/src_auth_perms_sync/shared/saml_groups.py b/src/src_auth_perms_sync/shared/saml_groups.py index f0d9275..548eadb 100644 --- a/src/src_auth_perms_sync/shared/saml_groups.py +++ b/src/src_auth_perms_sync/shared/saml_groups.py @@ -6,7 +6,7 @@ attribute named by the provider's `groupsAttributeName` site config (default `"groups"`). -This module does NOT fetch — it only parses user rows fetched with +This module does NOT fetch - it only parses user rows fetched with `include_account_data=True`. Two on-disk shapes are handled defensively: 1. Raw `*saml2.AssertionInfo`: @@ -135,7 +135,7 @@ def extract_saml_groups( """Pull the group-name strings out of one SAML `accountData` blob. Returns `[]` for null/empty data, missing attribute, or unknown shape - — never raises. 
Duplicate group names within one assertion are + - never raises. Duplicate group names within one assertion are de-duplicated; ordering is preserved. """ if not account_data: @@ -268,7 +268,7 @@ def compact_scoped_saml_group_users( ) -> list[shared_types.ScopedSamlGroupUser]: """Compact in-scope users for a scoped (per-user) organization sync. - Every user is kept — even with zero SAML group memberships — because + Every user is kept - even with zero SAML group memberships - because scoped org sync also removes users from synced orgs they left. Each user's row must have been fetched with `include_organizations=True`; only `synced-` prefixed org memberships are retained. @@ -312,7 +312,7 @@ def count_users_per_saml_group( when no site config is available; every provider then falls back to the default. - A user is counted at most once per (provider, group) — multiple + A user is counted at most once per (provider, group) - multiple accounts under the same provider with overlapping groups don't double-count, and groups that don't appear in any user's assertion don't appear in the output at all. diff --git a/src/src_auth_perms_sync/shared/site_config.py b/src/src_auth_perms_sync/shared/site_config.py index 560bb51..f8ee33a 100644 --- a/src/src_auth_perms_sync/shared/site_config.py +++ b/src/src_auth_perms_sync/shared/site_config.py @@ -14,7 +14,7 @@ # HTTP statuses that genuinely indicate the access token can't read site # config (missing Site Admin role / SITE_CONFIG#READ). Everything else # (5xx, network, parse, etc.) is a transport / server failure, not an -# authorization problem — say so instead of misleading the operator. +# authorization problem - say so instead of misleading the operator. 
AUTHORIZATION_HTTP_STATUSES = frozenset({401, 403}) log = logging.getLogger(__name__) @@ -30,14 +30,14 @@ class SiteConfig: """ bind_id_mode: str - """`"USERNAME"` (the only value validate_site_config accepts) — kept + """`"USERNAME"` (the only value validate_site_config accepts) - kept for downstream snapshot / apply layers that pass it through.""" auth_providers_by_config_id: dict[str, dict[str, Any]] """Raw `auth.providers[*]` site-config entries keyed by explicit `configID`, with redacted/secret fields stripped (see `_strip_sensitive_provider_fields`). Entries without an explicit - `configID` are dropped — Sourcegraph synthesizes one as a content- + `configID` are dropped - Sourcegraph synthesizes one as a content- addressed hash we can't safely replicate from Python. Surfaced to `cmd_get` so the YAML config carries every non-secret @@ -54,14 +54,14 @@ class SiteConfig: Only populated for SAML site-config entries that set BOTH a non- default `groupsAttributeName` AND an explicit `configID`. Entries that customize `groupsAttributeName` without setting `configID` - are skipped (with a warning) — Sourcegraph synthesizes a `configID` + are skipped (with a warning) - Sourcegraph synthesizes a `configID` of `:` for them internally, but that synthesis is an implementation detail and order-dependent. Operators who need this override should set explicit `configID` on each affected SAML provider in site config. Providers without an entry fall back to - `DEFAULT_GROUPS_ATTRIBUTE_NAME` (`"groups"`) — the same default + `DEFAULT_GROUPS_ATTRIBUTE_NAME` (`"groups"`) - the same default Sourcegraph itself uses when `groupsAttributeName` is unset, so the fallback is safe.""" @@ -118,7 +118,7 @@ def validate_site_config(client: src.SourcegraphClient) -> SiteConfig: safety_errors.append( "auth.enableUsernameChanges must be `false` (currently true). 
" "Username-keyed permissions become unstable if users can rename " - "themselves — a user could rename into another user's old name " + "themselves - a user could rename into another user's old name " "and inherit their permissions." ) @@ -136,7 +136,7 @@ def validate_site_config(client: src.SourcegraphClient) -> SiteConfig: "Site-config safety requirements not met:" + bullet + bullet.join(safety_errors) - + "\n\nFix: edit site config (Site admin → Configuration) so it " + + "\n\nFix: edit site config (Site admin -> Configuration) so it " "includes:\n" ' "permissions.userMapping": { "enabled": true, "bindID": "username" },\n' ' "auth.enableUsernameChanges": false' @@ -217,14 +217,14 @@ def _query_site_configuration( # Sourcegraph's `effectiveContents` resolver redacts secrets by replacing # them with this literal sentinel string (see internal/conf/validate.go # in sourcegraph/sourcegraph). Any field carrying this value is stripped -# from the YAML — value-based, so it stays correct if Sourcegraph adds +# from the YAML - value-based, so it stays correct if Sourcegraph adds # more redactions in the future without us having to enumerate them. REDACTED_SENTINEL = "REDACTED" # SAML fields Sourcegraph does NOT redact but we still drop: # private keys / certs / inline IdP metadata blobs are large secrets that # don't belong in a config-discovery YAML. The URL-form -# (`identityProviderMetadataURL`) is kept — it's a reference, not a blob. +# (`identityProviderMetadataURL`) is kept - it's a reference, not a blob. _DROPPED_PROVIDER_FIELDS: frozenset[str] = frozenset( { "serviceProviderPrivateKey", @@ -249,7 +249,7 @@ def _extract_auth_providers_by_config_id( ) -> dict[str, dict[str, Any]]: """`auth.providers[*]` site-config entries keyed by explicit `configID`, with secrets stripped. 
Entries without an explicit `configID` are - silently skipped — see `SiteConfig.auth_providers_by_config_id` for + silently skipped - see `SiteConfig.auth_providers_by_config_id` for the rationale.""" by_config_id: dict[str, dict[str, Any]] = {} raw_providers = contents.get("auth.providers") @@ -363,7 +363,7 @@ def _missing_config_id_error(entry: dict[str, Any], attribute_name: str) -> str: " Why: this script needs a stable name to refer to your SAML\n" " provider. If you don't set `configID`, Sourcegraph generates\n" " one for you, but that auto-generated value silently changes\n" - " whenever you edit any field on the provider — which would\n" + " whenever you edit any field on the provider - which would\n" " break this script the next time you re-run it after a\n" " site-config edit." ) diff --git a/src/src_auth_perms_sync/shared/sourcegraph.py b/src/src_auth_perms_sync/shared/sourcegraph.py index 969a66f..53bbe7a 100644 --- a/src/src_auth_perms_sync/shared/sourcegraph.py +++ b/src/src_auth_perms_sync/shared/sourcegraph.py @@ -70,7 +70,7 @@ def list_users_streaming( If `collect_into` is provided, every yielded user is appended to that list, so the caller ends up with the materialized list AND the - streaming benefit in one pass — no double-pagination. + streaming benefit in one pass - no double-pagination. 
""" for node in client.stream_connection_nodes( queries.query_users( diff --git a/tests/README.md b/tests/README.md index 20dce24..63c438c 100644 --- a/tests/README.md +++ b/tests/README.md @@ -14,9 +14,9 @@ All testing is driven by one entrypoint and one case registry: ## How the pieces fit ```text -tests.yaml ──registry──▶ e2e/case_runner.py ◀──imports── run.py - ▲ (--local/--live/--performance) - │ +tests.yaml --registry--> e2e/case_runner.py <--imports-- run.py + ^ (--local/--live/--performance) + | e2e/test_local_cases.py (unittest discovery: local cases + registry validation) ``` @@ -24,15 +24,15 @@ tests.yaml ──registry──▶ e2e/case_runner.py ◀──imports── run - `case_runner.py` is a library, not a test module: it executes registry cases without any network. Both consumers above import it. - `test_local_cases.py` exists so plain `uv run python -m unittest discover - -s tests` asserts every local case with no orchestrator — which is exactly + -s tests` asserts every local case with no orchestrator - which is exactly what run.py's "unit + fixture tests" gate, the release checklist, and CI run. -- Live and performance execution (instance prerequisites, seed → apply → - verify → restore, traces, sampling) lives only in `run.py`. +- Live and performance execution (instance prerequisites, seed -> apply -> + verify -> restore, traces, sampling) lives only in `run.py`. ## Files in a fixture case directory -A directory is only needed when the case uses files — a read-only non-set +A directory is only needed when the case uses files - a read-only non-set command can be registered in tests.yaml with no directory at all. - `before.json`: Full instance state before the run: providers, services, users, @@ -50,48 +50,48 @@ files (e.g. 
`test_user_09991`, `test-repo-49981`), and exact selectors only ## What each mode does with a case -- **local** — runs every state case TWICE against an in-memory instance +- **local** - runs every state case TWICE against an in-memory instance built from `before.json`: once with `cliCommand` through the real argument parser, and once through the Python import API with a Config - derived from the same command line — both must produce the exact + derived from the same command line - both must produce the exact `after.json` state, proving CLI/import parity for every behavior. An explicit `importConfig` overrides the derived one (to pin specific kwargs spellings). Replay-style cases (`expectedExitCode`/`expectedOutput`) assert parser behavior instead and need no files. -- **live** — FUNCTIONAL tier: fast, scoped checks against the `.env` test +- **live** - FUNCTIONAL tier: fast, scoped checks against the `.env` test instance; the whole tier should take minutes. Read-only commands assert exit code and output. Mutating `set --apply` commands run the full cycle: seed the `before.json` state onto the involved repos, run, verify the result with an independent GraphQL read-back, then restore the original state. Seeding and restoring write the involved repos directly via - GraphQL — never through the product's `restore` command, whose full + GraphQL - never through the product's `restore` command, whose full instance capture takes minutes at 10k users and whose whole-instance semantics clobber concurrent runs. Cases may declare `live.involvedRepos` (extra repos to read/seed/restore; the ones absent from `after.json` are - canaries that must come back unchanged — this is how widened regex + canaries that must come back unchanged - this is how widened regex selectors get caught) and `live.usersWithoutOtherGrants` (preflight: named users must hold no grants outside the involved repos). 
Cases whose main command intrinsically scans the whole instance (full captures, candidate scans over all users/repos) belong in **performance**, not live. -- **performance** — SCALE tier: same workflow as live, but timed and +- **performance** - SCALE tier: same workflow as live, but timed and measured (traces, RSS sampling, TSV row), and the place for cases whose commands walk all 10k users / 50k repos. Run deliberately, not pre-commit. The legacy whole-instance stress cycle (`set --full` with the - root maps.yaml — 10k users x ~1,150 repos, known to crash the test + root maps.yaml - 10k users x ~1,150 repos, known to crash the test instance's Postgres) is opt-in only: `uv run tests/run.py --live "full cycle"`. Two live flows need seeding beyond the registry's repo-grant model, so they live as harness checks in run.py rather than tests.yaml cases: -- **`live: sync-saml-orgs seeded`** — diverges one synthetic-group org's +- **`live: sync-saml-orgs seeded`** - diverges one synthetic-group org's membership both ways (adds a member no SAML group justifies, removes a member the group requires), then one `sync-saml-orgs --apply` must converge every synthetic-group org back to SAML truth, verified by an independent member read-back. 
-- **`live: perms follow saml group change`** — proves a user added to a +- **`live: perms follow saml group change`** - proves a user added to a mapped SAML group gains the mapped perms: baseline apply with the saml-group-live mapping, then the fabricated SAML account of a non-member gains the group (setup.py's SQL path), the same apply runs @@ -101,13 +101,13 @@ they live as harness checks in run.py rather than tests.yaml cases: Functional coverage of scale-only code paths (pagination, batch stepping, dedupe) does NOT require scale data: the local fake serves site-user pages of at most 2 (`SITE_USERS_PAGE_CAP` in `e2e/case_runner.py`), so a fixture -with 4 users already spans 2 pages — that is what catches selection +with 4 users already spans 2 pages - that is what catches selection truncation bugs locally in milliseconds. ## Instance state: setup.py / setup.yaml [setup.py](./setup.py) converges the test instance to the desired state in -[setup.yaml](./setup.yaml) — run it BEFORE `run.py --live`: +[setup.yaml](./setup.yaml) - run it BEFORE `run.py --live`: ```bash uv run tests/setup.py # report drift, change nothing @@ -119,7 +119,7 @@ user's email that drifted from `{username}@perms-sync.test`, fabricates SAML external accounts (group claims for `samlGroups` live cases, written via SQL on the pgsql pod and verified back through the product's own GraphQL parser), and deletes orphaned explicit grants attached to -soft-deleted repos (unreachable rows — the only state it ever removes). +soft-deleted repos (unreachable rows - the only state it ever removes). Pending permissions and grants on live repos are REPORTED, never deleted: our suite doesn't create them, so their origin is unknown and removal is a human decision. GraphQL is used for instance-level reads; bulk state @@ -130,7 +130,7 @@ creates or deletes users itself. 
Live cases declare their identity preconditions in tests.yaml: `live.requiredSamlGroups` (preflight: fabricated accounts must match, with a pointer to setup.py on drift) and `live.temporaryUsers` (the harness -creates the named users fresh via `createUser` — `created_at` = now — and +creates the named users fresh via `createUser` - `created_at` = now - and hard-deletes them afterwards; `{today}` in a cliCommand resolves to the run's UTC date, which makes positive `--created-after` selection deterministic against the long-pre-existing synthetic users). @@ -140,7 +140,7 @@ deterministic against the long-pre-existing synthetic users). `uv run tests/run.py --install` pip-installs the **published** package into a clean venv (`--install-python`, default `python3.13`) and runs every `--help` command, asserting exit 0 and usage output. It needs network to pypi.org -only — no Sourcegraph instance. `--install-package` pins a version +only - no Sourcegraph instance. `--install-package` pins a version (`src-auth-perms-sync==1.2.3`) or points at a wheel path. This complements the live tier's "wheel install smoke", which builds and installs the *local* wheel; CI separately installs the locally-built wheel in @@ -154,7 +154,7 @@ operators actually download. 2. Either write `after.json` by hand (strongest: states your intent), or run `uv run tests/run.py --update-golden` to generate it from the actual result. -3. **Review `after.json` carefully** — it is the assertion. Confirm every +3. **Review `after.json` carefully** - it is the assertion. Confirm every added/removed grant is what you intended before committing. 4. Run `uv run tests/run.py` to confirm the suite passes. 
The unit tests fail on unregistered fixture directories, missing required files, or diff --git a/tests/confusables.py b/tests/confusables.py deleted file mode 100644 index 04f3d0c..0000000 --- a/tests/confusables.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python3 -"""Fail on confusable Unicode characters in non-Python tracked files. - -Ruff's RUF001-RUF003 rules catch confusable characters (en dash for -hyphen, multiplication sign for x, Cyrillic lookalike letters, ...) in -Python strings, comments, and docstrings. This script applies the same -policy to every other tracked text file: Markdown, YAML, TOML, and -GitHub Actions workflows. - -The character set below mirrors ruff's curated subset of the Unicode -confusables list (UTS #39): characters that render like ASCII and sneak -in through copy-paste. Intentional prose characters that look nothing -like ASCII (em dash, arrows, box drawing, check marks) are NOT flagged. - -Usage: uv run python tests/confusables.py -Exit code 1 when any confusable character is found. -""" - -from __future__ import annotations - -import subprocess -import sys -import unicodedata -from pathlib import Path - -# char -> suggested ASCII replacement. Grouped by accident type. 
-CONFUSABLE_SUGGESTIONS: dict[str, str] = { - # Quotes and apostrophes - "\u2018": "'", # left single quotation mark - "\u2019": "'", # right single quotation mark - "\u201a": ",", # single low-9 quotation mark - "\u201b": "'", # single high-reversed-9 quotation mark - "\u201c": '"', # left double quotation mark - "\u201d": '"', # right double quotation mark - "\u201e": '"', # double low-9 quotation mark - "\u2032": "'", # prime - "\u2033": '"', # double prime - "\u00b4": "'", # acute accent - "\u02b9": "'", # modifier letter prime - "\u02bb": "'", # modifier letter turned comma - "\u02bc": "'", # modifier letter apostrophe - "\u02c8": "'", # modifier letter vertical line - # Dashes and minus signs (em dash is NOT confusable; it stays legal) - "\u2010": "-", # hyphen - "\u2011": "-", # non-breaking hyphen - "\u2012": "-", # figure dash - "\u2013": "-", # en dash - "\u2212": "-", # minus sign - # Other punctuation and symbols - "\u00d7": "x", # multiplication sign - "\u2044": "/", # fraction slash - "\u2215": "/", # division slash - "\u037e": ";", # greek question mark - "\u0387": ":", # greek ano teleia - "\u2236": ":", # ratio - "\u201f": '"', # double high-reversed-9 quotation mark - "\u02d0": ":", # modifier letter triangular colon - "\u0589": ":", # armenian full stop - "\u06d4": ".", # arabic full stop - "\u2024": ".", # one dot leader - "\u22c5": "*", # dot operator - "\u2219": "*", # bullet operator - # Spaces that render like a plain space - "\u00a0": " ", # no-break space - "\u2000": " ", # en quad - "\u2001": " ", # em quad - "\u2002": " ", # en space - "\u2003": " ", # em space - "\u2004": " ", # three-per-em space - "\u2005": " ", # four-per-em space - "\u2006": " ", # six-per-em space - "\u2007": " ", # figure space - "\u2008": " ", # punctuation space - "\u2009": " ", # thin space - "\u200a": " ", # hair space - "\u202f": " ", # narrow no-break space - "\u205f": " ", # medium mathematical space - "\u3000": " ", # ideographic space - # Invisible 
characters: suggest deleting them - "\u00ad": "", # soft hyphen - "\u061c": "", # arabic letter mark - "\u200b": "", # zero width space - "\u200c": "", # zero width non-joiner - "\u200d": "", # zero width joiner - "\u200e": "", # left-to-right mark - "\u200f": "", # right-to-left mark - "\u2028": "", # line separator - "\u2029": "", # paragraph separator - "\u202a": "", # left-to-right embedding - "\u202b": "", # right-to-left embedding - "\u202c": "", # pop directional formatting - "\u202d": "", # left-to-right override - "\u202e": "", # right-to-left override (Trojan Source attacks) - "\u2060": "", # word joiner - "\u2061": "", # function application - "\ufeff": "", # zero width no-break space / BOM - # Cyrillic letters that render like Latin - "\u0430": "a", - "\u0435": "e", - "\u043e": "o", - "\u0440": "p", - "\u0441": "c", - "\u0443": "y", - "\u0445": "x", - "\u0455": "s", - "\u0456": "i", - "\u0458": "j", - "\u0410": "A", - "\u0412": "B", - "\u0415": "E", - "\u041a": "K", - "\u041c": "M", - "\u041d": "H", - "\u041e": "O", - "\u0420": "P", - "\u0421": "C", - "\u0422": "T", - "\u0425": "X", - "\u0405": "S", - "\u0406": "I", - "\u0408": "J", - # Greek letters that render like Latin - "\u0391": "A", - "\u0392": "B", - "\u0395": "E", - "\u0396": "Z", - "\u0397": "H", - "\u0399": "I", - "\u039a": "K", - "\u039c": "M", - "\u039d": "N", - "\u039f": "O", - "\u03a1": "P", - "\u03a4": "T", - "\u03a5": "Y", - "\u03a7": "X", - "\u03bf": "o", - "\u03c5": "u", -} - -# Fullwidth ASCII variants (U+FF01..U+FF5E) map to ASCII by fixed offset. -FULLWIDTH_FIRST = 0xFF01 -FULLWIDTH_LAST = 0xFF5E -FULLWIDTH_TO_ASCII_OFFSET = 0xFEE0 - -# Ruff (RUF001-RUF003 with PLC2401/PLC2403 available) owns Python files. 
-RUFF_OWNED_SUFFIXES = {".py"} - - -def suggestion_for(character: str) -> str | None: - """Return the ASCII replacement for a confusable character, else None.""" - known = CONFUSABLE_SUGGESTIONS.get(character) - if known is not None: - return known - if FULLWIDTH_FIRST <= ord(character) <= FULLWIDTH_LAST: - return chr(ord(character) - FULLWIDTH_TO_ASCII_OFFSET) - return None - - -def findings_in_text(text: str) -> list[tuple[int, int, str, str]]: - """Return (line, column, character, suggestion) findings, 1-based.""" - findings: list[tuple[int, int, str, str]] = [] - for line_number, line in enumerate(text.splitlines(), start=1): - for column_number, character in enumerate(line, start=1): - suggestion = suggestion_for(character) - if suggestion is not None: - findings.append((line_number, column_number, character, suggestion)) - return findings - - -def tracked_files(root: Path) -> list[Path]: - """Return tracked files the gate should scan.""" - listing = subprocess.run( - ["git", "ls-files", "-z"], - capture_output=True, - text=True, - check=True, - cwd=root, - ) - return [ - root / name - for name in listing.stdout.split("\0") - if name and Path(name).suffix not in RUFF_OWNED_SUFFIXES - ] - - -def describe(character: str) -> str: - name = unicodedata.name(character, f"U+{ord(character):04X}") - return f"`{character}` ({name})" - - -def main() -> int: - root = Path(__file__).resolve().parent.parent - finding_count = 0 - for path in tracked_files(root): - try: - text = path.read_text(encoding="utf-8") - except (UnicodeDecodeError, FileNotFoundError, IsADirectoryError): - continue # binary or vanished files are not lintable text - for line_number, column_number, character, suggestion in findings_in_text(text): - replacement = f"`{suggestion}`" if suggestion else "deleting it" - print( - f"{path.relative_to(root)}:{line_number}:{column_number} " - f"confusable {describe(character)} - did you mean {replacement}?" 
- ) - finding_count += 1 - if finding_count: - print(f"\nFound {finding_count} confusable character(s).") - return 1 - print("No confusable characters found.") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 9db6b8a..9906497 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -112,7 +112,7 @@ class FixtureCase(TypedDict): description: str modes: NotRequired[list[str]] # local, live, performance (default: [local]) # State cases declare `args`: the command plus Config fields. The CLI - # argv is GENERATED from it (field names → real cli_flag metadata) and + # argv is GENERATED from it (field names -> real cli_flag metadata) and # the import API consumes it directly, so one mapping drives both # entrypoints. Replay cases declare a raw `cliCommand` instead, because # their point is argv-level parser behavior. @@ -598,7 +598,7 @@ def _repository_names_by_id(self, variables: dict[str, object]) -> dict[str, Any def _set_repo_permissions(self, variables: dict[str, object]) -> None: """Mirror the real resolver: bindIDs matching a user become explicit - grants; the rest replace the repo's pending rows — both in one call.""" + grants; the rest replace the repo's pending rows - both in one call.""" repository_id = self._repository_integer_id(variables["repo"]) user_permissions = cast(list[dict[str, str]], variables["userPerms"]) bind_ids = {user_permission["bindID"] for user_permission in user_permissions} @@ -621,7 +621,7 @@ def _pending_bind_ids(self) -> list[str]: def _authorized_user_repositories(self, bind_id_value: object) -> list[dict[str, Any]]: """Real users get their explicit repos; unknown bindIDs fall back to - the pending store — the server's "late binding" behavior.""" + the pending store - the server's "late binding" behavior.""" if not isinstance(bind_id_value, str): raise AssertionError("bindID variable must be a string") user = 
self._users_by_username.get(bind_id_value) @@ -796,7 +796,7 @@ def case_runners(case: FixtureCase) -> list[str]: Every state case (declared via `args`) runs BOTH ways: the generated command line through the real argument parser, and the same mapping - through the Python import API — both must produce the same state, + through the Python import API - both must produce the same state, proving CLI/import parity for every behavior. Replay cases assert parser behavior on a raw cliCommand, which has no import equivalent. """ @@ -808,8 +808,8 @@ def case_runners(case: FixtureCase) -> list[str]: def cli_flags_by_field_name() -> dict[str, str]: """Map Config field names to their real CLI flags (from field metadata). - Mechanical snake→kebab casing would be wrong for several fields - (e.g. open_telemetry → --otel, sync_saml_orgs → + Mechanical snake->kebab casing would be wrong for several fields + (e.g. open_telemetry -> --otel, sync_saml_orgs -> --sync-saml-orgs), so the generator reads the same metadata the argument parser is built from. """ @@ -823,8 +823,8 @@ def cli_flags_by_field_name() -> dict[str, str]: def case_cli_arguments(case: FixtureCase, case_name: str) -> list[str]: """Return the case's argv: generated from `args`, or raw cliCommand. - Generated values render as: True → bare flag, False/None → omitted, - list → one comma-joined value, anything else → str(). --maps-path is + Generated values render as: True -> bare flag, False/None -> omitted, + list -> one comma-joined value, anything else -> str(). --maps-path is appended for set commands that do not declare maps_path. 
""" args = case.get("args") @@ -949,7 +949,7 @@ def cli_input_for_case( if command_name == "set" and "maps_path" not in options: options["maps_path"] = FIXTURES_DIR / case_name / "maps.yaml" # Keyword construction (not model_copy) so pydantic validates and - # coerces values exactly as it would for a library consumer — strings + # coerces values exactly as it would for a library consumer - strings # become Paths, lists become tuples. config = cli.Config( src_endpoint=endpoint, @@ -1003,7 +1003,7 @@ def run_fixture_case( cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) # Local runs execute in-process against the in-memory fake, where # client parallelism buys nothing and only adds scheduling - # nondeterminism — pin it to 1 regardless of the case's command + # nondeterminism - pin it to 1 regardless of the case's command # line. Live/performance runs use the command line as written. config_updates: dict[str, object] = {"parallelism": 1} if no_files: diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py index f899213..3277eec 100644 --- a/tests/e2e/test_local_cases.py +++ b/tests/e2e/test_local_cases.py @@ -1,8 +1,8 @@ """Assert every tests.yaml case that runs locally, and validate the registry. Live and performance execution happens in tests/run.py; here, all local-mode -cases run without any network — state cases against an in-memory instance, -replay cases through the real argument parser — and every registry entry is +cases run without any network - state cases against an in-memory instance, +replay cases through the real argument parser - and every registry entry is structurally validated, including the live/performance ones. """ @@ -180,8 +180,8 @@ def test_in_memory_mapping_rules_reject_invalid_structure(self) -> None: def test_no_files_set_dry_run_matches_files_enabled(self) -> None: """no_files must not change a set dry-run's decisions, and writes nothing. 
- The same fixture case runs twice through the import API — once with - files enabled and once with no_files — and must plan identical + The same fixture case runs twice through the import API - once with + files enabled and once with no_files - and must plan identical mutations and end in identical instance state, while the no_files run's temporary artifacts directory stays completely empty. """ diff --git a/tests/run.py b/tests/run.py index f12c68c..cc92950 100644 --- a/tests/run.py +++ b/tests/run.py @@ -204,7 +204,7 @@ # identical structured records; this flag drops them from BOTH handlers. SUPPRESS_PACKAGE_LOGS = threading.Event() -# With --quiet, package chatter stays out of the console entirely — including +# With --quiet, package chatter stays out of the console entirely - including # the expected warnings produced by intentionally-failing cases. Runner # failures are still shown (they log at ERROR), and the file keeps everything. CONSOLE_QUIET = threading.Event() @@ -248,7 +248,7 @@ def filter(self, record: logging.LogRecord) -> bool: class FileLogFormatter(logging.Formatter): - """Time-of-day prefix only — the date is in the log file name. + """Time-of-day prefix only - the date is in the log file name. Command output lines pass through verbatim: the subprocess already timestamps its own lines, so run.py's prefix would just repeat it. 
@@ -831,8 +831,8 @@ class TestSuite: def record(self, name: str, level: str, passed: bool, seconds: float, detail: str = "") -> None: self.results.append(CheckResult(name, level, passed, seconds, detail)) - marker = "✓" if passed else "✗" - suffix = f" — {detail}" if detail and not passed else "" + marker = "PASS" if passed else "FAIL" + suffix = f" - {detail}" if detail and not passed else "" log.log( logging.INFO if passed else logging.ERROR, "%s [%s] %s (%.1fs)%s", @@ -861,7 +861,7 @@ def test_selected(self, *names: str) -> bool: def explicitly_selected(self, *names: str) -> bool: """Return whether a filter token names one of `names`. - Unlike `test_selected`, returns False when no filter was given — + Unlike `test_selected`, returns False when no filter was given - for checks that must be opt-in (instance-wide stress runs). """ if not self.arguments.test_filter: @@ -952,10 +952,7 @@ def run_toolchain_gates(self) -> None: self.gate("ruff check", ["uv", "run", "ruff", "check", "."]) self.gate("ruff format --check", ["uv", "run", "ruff", "format", "--check", "."]) self.gate("pyright", ["uv", "run", "pyright"]) - self.gate( - "confusable characters (non-Python files)", - ["uv", "run", "python", "tests/confusables.py"], - ) + self.gate("non-ASCII characters", ["uv", "run", "python", "tests/unicode_scan.py"]) self.gate( "unit + fixture tests", ["uv", "run", "python", "-m", "unittest", "discover", "-s", "tests"], @@ -980,7 +977,7 @@ def run_fixture_checks(self, update_golden: bool) -> None: if is_replay_case(case): if update_golden: continue - log.info("— %s (parse) —", case_name) + log.info("- %s (parse) -", case_name) started = time.monotonic() failure = run_local_replay_case(case_name) self.record( @@ -997,7 +994,7 @@ def run_fixture_checks(self, update_golden: bool) -> None: self._update_golden_after(FIXTURES_DIR / case_name, result) continue for runner in runners: - log.info("— %s (%s) —", case_name, runner) + log.info("- %s (%s) -", case_name, runner) started = 
time.monotonic() result = run_fixture_case(case_name, runner) self.record( @@ -1036,7 +1033,7 @@ def _update_golden_after(self, case_directory: Path, result: FixtureRunResult) - return after_path.write_text(json.dumps(result.actual_state, indent=2) + "\n", encoding="utf-8") log.info( - "golden: %s after.json updated — review the diff before committing", + "golden: %s after.json updated - review the diff before committing", case_directory.name, ) @@ -1424,12 +1421,12 @@ def check_live_hygiene(self) -> None: details: list[str] = [] if synthetic: details.append( - f"synthetic leftovers from an interrupted run: {synthetic[:5]} — " + f"synthetic leftovers from an interrupted run: {synthetic[:5]} - " "`uv run tests/setup.py --apply` clears them" ) if unknown: details.append( - f"pending bindIDs of unknown origin: {unknown[:5]} — investigate " + f"pending bindIDs of unknown origin: {unknown[:5]} - investigate " "before clearing (an empty setRepositoryPermissionsForUsers on the " "affected repo removes its pending rows)" ) @@ -1629,8 +1626,8 @@ def run_seeded_fixture_apply( ) -> None: """Seed the case's before-state, run it with --apply, verify, restore. - Every involved repo — fixture state repos, exact rule names, and any - declared `live.involvedRepos` — is read, seeded, verified, and + Every involved repo - fixture state repos, exact rule names, and any + declared `live.involvedRepos` - is read, seeded, verified, and restored, all SCOPED to those repos via direct GraphQL (seconds), never through the product's restore command (which performs a full instance capture: minutes at 10k users, and whole-instance restore @@ -1734,7 +1731,7 @@ def run_seeded_fixture_apply( return # Repos in scope but absent from after.json must come back exactly as - # seeded — these are the canaries that detect widened selectors. + # seeded - these are the canaries that detect widened selectors. 
expected_after = { name: after_grants.get(name, before_grants.get(name, set())) for name in involved_names } @@ -1930,8 +1927,8 @@ def run_seeded_org_sync_check(self, environment: dict[str, str]) -> None: The fabricated SAML accounts (tests/setup.yaml samlAccounts) define the desired members of the throwaway orgs derived from the synthetic - groups. Seeding makes one org diverge both ways — a member no SAML - group justifies, and a missing member the group requires — then one + groups. Seeding makes one org diverge both ways - a member no SAML + group justifies, and a missing member the group requires - then one `sync-saml-orgs --apply` must converge every synthetic-group org back to SAML truth, verified by an independent member read-back. """ @@ -2004,7 +2001,7 @@ def seed_organization_divergence( """Force one org's membership to diverge from SAML truth in both directions. Adds `unjustified_member` and removes `required_member`. Creates the - org when missing — createOrganization auto-adds the calling admin, + org when missing - createOrganization auto-adds the calling admin, which is one more unjustified member the sync must remove. """ started = time.monotonic() @@ -2115,7 +2112,7 @@ def repair_organization_divergence( def run_saml_group_change_check(self, environment: dict[str, str]) -> None: """A user added to a mapped SAML group must gain the mapped perms. - Reuses the saml-group-live fixture's mapping (samlGroup → exact + Reuses the saml-group-live fixture's mapping (samlGroup -> exact repos). Baseline: a full apply grants only the group's current members. Then the fabricated SAML account of a non-member gains the mapped group (the same SQL path tests/setup.py uses), the same apply @@ -2321,7 +2318,7 @@ def read_back_pending_repo_names(self, bind_id: str) -> set[str]: """Return the repo names a pending bindID has explicit-API grants on. 
`authorizedUserRepositories` falls back to the pending-permissions - store when the bindID matches no user — the only API that exposes a + store when the bindID matches no user - the only API that exposes a pending bindID's repos. """ names: set[str] = set() @@ -2380,7 +2377,7 @@ def run_live_permission_cycles(self, environment: dict[str, str]) -> None: # any of them is selected. want_user_cycle = self.select("live: set --users apply", "user cycle") # The full cycle applies the ROOT maps.yaml to the whole instance - # (10k users x ~1,150 repos) — an instance-wide stress run that has + # (10k users x ~1,150 repos) - an instance-wide stress run that has # crashed the test instance's Postgres. Opt-in only: # uv run tests/run.py --live "full cycle" want_full_cycle = self.explicitly_selected("live: set --full", "full cycle") @@ -2848,7 +2845,7 @@ def print_summary(self) -> int: failed = len(self.results) - passed for result in self.results: if not result.passed: - log.error("FAILED [%s] %s — %s", result.level, result.name, result.detail) + log.error("FAILED [%s] %s - %s", result.level, result.name, result.detail) skipped_suffix = ( f" Skipped {len(self.skipped_check_names)} check(s) not matching the test filter." if self.skipped_check_names @@ -3296,7 +3293,7 @@ def run_property_checks(seed: int, iterations: int) -> list[PropertyCheckOutcome def load_setup_config() -> dict[str, Any]: - """Parse tests/setup.yaml — the source of truth for fabricated SAML accounts.""" + """Parse tests/setup.yaml - the source of truth for fabricated SAML accounts.""" import yaml return cast("dict[str, Any]", yaml.safe_load(SETUP_CONFIG_PATH.read_text(encoding="utf-8"))) @@ -3339,7 +3336,7 @@ def fixture_maps_repo_scope( touch, so they capture and restore exactly that set. 
Exact `names:` selectors enumerate themselves; any other repo selector (regexes, code-host matchers) requires the case to declare `live.involvedRepos` - covering everything the selector can match — undeclared matches are + covering everything the selector can match - undeclared matches are mutated without restore and only detected by the canary checks. User-side selectors are unrestricted: whatever users a rule matches, the diff --git a/tests/setup.py b/tests/setup.py index a3db8b6..d41f2da 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -157,7 +157,11 @@ class Setup: def record(self, name: str, ok: bool, detail: str) -> None: self.outcomes.append(Outcome(name, ok, detail)) log.log( - logging.INFO if ok else logging.ERROR, "%s %s — %s", "✓" if ok else "✗", name, detail + logging.INFO if ok else logging.ERROR, + "%s %s - %s", + "PASS" if ok else "FAIL", + name, + detail, ) def sql(self, statement: str) -> list[list[str]]: @@ -207,7 +211,7 @@ def check_users_and_repos(self) -> None: "users", user_count >= int(users_config["count"]), f"{user_count} live synthetic users (need {users_config['count']}); " - "bulk creation is out of setup's scope — reprovision the instance if short", + "bulk creation is out of setup's scope - reprovision the instance if short", ) self.record( "repos", @@ -270,7 +274,7 @@ def check_saml_accounts(self) -> None: current = self.fabricated_groups_on_instance(username, service_id, client_id) if current == list(groups): continue - drift.append(f"{username}: {current} → {list(groups)}") + drift.append(f"{username}: {current} -> {list(groups)}") if self.apply: upsert_saml_account( self.config["kubectl"], @@ -371,7 +375,7 @@ def check_pending_permissions(self) -> None: Live fixture cases seed pending bindIDs matching the synthetic prefix and restore them away; leftovers mean an interrupted run and --apply deletes exactly those rows. Anything else has an UNKNOWN - origin — setup must not silently destroy it. 
Investigate, then + origin - setup must not silently destroy it. Investigate, then clear deliberately (an empty setRepositoryPermissionsForUsers on the affected repo removes its pending rows). """ @@ -386,7 +390,7 @@ def check_pending_permissions(self) -> None: not unknown, "none of unknown origin" if not unknown - else f"{len(unknown)} pending bindID(s) of unknown origin: {unknown[:5]} — " + else f"{len(unknown)} pending bindID(s) of unknown origin: {unknown[:5]} - " "investigate before clearing (setup never deletes these)", ) if not synthetic: diff --git a/tests/tests.yaml b/tests/tests.yaml index bbc15a2..ceec101 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -1352,7 +1352,7 @@ cases: Additive set --users with --sync-saml-orgs runs a SCOPED org sync: the selected user is added to the synced org their SAML group requires (created on demand, creator membership cleaned up) and - removed from a synced org their groups no longer justify — while an + removed from a synced org their groups no longer justify - while an out-of-scope member of that same org is left untouched. One repo grant + create + add + creator removal + stale removal = 5 mutations. modes: @@ -1403,7 +1403,7 @@ cases: description: >- Standalone full org sync converges synced orgs both ways: creates the org a SAML group requires (cleaning up the creator's auto-membership) - and empties — but never deletes — a synced org whose SAML group + and empties - but never deletes - a synced org whose SAML group disappeared from every user's assertion. modes: - local diff --git a/tests/unicode_scan.py b/tests/unicode_scan.py new file mode 100644 index 0000000..0b27cb7 --- /dev/null +++ b/tests/unicode_scan.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Fail on non-ASCII characters in tracked text files. + +Usage: uv run python tests/unicode_scan.py +Exit code 1 when any Unicode character outside ASCII is found. 
+""" + +from __future__ import annotations + +import subprocess +import sys +import unicodedata +from pathlib import Path + + +def findings_in_text(text: str) -> list[tuple[int, int, str]]: + """Return 1-based (line, column, character) findings; only LF ends a line.""" + findings: list[tuple[int, int, str]] = [] + for line_number, line in enumerate(text.split("\n"), start=1): + for column_number, character in enumerate(line, start=1): + if not character.isascii(): + findings.append((line_number, column_number, character)) + return findings + + +def tracked_files(root: Path) -> list[Path]: + """Return tracked files the gate should scan.""" + listing = subprocess.run( + ["git", "ls-files", "-z"], + capture_output=True, + text=True, + check=True, + cwd=root, + ) + return [root / name for name in listing.stdout.split("\0") if name] + + +def describe(character: str) -> str: + name = unicodedata.name(character, f"U+{ord(character):04X}") + return f"`{character}` ({name}, U+{ord(character):04X})" + + +def main() -> int: + root = Path(__file__).resolve().parent.parent + finding_count = 0 + for path in tracked_files(root): + try: + text = path.read_text(encoding="utf-8") + except (UnicodeDecodeError, FileNotFoundError, IsADirectoryError): + continue # binary or vanished files are not lintable text + for line_number, column_number, character in findings_in_text(text): + print( + f"{path.relative_to(root)}:{line_number}:{column_number} " + f"non-ASCII character {describe(character)}" + ) + finding_count += 1 + if finding_count: + print(f"\nFound {finding_count} non-ASCII character(s).") + return 1 + print("No non-ASCII characters found.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/unit/test_confusables.py b/tests/unit/test_confusables.py deleted file mode 100644 index 56cfc7d..0000000 --- a/tests/unit/test_confusables.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import unittest - -from tests import confusables - - -class
ConfusablesTests(unittest.TestCase): - def test_ascii_text_has_no_findings(self) -> None: - self.assertEqual(confusables.findings_in_text("plain - ascii 'text' x 2\n"), []) - - def test_prose_characters_are_not_flagged(self) -> None: - # Em dash, arrows, box drawing, and check marks look nothing like - # ASCII; the gate must leave them alone. - self.assertEqual(confusables.findings_in_text("a \u2014 b \u2192 c \u2713 \u2502"), []) - - def test_en_dash_is_flagged_with_position_and_suggestion(self) -> None: - findings = confusables.findings_in_text("first line\nan \u2013 here\n") - self.assertEqual(findings, [(2, 4, "\u2013", "-")]) - - def test_invisible_character_suggests_deletion(self) -> None: - findings = confusables.findings_in_text("zero\u200bwidth") - self.assertEqual(findings, [(1, 5, "\u200b", "")]) - - def test_fullwidth_letters_map_to_ascii(self) -> None: - self.assertEqual(confusables.suggestion_for("\uff43"), "c") - self.assertEqual(confusables.suggestion_for("\uff01"), "!") - - def test_cyrillic_lookalike_is_flagged(self) -> None: - findings = confusables.findings_in_text("p\u0430ssword") - self.assertEqual(findings, [(1, 2, "\u0430", "a")]) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/unit/test_permissions_sourcegraph.py b/tests/unit/test_permissions_sourcegraph.py index 702aa45..f7657de 100644 --- a/tests/unit/test_permissions_sourcegraph.py +++ b/tests/unit/test_permissions_sourcegraph.py @@ -242,7 +242,7 @@ def test_get_users_by_ids_batches_hydration_and_preserves_order(self) -> None: hydrated_ids, [user_id for user_id in requested_user_ids if user_id != "user-30"], ) - # 60 users at the hydration batch size of 25 → 3 requests. + # 60 users at the hydration batch size of 25 -> 3 requests. 
self.assertEqual([len(call) for call in client.calls], [25, 25, 10]) for query in client.queries: self.assertIn("query UsersByIDBatch", query) @@ -303,7 +303,7 @@ def test_user_ids_with_explicit_repos_batches_existence_checks(self) -> None: def test_candidates_without_explicit_repos_pages_past_first_page_sequentially(self) -> None: # Regression: with parallelism=1 and more users than one page, the # selection used to silently consider ONLY the first page (1000 - # users) — every later user was excluded from candidates. + # users) - every later user was excluded from candidates. site_users = _SiteUsersClient(total_count=2500) explicit_repos = _ExplicitReposClient({"user-1500"}) diff --git a/tests/unit/test_restore.py b/tests/unit/test_restore.py index ea660ba..08690e2 100644 --- a/tests/unit/test_restore.py +++ b/tests/unit/test_restore.py @@ -120,19 +120,19 @@ def test_plan_full_restore_restores_and_wipes_pending_grants(self) -> None: overwrite.repository_id: (overwrite.repository_name, overwrite.usernames) for overwrite in plan.overwrites } - # Real users and pending both match — no mutation. + # Real users and pending both match - no mutation. self.assertNotIn(matching_repo_id, overwrites_by_repo) - # Pending grant missing from current state — restored alongside alice. + # Pending grant missing from current state - restored alongside alice. self.assertEqual( (drifted_repo["name"], ("alice", "ghost")), overwrites_by_repo[drifted_repo_id], ) - # Repo with only a pending grant in the target — recreated. + # Repo with only a pending grant in the target - recreated. self.assertEqual( (pending_only_target_repo["name"], ("ghost",)), overwrites_by_repo[pending_only_target_repo_id], ) - # Pending-only repo absent from the target — wiped. + # Pending-only repo absent from the target - wiped. 
self.assertEqual( (pending_only_current_repo["name"], ()), overwrites_by_repo[pending_only_current_repo_id], diff --git a/tests/unit/test_unicode_scan.py b/tests/unit/test_unicode_scan.py new file mode 100644 index 0000000..385aea9 --- /dev/null +++ b/tests/unit/test_unicode_scan.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import unittest + +from tests import unicode_scan + + +class UnicodeScanTests(unittest.TestCase): + def test_ascii_text_has_no_findings(self) -> None: + self.assertEqual(unicode_scan.findings_in_text("plain - ascii 'text' x 2\n"), []) + + def test_non_ascii_characters_are_flagged(self) -> None: + findings = unicode_scan.findings_in_text("first line\na \u2014 b \u2192 c\n") + self.assertEqual(findings, [(2, 3, "\u2014"), (2, 7, "\u2192")]) + + def test_invisible_character_is_flagged(self) -> None: + findings = unicode_scan.findings_in_text("zero\u200bwidth") + self.assertEqual(findings, [(1, 5, "\u200b")]) + + +if __name__ == "__main__": + unittest.main() diff --git a/uv.lock b/uv.lock index db38909..936a07a 100644 --- a/uv.lock +++ b/uv.lock @@ -182,11 +182,11 @@ wheels = [ [[package]] name = "json5" -version = "0.14.0" +version = "0.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9c/4b/6f8906aaf67d501e259b0adab4d312945bb7211e8b8d4dcc77c92320edaa/json5-0.14.0.tar.gz", hash = "sha256:b3f492fad9f6cdbced8b7d40b28b9b1c9701c5f561bef0d33b81c2ff433fefcb", size = 52656, upload-time = "2026-03-27T22:50:48.108Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/7d/05c46a96a78147ae3bf99c2f4169ce144a70220b8d6fcd56f6ec368b8ce9/json5-0.15.0.tar.gz", hash = "sha256:7424d1f1eb1d56da6e3d70643f53619862b4ce81440bdb8ecfd6f875e5ba4a71", size = 53278, upload-time = "2026-06-19T20:08:27.716Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/42/cf027b4ac873b076189d935b135397675dac80cb29acb13e1ab86ad6c631/json5-0.14.0-py3-none-any.whl", hash = 
"sha256:56cf861bab076b1178eb8c92e1311d273a9b9acea2ccc82c276abf839ebaef3a", size = 36271, upload-time = "2026-03-27T22:50:47.073Z" }, + { url = "https://files.pythonhosted.org/packages/eb/be/59527c99478aade6bb33a68d72e6e18dd4e6ff6eacfc7d01bdb15bc76912/json5-0.15.0-py3-none-any.whl", hash = "sha256:56636a30c0e8a4665fe2179c0212f32eae3796dea89ea6f649b9436ecdb39618", size = 36570, upload-time = "2026-06-19T20:08:26.748Z" }, ] [[package]] @@ -200,31 +200,31 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.42.1" +version = "1.43.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/1c/125e1c936c0873796771b7f04f6c93b9f1bf5d424cea90fda94a99f61da8/opentelemetry_api-1.42.1.tar.gz", hash = "sha256:56c63bea9f77b62856be8c47600474acad853b2924b99b1687c4cb6297166716", size = 72296, upload-time = "2026-05-21T16:32:49.335Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/cc/e4c9584181f86494df0f6bdec1a4f3280c50db44704dc2a407e994fc87bb/opentelemetry_api-1.43.0.tar.gz", hash = "sha256:107d0d03857ea8fc7c5fcbbbd83f800c281f0d560553d61c1d675fccfd1761c1", size = 73476, upload-time = "2026-06-24T15:19:55.323Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/ca/9520cc1f3dfbbd03ac5903bbf55833e257bc64b1cf30fa8b0d6df374d821/opentelemetry_api-1.42.1-py3-none-any.whl", hash = "sha256:51a69edacadbc03a8950ace1c4c21099cacc538820ac2c9e36277e78cebba714", size = 61311, upload-time = "2026-05-21T16:32:28.822Z" }, + { url = "https://files.pythonhosted.org/packages/17/83/6dba32b85f31868400440dc7ad2ca1eab94cbbf3a7b0459ed39f8311a9e2/opentelemetry_api-1.43.0-py3-none-any.whl", hash = "sha256:20acf45e9b21851926835292e4045d290acade1edd2ff3de86d2f069687ba1fd", size = 61912, upload-time = "2026-06-24T15:19:35.434Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-common" -version = "1.42.1" +version = "1.43.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-proto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0e/9c/216acfeaedadf2e1937f4373929b20f73197c5c4a2546d4f584b7fa63813/opentelemetry_exporter_otlp_proto_common-1.42.1.tar.gz", hash = "sha256:04f1f01fb597c4249dfcd7f8b861c902c2102369d376d9d346ff38de4469a2ee", size = 21433, upload-time = "2026-05-21T16:32:55.526Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/c1/e8098490ab15abf116dcaf9fa89ededcb35547c7d08d4b5a62f573dc1e63/opentelemetry_exporter_otlp_proto_common-1.43.0.tar.gz", hash = "sha256:c4e32ba6d6b13bdb2b8f6764c4fd28d00192826561aa04f6d14eedfce7ac076f", size = 20197, upload-time = "2026-06-24T15:20:00.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/43/2375e7612e1121a4518c17603b6e0b03ad94f565aafad53f464dc5be2bf6/opentelemetry_exporter_otlp_proto_common-1.42.1-py3-none-any.whl", hash = "sha256:f48d395ab815b444da118868977e9798ea354c25737d5cf39578ae894011c140", size = 17327, upload-time = "2026-05-21T16:32:33.387Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b2/41ebc74ae1d5859901f1b69305de58724bf043381103d6ef413521cbc35a/opentelemetry_exporter_otlp_proto_common-1.43.0-py3-none-any.whl", hash = "sha256:123c3f9cc87218562490c63b36f497bf3a722faf174a515d1443f31ababa6264", size = 17048, upload-time = "2026-06-24T15:19:41.264Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-http" -version = "1.42.1" +version = "1.43.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "googleapis-common-protos" }, @@ -235,48 +235,48 @@ dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/32/826bfa1d80ecea24f47808de03cd4a0d13c17ecc07712f45123f0f61e4ac/opentelemetry_exporter_otlp_proto_http-1.42.1.tar.gz", hash = "sha256:bf142a21035d7571ac3a09cb2e5639f49886f243972883cfe777ed3bf02b734d", size = 25406, upload-time = 
"2026-05-21T16:32:56.807Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/92/0b9f56412483a8891d4843890294796c9df8ab42417bd9bad8035d840cb3/opentelemetry_exporter_otlp_proto_http-1.43.0.tar.gz", hash = "sha256:fa8a42bb7d00ee5391f4c0b04d8e6a46c03caa437903296ab73a81dc11ba118f", size = 25406, upload-time = "2026-06-24T15:20:01.515Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/96/82cb223a1502f0787d4bbff12907f5f8d870a50731febcd5818d93ef9555/opentelemetry_exporter_otlp_proto_http-1.42.1-py3-none-any.whl", hash = "sha256:00a16da1b312a1d6c7233d600d557c91df71125af73020f3b9a7765bd699d59d", size = 21793, upload-time = "2026-05-21T16:32:35.277Z" }, + { url = "https://files.pythonhosted.org/packages/b3/20/b685ed7af2e17c29ffc8af56f1fa8bc2033258fc30fb0d2b722f49d13ba0/opentelemetry_exporter_otlp_proto_http-1.43.0-py3-none-any.whl", hash = "sha256:647f603aa8efdbdb4dbff842e0729d0406a6fff26b295a72d3d60e7d963b2610", size = 21795, upload-time = "2026-06-24T15:19:43.164Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.42.1" +version = "1.43.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b4/55/63eac3e1089b768ba014091fdd2ae8a9a440c821ef5e2b786909c94c8836/opentelemetry_proto-1.42.1.tar.gz", hash = "sha256:c6a51e6b4f05ae63565f3a113217f3d2bfaec68f78c02d7a6c85f9010d1cfca6", size = 45839, upload-time = "2026-05-21T16:33:03.937Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/b9/d357faefb40bda1d4799913e6af611171ff22a2dedcb93576bc92242d056/opentelemetry_proto-1.43.0.tar.gz", hash = "sha256:224778df17e1f3fafeaaa21d874236ca5f6ffc2f86e0899298ec7351aac27924", size = 46481, upload-time = "2026-06-24T15:20:07.625Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/9d/171c02c84a76940b7e601805b3bb536985aded9168fbcc9ba52f0a730fa2/opentelemetry_proto-1.42.1-py3-none-any.whl", hash = 
"sha256:dedb74cba2886c59c7789b227a7a670613025a07489040050aedff6e5c0fb43c", size = 71782, upload-time = "2026-05-21T16:32:44.867Z" }, + { url = "https://files.pythonhosted.org/packages/ed/a7/3e5308cf548b8f72529c7db1afdb3a404211982376a12927fd7759f77bf3/opentelemetry_proto-1.43.0-py3-none-any.whl", hash = "sha256:c58f1f7ef84bc7dc2834016c0c37fe0081dde7ca9f6339be1970fbf9cdaaa90d", size = 72489, upload-time = "2026-06-24T15:19:51.164Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.42.1" +version = "1.43.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/f7/b390bd9bfd703bf98a68fea1f27786c6872331fd617164a54b8a59bdc008/opentelemetry_sdk-1.42.1.tar.gz", hash = "sha256:8c834e8f8c9ba4171d4ec843d0cb8a67e4c7394d3f9e9297e582cbd9456ddbf7", size = 239262, upload-time = "2026-05-21T16:33:04.641Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/eb/5041074274ac0956b03637cc039d434569112468e875eddfcc9a0674ce06/opentelemetry_sdk-1.43.0.tar.gz", hash = "sha256:d8187c81c162df9913e4003dd6485f7390d9a24fc17026ec7387b8b8218b08e9", size = 254744, upload-time = "2026-06-24T15:20:08.467Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/6b/4287766cfbde577ae2272e8884abac325aeaac0d64f41c61d5b8cc595105/opentelemetry_sdk-1.42.1-py3-none-any.whl", hash = "sha256:083cd4bbfaa5aa7b5a9e552430d9951219967cfb27aa61feb13a77aba1fc839d", size = 170907, upload-time = "2026-05-21T16:32:45.894Z" }, + { url = "https://files.pythonhosted.org/packages/49/e3/b17be23af124201c9f52eececd4cc8ddfed1597d37b4ee771895d325805c/opentelemetry_sdk-1.43.0-py3-none-any.whl", hash = "sha256:d1323a547c1ce69d6a069a17a44b7da82bb8b332051ecb074041f87642c86823", size = 178852, upload-time = "2026-06-24T15:19:52.169Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.63b1" 
+version = "0.64b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/99/4d7dd6df64795951413ce6e815f8cf1eb191daf7196ae86574589643d5f3/opentelemetry_semantic_conventions-0.63b1.tar.gz", hash = "sha256:3daf963611334b365e98a57438183eb012d3bfb40b2d931a9af613476b8701a9", size = 148340, upload-time = "2026-05-21T16:33:05.455Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/30/5f26df29509eccd86b99b481ac9ffa39da49ba9577cc69071c552ae30447/opentelemetry_semantic_conventions-0.64b0.tar.gz", hash = "sha256:72f76fb2d1582d9d033dd1fcd84532e961e6ff3d90d24ba6fabc72975a83864c", size = 148340, upload-time = "2026-06-24T15:20:09.267Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/7a/7fe66f5f3682b1dd47d88cc4e11f1c6c0966b737de2d16671146e23c39a5/opentelemetry_semantic_conventions-0.63b1-py3-none-any.whl", hash = "sha256:dfe5ef4dee82586b746f522b818ceb298d00b3d59f660042bd79404bff8d0682", size = 203713, upload-time = "2026-05-21T16:32:47.016Z" }, + { url = "https://files.pythonhosted.org/packages/f2/ca/23ba87a221b574a7c5a99d48849d80bfe8b047624681357e2b002e566187/opentelemetry_semantic_conventions-0.64b0-py3-none-any.whl", hash = "sha256:ea77e85e354b8f604ddbe5f3d9135216f982fa4d77e5859ac30f6d8a50505aa6", size = 203713, upload-time = "2026-06-24T15:19:53.339Z" }, ] [[package]] @@ -546,9 +546,9 @@ dev = [ [package.metadata] requires-dist = [ - { name = "json5", specifier = ">=0.14.0" }, + { name = "json5", specifier = ">=0.15.0" }, { name = "pyyaml", specifier = ">=6.0.3" }, - { name = "src-py-lib", extras = ["otel"], specifier = "==0.3.0" }, + { name = "src-py-lib", extras = ["otel"], specifier = "==0.3.1" }, ] [package.metadata.requires-dev] @@ -560,7 +560,7 @@ dev = [ [[package]] name = "src-py-lib" -version = "0.3.0" +version = "0.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = 
"httpx" }, @@ -568,9 +568,9 @@ dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/5565c6bde9a214d6fc6100cd0a77bcd11facbdbbe5509d6f073a43747dbd/src_py_lib-0.3.0.tar.gz", hash = "sha256:683699e9d25b9c0a515c861e90d3613545d3625dd51ef3bb45f5eee5de29921a", size = 96387, upload-time = "2026-06-12T09:49:14.754Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/bb/65afd84cf2c730a50d5b995236c5cf7700c35d397f375b23517f44f97d72/src_py_lib-0.3.1.tar.gz", hash = "sha256:e2fb46269fb0a493275a2e7701b7ae34532c11f7167f98481c09370abb3ab41f", size = 97312, upload-time = "2026-06-26T06:51:23.265Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/82/60c11be1a31e54bff2653803c94600b181963490eacc8392e64eb8d7bdfc/src_py_lib-0.3.0-py3-none-any.whl", hash = "sha256:0508960fddf49211eecc0e01018f4074fba8fbabe87a0cd0909d645b581bc25e", size = 54388, upload-time = "2026-06-12T09:49:13.617Z" }, + { url = "https://files.pythonhosted.org/packages/93/a1/cfc286ec004747082fa9f6d17058dcb4c80e45d68d29a4e9ebf456be9a70/src_py_lib-0.3.1-py3-none-any.whl", hash = "sha256:f507a4cf3738275fd5dc6b7efd954b1ed420d3c934ed53f21739f46464fd2705", size = 54382, upload-time = "2026-06-26T06:51:21.855Z" }, ] [package.optional-dependencies]