Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ jobs:
- name: Validate lockfile
run: uv lock --check

- name: Scan for Unicode characters
run: uv run --frozen python tests/unicode_scan.py

- name: Lint Python
run: uv run --frozen ruff check .

Expand Down
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
actionlint
npx --yes markdownlint-cli2@0.22.1
uv sync
uv run python tests/unicode_scan.py
uv run ruff format .
uv run ruff check .
uv run pyright
Expand Down
2 changes: 1 addition & 1 deletion src/src_py_lib/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def cli_logging_handlers(
"""Attach terminal (and optional bridge) handlers to the named loggers.

Adds and removes only its own handlers, restores prior logger levels on
exit, and never touches the root logger or other handlers — safe to
exit, and never touches the root logger or other handlers - safe to
compose with a host application's logging configuration.

With `suppress_http_dependency_logs=False`, httpx/httpcore loggers are
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def test_format_tsv_value_sanitizes_and_truncates_non_url_fields(self) -> None:

def test_display_width_handles_wide_and_combining_characters(self) -> None:
self.assertEqual(display_width("a"), 1)
self.assertEqual(display_width(""), 2)
self.assertEqual(display_width("\u6e2c"), 2)
self.assertEqual(display_width("e\u0301"), 1)
self.assertEqual(pad_display("", 4), " ")
self.assertEqual(pad_display("\u6e2c", 4), "\u6e2c ")

def test_write_tsv_creates_aligned_table(self) -> None:
with tempfile.TemporaryDirectory() as directory:
Expand Down
22 changes: 22 additions & 0 deletions tests/test_unicode_scan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from __future__ import annotations

import unittest

from tests import unicode_scan


class UnicodeScanTests(unittest.TestCase):
    """Exercise ``unicode_scan.findings_in_text`` on small in-memory strings."""

    def test_ascii_text_has_no_findings(self) -> None:
        # ASCII-only input, punctuation included, must produce no findings.
        sample = "plain - ascii 'text' x 2\n"
        self.assertEqual(unicode_scan.findings_in_text(sample), [])

    def test_non_ascii_characters_are_flagged(self) -> None:
        # Both offenders sit on the second line; positions are 1-based.
        expected = [(2, 3, "\u2014"), (2, 7, "\u2192")]
        actual = unicode_scan.findings_in_text("first line\na \u2014 b \u2192 c\n")
        self.assertEqual(actual, expected)

    def test_invisible_character_is_flagged(self) -> None:
        # A zero-width space is reported like any other non-ASCII character.
        expected = [(1, 5, "\u200b")]
        actual = unicode_scan.findings_in_text("zero\u200bwidth")
        self.assertEqual(actual, expected)


if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()
65 changes: 65 additions & 0 deletions tests/unicode_scan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""Fail on non-ASCII characters in tracked text files.

Usage: uv run python tests/unicode_scan.py
Exit code 1 when any Unicode character outside ASCII is found.
"""

from __future__ import annotations

import subprocess
import sys
import unicodedata
from pathlib import Path


def findings_in_text(text: str) -> list[tuple[int, int, str]]:
    """Return (line, column, character) findings, 1-based.

    Every character for which ``str.isascii()`` is false is reported, in
    reading order.

    Lines end at LF, CRLF or a lone CR only. ``str.splitlines()`` is
    deliberately not used: it also breaks on U+0085, U+2028 and U+2029 and
    removes them from the pieces it returns, so those non-ASCII characters
    would never be reported. It additionally breaks on VT, FF, FS, GS and
    RS, which shifts line numbers away from what git and editors display.
    """
    # Fast path: most scanned files are pure ASCII and need no per-line work.
    if text.isascii():
        return []

    # Fold CRLF and lone CR into LF so they still count as line endings,
    # then split on LF alone so no non-ASCII separator is swallowed.
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")

    findings: list[tuple[int, int, str]] = []
    for line_number, line in enumerate(normalized.split("\n"), start=1):
        for column_number, character in enumerate(line, start=1):
            if not character.isascii():
                findings.append((line_number, column_number, character))
    return findings


def tracked_files(root: Path) -> list[Path]:
    """List the paths git tracks under *root*, each joined onto *root*.

    Runs ``git ls-files -z`` with *root* as the working directory. Because
    ``check=True`` is passed, a non-zero git exit status raises
    ``subprocess.CalledProcessError``.
    """
    command = ["git", "ls-files", "-z"]
    completed = subprocess.run(
        command,
        cwd=root,
        check=True,
        capture_output=True,
        text=True,
    )
    # -z output is NUL-delimited; filter(None, ...) drops the empty fields
    # that splitting leaves behind (e.g. after the final NUL).
    names = filter(None, completed.stdout.split("\0"))
    return [root / entry for entry in names]


def describe(character: str) -> str:
    """Format *character* as a backtick-quoted glyph with name and code point.

    When ``unicodedata`` has no name for the character, the code point
    string is used in place of the name.
    """
    code_point = "U+%04X" % ord(character)
    label = unicodedata.name(character, code_point)
    return "`{}` ({}, {})".format(character, label, code_point)


def main() -> int:
    """Scan every tracked file and print one line per non-ASCII character.

    The repository root is taken to be the parent of this file's directory.
    Returns 1 when at least one finding was printed, otherwise 0.
    """
    repo_root = Path(__file__).resolve().parent.parent
    total = 0
    for tracked in tracked_files(repo_root):
        try:
            contents = tracked.read_text(encoding="utf-8")
        except (UnicodeDecodeError, FileNotFoundError, IsADirectoryError):
            # Not lintable text: undecodable as UTF-8, missing, or a directory.
            continue
        for row, col, offender in findings_in_text(contents):
            location = f"{tracked.relative_to(repo_root)}:{row}:{col}"
            print(f"{location} non-ASCII character {describe(offender)}")
            total += 1

    if not total:
        print("No non-ASCII characters found.")
        return 0
    print(f"\nFound {total} non-ASCII character(s).")
    return 1


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())