From dd13097378522c9a4dcaef1111d6c337b2a9abaf Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 3 Jul 2026 13:16:42 +0900 Subject: [PATCH 1/5] Run conformance tests via pytest --- .github/workflows/ci.yaml | 1 - poe_tasks.toml | 28 +--------- scripts/generate_cel.py | 2 +- scripts/generate_protovalidate.py | 5 +- test/conformance/test_conformance.py | 77 ++++++++++++++++++++++++++++ test/test_format.py | 4 +- test/versions.py | 21 ++++++++ 7 files changed, 105 insertions(+), 33 deletions(-) create mode 100644 test/conformance/test_conformance.py create mode 100644 test/versions.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 989936a..0634b7a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -34,7 +34,6 @@ jobs: python-version: ${{ matrix.python-version }} - run: uv sync - run: uv run poe test - - run: uv run poe test-conformance lint: runs-on: ubuntu-latest diff --git a/poe_tasks.toml b/poe_tasks.toml index 021bc60..a83412e 100644 --- a/poe_tasks.toml +++ b/poe_tasks.toml @@ -1,8 +1,5 @@ #:schema https://json.schemastore.org/partial-poe.json -[env] -PROTOVALIDATE_VERSION.default = "v1.2.0" - [tasks.add-license-header] help = "Add license header to all source files" cmd = """ @@ -28,8 +25,6 @@ help = "Run code checks" sequence = [ "lint", "test", - "test-conformance", - "test-conformance-legacy", ] [tasks.diffcheck] @@ -49,7 +44,7 @@ sequence = [ script = "scripts.generate_cel:main" [tasks.generate-protovalidate] -script = "scripts.generate_protovalidate:main(environ['PROTOVALIDATE_VERSION'])" +script = "scripts.generate_protovalidate:main" [tasks.generate-test] sequence = [ @@ -116,27 +111,6 @@ sequence = [ { cmd = "tombi lint" }, ] -[tasks.test-conformance] -help = "Run the CEL conformance tests" -cmd = """ -go run github.com/bufbuild/protovalidate/tools/protovalidate-conformance@${PROTOVALIDATE_VERSION} - --strict_message - --expected_failures=test/conformance/nonconforming.yaml - --timeout 10s - python 
-- -m test.conformance.runner -""" - -[tasks.test-conformance-legacy] -help = "Run the CEL conformance tests through the legacy google.protobuf message path" -env = { PROTOVALIDATE_CONFORMANCE_LEGACY = "1" } -cmd = """ -go run github.com/bufbuild/protovalidate/tools/protovalidate-conformance@${PROTOVALIDATE_VERSION} - --strict_message - --expected_failures=test/conformance/nonconforming.yaml - --timeout 10s - python -- -m test.conformance.runner -""" - [tasks.test] help = "Run unit tests" cmd = "pytest" diff --git a/scripts/generate_cel.py b/scripts/generate_cel.py index efac2f5..7af137b 100644 --- a/scripts/generate_cel.py +++ b/scripts/generate_cel.py @@ -19,7 +19,7 @@ from fix_protobuf_imports.fix_protobuf_imports import fix_protobuf_imports -from test.test_format import CEL_SPEC_VERSION +from test.versions import CEL_SPEC_VERSION test_dir = Path(__file__).parent.parent / "test" diff --git a/scripts/generate_protovalidate.py b/scripts/generate_protovalidate.py index f541ac9..91f5da3 100644 --- a/scripts/generate_protovalidate.py +++ b/scripts/generate_protovalidate.py @@ -17,8 +17,11 @@ import subprocess from pathlib import Path +from test.versions import PROTOVALIDATE_VERSION -def main(version: str) -> None: + +def main() -> None: + version = PROTOVALIDATE_VERSION if re.match(r"^v\d+\.\d+\.\d+(\-.+)?$", version): # Version tag, fetch from BSR protovalidate_path = f"buf.build/bufbuild/protovalidate:{version}" diff --git a/test/conformance/test_conformance.py b/test/conformance/test_conformance.py new file mode 100644 index 0000000..59fe5e0 --- /dev/null +++ b/test/conformance/test_conformance.py @@ -0,0 +1,77 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path +from textwrap import dedent + +import pytest + +from test.versions import PROTOVALIDATE_VERSION + + +def maybe_patch_args_with_debug(args: list[str]) -> list[str]: + # Do a best effort to invoke the child with debugging. + # This invokes internal methods from bundles provided by the IDE + # and may not always work. + try: + from pydevd import ( # ty: ignore[unresolved-import] - provided by IDE # pyright: ignore[reportMissingImports] # noqa: PLC0415 + _pydev_bundle, + ) + + return _pydev_bundle.pydev_monkey.patch_args(args) + except Exception: + return args + + +@pytest.mark.parametrize("legacy", [False, True], ids=["py", "legacy"]) +def test_conformance(*, legacy: bool) -> None: + # Workaround pydevd monkeypatching of -m invocation not being compatible + # with Python 3.14 yet by executing a script that uses runpy itself. + # pydevd does monkeypatch -c form correctly. 
+ script = dedent( + """ + import runpy + runpy.run_module( + 'test.conformance.runner', + run_name='__main__', + alter_sys=True + ) + """ + ) + command = [sys.executable, "--", "-c", script] + command = maybe_patch_args_with_debug(command) + + env = os.environ.copy() + if legacy: + env["PROTOVALIDATE_CONFORMANCE_LEGACY"] = "1" + + subprocess.run( # noqa: S603 + [ # noqa: S607 + "go", + "run", + f"github.com/bufbuild/protovalidate/tools/protovalidate-conformance@{PROTOVALIDATE_VERSION}", + "--strict_message", + f"--expected_failures={Path(__file__).parent / 'nonconforming.yaml'}", + "--timeout", + "10s", + *command, + ], + env=env, + check=True, + ) diff --git a/test/test_format.py b/test/test_format.py index c045fca..cf375a7 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -27,9 +27,7 @@ from .gen.cel.expr import eval_pb2 from .gen.cel.expr.conformance.test import simple_pb2 - -# Version of the cel-spec that this implementation is conformant with. -CEL_SPEC_VERSION = "v0.25.1" +from .versions import CEL_SPEC_VERSION skipped_tests = [ # cel-python seems to have a bug with ints and booleans in the same map which evaluate to the same value diff --git a/test/versions.py b/test/versions.py new file mode 100644 index 0000000..4342b4c --- /dev/null +++ b/test/versions.py @@ -0,0 +1,21 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +# Version of the cel-spec that this implementation is conformant with. +CEL_SPEC_VERSION = os.getenv("CEL_SPEC_VERSION", "v0.25.1") + +# Version of protovalidate this implementation targets. +PROTOVALIDATE_VERSION = os.getenv("PROTOVALIDATE_VERSION", "v1.2.0") From 6bf6154317abcb72707c2fb667c1ca5c4cf49944 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 3 Jul 2026 13:18:07 +0900 Subject: [PATCH 2/5] cleanup --- scripts/generate_protovalidate.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/generate_protovalidate.py b/scripts/generate_protovalidate.py index 91f5da3..8004061 100644 --- a/scripts/generate_protovalidate.py +++ b/scripts/generate_protovalidate.py @@ -21,17 +21,16 @@ def main() -> None: - version = PROTOVALIDATE_VERSION - if re.match(r"^v\d+\.\d+\.\d+(\-.+)?$", version): + if re.match(r"^v\d+\.\d+\.\d+(\-.+)?$", PROTOVALIDATE_VERSION): # Version tag, fetch from BSR - protovalidate_path = f"buf.build/bufbuild/protovalidate:{version}" - protovalidate_testing_path = f"buf.build/bufbuild/protovalidate-testing:{version}" + protovalidate_path = f"buf.build/bufbuild/protovalidate:{PROTOVALIDATE_VERSION}" + protovalidate_testing_path = f"buf.build/bufbuild/protovalidate-testing:{PROTOVALIDATE_VERSION}" else: # Not a tag, generally an unreleased commit, fetch directly from git - protovalidate_path = f"https://github.com/bufbuild/protovalidate.git#subdir=proto/protovalidate,ref={version}" - protovalidate_testing_path = ( - f"https://github.com/bufbuild/protovalidate.git#subdir=proto/protovalidate-testing,ref={version}" + protovalidate_path = ( + f"https://github.com/bufbuild/protovalidate.git#subdir=proto/protovalidate,ref={PROTOVALIDATE_VERSION}" ) + protovalidate_testing_path = f"https://github.com/bufbuild/protovalidate.git#subdir=proto/protovalidate-testing,ref={PROTOVALIDATE_VERSION}" repo = Path(__file__).parent.parent From 7e6444eb29d2ff2bcc6727b3a320151c78190604 Mon Sep 17 00:00:00 2001 
From: Anuraag Agrawal Date: Fri, 3 Jul 2026 14:01:11 +0900 Subject: [PATCH 3/5] Add cel-expr-python backend --- protovalidate/internal/_core.py | 134 ++ protovalidate/internal/backend.py | 32 + protovalidate/internal/celexpr/__init__.py | 28 + protovalidate/internal/celexpr/bridge.py | 76 + protovalidate/internal/celexpr/extra_func.py | 1686 ++++++++++++++++++ protovalidate/internal/celexpr/rules.py | 1077 +++++++++++ protovalidate/internal/rules.py | 106 +- protovalidate/validator.py | 95 +- pyproject.toml | 28 + test/conformance/nonconforming.cel-expr.yaml | 5 + test/conformance/runner.py | 16 +- test/conformance/test_conformance.py | 14 +- test/conftest.py | 45 + test/test_benchmark.py | 14 +- test/test_validate.py | 5 +- uv.lock | 44 +- 16 files changed, 3274 insertions(+), 131 deletions(-) create mode 100644 protovalidate/internal/_core.py create mode 100644 protovalidate/internal/backend.py create mode 100644 protovalidate/internal/celexpr/__init__.py create mode 100644 protovalidate/internal/celexpr/bridge.py create mode 100644 protovalidate/internal/celexpr/extra_func.py create mode 100644 protovalidate/internal/celexpr/rules.py create mode 100644 test/conformance/nonconforming.cel-expr.yaml create mode 100644 test/conftest.py diff --git a/protovalidate/internal/_core.py b/protovalidate/internal/_core.py new file mode 100644 index 0000000..921b050 --- /dev/null +++ b/protovalidate/internal/_core.py @@ -0,0 +1,134 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Backend-agnostic rule-engine primitives shared by both CEL backends. + +``Violation``, ``RuleContext``, ``CompilationError``, and the ``Rules`` ABC do +not depend on the CEL implementation (celpy vs cel-expr-python) or the +descriptor/value model (protobuf-py vs google.protobuf) — they only speak the +public ``validate_pb`` path types. Both the celpy engine (``rules.py``) and the +cel-expr-python engine (``celexpr/rules.py``) import them from here so violation +output and path bookkeeping are identical across backends. +""" + +import abc +import typing + +from protovalidate._gen.buf.validate import validate_pb + + +class CompilationError(Exception): + pass + + +class Violation: + """A singular rule violation. + + Field and rule paths accumulate as element lists during recursion + (messages are immutable and do not auto-vivify), then materialize into a + ``validate_pb.Violation`` lazily via :attr:`proto`. 
+ """ + + field_value: typing.Any + rule_value: typing.Any + + def __init__( + self, + *, + field_value: typing.Any = None, + rule_value: typing.Any = None, + field: validate_pb.FieldPath | None = None, + rule: validate_pb.FieldPath | None = None, + rule_id: str = "", + message: str = "", + for_key: bool = False, + ): + self.field_value = field_value + self.rule_value = rule_value + self._field_elements: list[validate_pb.FieldPathElement] = list(field.elements) if field is not None else [] + self._rule_elements: list[validate_pb.FieldPathElement] = list(rule.elements) if rule is not None else [] + self._rule_id = rule_id + self._message = message + self._for_key = for_key + + def append_field_element(self, element: validate_pb.FieldPathElement) -> None: + self._field_elements.append(element) + + def extend_rule_elements(self, elements: list[validate_pb.FieldPathElement]) -> None: + self._rule_elements.extend(elements) + + def finalize_paths(self) -> None: + """Reverses the accumulated leaf-to-root paths into root-to-leaf order.""" + self._field_elements.reverse() + self._rule_elements.reverse() + + @property + def proto(self) -> validate_pb.Violation: + kwargs: dict[str, typing.Any] = { + "rule_id": self._rule_id, + "message": self._message, + "for_key": self._for_key, + } + if self._field_elements: + kwargs["field"] = validate_pb.FieldPath(elements=list(self._field_elements)) + if self._rule_elements: + kwargs["rule"] = validate_pb.FieldPath(elements=list(self._rule_elements)) + return validate_pb.Violation(**kwargs) + + +class RuleContext: + """The state associated with a single rule evaluation.""" + + _violations: list[Violation] + + def __init__(self, *, fail_fast: bool = False): + self._fail_fast = fail_fast + self._violations = [] + + @property + def violations(self) -> list[Violation]: + return self._violations + + def add(self, violation: Violation): + self._violations.append(violation) + + def add_errors(self, other_ctx: "RuleContext"): + 
self._violations.extend(other_ctx.violations) + + def add_field_path_element(self, element: validate_pb.FieldPathElement): + for violation in self._violations: + violation.append_field_element(element) + + def add_rule_path_elements(self, elements: list[validate_pb.FieldPathElement]): + for violation in self._violations: + violation.extend_rule_elements(elements) + + @property + def done(self) -> bool: + return self._fail_fast and self.has_errors() + + def has_errors(self) -> bool: + return len(self._violations) > 0 + + def sub_context(self) -> "RuleContext": + return RuleContext(fail_fast=self._fail_fast) + + +class Rules(abc.ABC): + """The rules associated with a single 'rules' message.""" + + @abc.abstractmethod + def validate(self, ctx: RuleContext, message: typing.Any) -> None: + """Validate the message against the rules in this rule.""" + ... diff --git a/protovalidate/internal/backend.py b/protovalidate/internal/backend.py new file mode 100644 index 0000000..f933d5e --- /dev/null +++ b/protovalidate/internal/backend.py @@ -0,0 +1,32 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Which CEL backend is available. + +The cel-expr-python engine needs both ``cel_expr_python`` (the cel-cpp binding) +and ``google.protobuf`` (the pool/message model it evaluates against). When both +import, ``Validator`` selects it automatically; otherwise it falls back to the +pure-Python celpy engine, which is always present. 
There is no public switch — +this is pure auto-detect. Tests force the fallback by monkeypatching +``CEL_EXPR_AVAILABLE`` to ``False`` before constructing a ``Validator``. +""" + +import importlib.util + + +def _detect() -> bool: + return all(importlib.util.find_spec(name) is not None for name in ("cel_expr_python", "google.protobuf")) + + +CEL_EXPR_AVAILABLE: bool = _detect() diff --git a/protovalidate/internal/celexpr/__init__.py b/protovalidate/internal/celexpr/__init__.py new file mode 100644 index 0000000..ebb05be --- /dev/null +++ b/protovalidate/internal/celexpr/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The optional cel-expr-python (cel-cpp) validation engine. + +Importing this package requires ``cel_expr_python`` and ``google.protobuf`` to be +installed; the validator only reaches for it when it imports successfully (see +``protovalidate.internal.backend``). Everything below the engine boundary speaks +google.protobuf descriptors and messages — protobuf-py values cross in through +``GoogleBridge``. 
+""" + +from protovalidate.internal.celexpr.bridge import GoogleBridge +from protovalidate.internal.celexpr.extra_func import make_extension +from protovalidate.internal.celexpr.rules import RuleFactory + +__all__ = ["GoogleBridge", "RuleFactory", "make_extension"] diff --git a/protovalidate/internal/celexpr/bridge.py b/protovalidate/internal/celexpr/bridge.py new file mode 100644 index 0000000..3399c01 --- /dev/null +++ b/protovalidate/internal/celexpr/bridge.py @@ -0,0 +1,76 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Bridges protobuf-py messages into google.protobuf for cel-expr-python. + +protovalidate's public API is protobuf-py, but the CEL engine (cel-expr-python / +cel-cpp) only ingests google.protobuf messages. This bridge lazily mirrors the +protobuf-py descriptors it encounters into google's global descriptor pool — the +same pool cel-expr-python evaluates against — and re-creates message values as +google dynamic messages by a binary round trip. + +Mirroring uses the process-wide default pool (the pool cel-cpp resolves binding +types against), registering each file once and tolerating files already present. +The relocatable / no-conflict property protovalidate offers lives at the +protobuf-py public layer; this google pool is an internal evaluation detail. 
+""" + +from __future__ import annotations + +import typing + +from google.protobuf import descriptor_pb2, descriptor_pool, message, message_factory + +if typing.TYPE_CHECKING: + import protobuf + + +class GoogleBridge: + """Lazily mirrors protobuf-py descriptors into google's pool and bridges + protobuf-py message values to google dynamic messages.""" + + def __init__(self) -> None: + self._pool = descriptor_pool.Default() + self._mirrored: set[str] = set() + self._classes: dict[str, type[message.Message]] = {} + + def _mirror_file(self, desc_file: typing.Any) -> None: + """Registers a protobuf-py DescFile (and its transitive deps) into the + google pool, dependencies first, skipping files already present.""" + if desc_file.name in self._mirrored: + return + self._mirrored.add(desc_file.name) + for dep in desc_file.dependencies: + self._mirror_file(dep) + try: + self._pool.FindFileByName(desc_file.name) + except KeyError: + proto = descriptor_pb2.FileDescriptorProto.FromString(desc_file.proto.to_binary()) + self._pool.Add(proto) + + def google_class(self, desc: typing.Any) -> type[message.Message]: + """The google message class mirroring a protobuf-py DescMessage.""" + cls = self._classes.get(desc.type_name) + if cls is None: + self._mirror_file(desc.file) + google_desc = self._pool.FindMessageTypeByName(desc.type_name) + cls = message_factory.GetMessageClass(google_desc) + self._classes[desc.type_name] = cls + return cls + + def to_google(self, msg: protobuf.Message) -> message.Message: + """Re-creates a protobuf-py message as a google.protobuf message.""" + bridged = self.google_class(type(msg).desc())() + bridged.ParseFromString(msg.to_binary()) + return bridged diff --git a/protovalidate/internal/celexpr/extra_func.py b/protovalidate/internal/celexpr/extra_func.py new file mode 100644 index 0000000..74b4103 --- /dev/null +++ b/protovalidate/internal/celexpr/extra_func.py @@ -0,0 +1,1686 @@ +# Copyright 2023-2026 Buf Technologies, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from urllib import parse as urlparse + +import re2 +from cel_expr_python import cel +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import wrappers_pb2 + +# protobuf 7+ removed FieldDescriptor.label / LABEL_REPEATED in favour of is_repeated. +_FieldDescriptorClass = _descriptor.FieldDescriptor +if hasattr(_FieldDescriptorClass, "is_repeated"): + + def _is_repeated(field: _descriptor.FieldDescriptor) -> bool: + return field.is_repeated + +else: + + def _is_repeated(field: _descriptor.FieldDescriptor) -> bool: + return field.label == _descriptor.FieldDescriptor.LABEL_REPEATED + + +# See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address +_email_regex = re2.compile( + r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" +) + + +def cel_get_field(message: object, field_name: object) -> object: + if not isinstance(message, _message.Message): + msg = "invalid argument, expected message" + raise ValueError(msg) + if not isinstance(field_name, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + if field_name not in message.DESCRIPTOR.fields_by_name: + msg = f"no such field: {field_name}" + raise ValueError(msg) + field = message.DESCRIPTOR.fields_by_name[field_name] + value = getattr(message, field.name) + if 
field.message_type is not None and field.message_type.GetOptions().map_entry: + return dict(value) + if _is_repeated(field): + return list(value) + if field.type == _descriptor.FieldDescriptor.TYPE_BYTES: + # Route bytes through BytesValue so the value is owned by the runtime; + # raw Python bytes returns are corrupted by a runtime conversion bug. + return wrappers_pb2.BytesValue(value=value) + return value + + +def cel_is_ip(val: object, ver: object | None = None) -> bool: + """Return True if the string is an IPv4 or IPv6 address, optionally limited to a specific version. + + Version 0 or None means either 4 or 6. Passing a version other than 0, 4, or 6 always returns False. + + IPv4 addresses are expected in the dotted decimal format, for example "192.168.5.21". + IPv6 addresses are expected in their text representation, for example "::1" or "2001:0DB8:ABCD:0012::0". + + Both formats are well-defined in the internet standard RFC 3986. Zone + identifiers for IPv6 addresses (for example "fe80::a%en1") are supported. + + """ + if not isinstance(val, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + if ver is not None and (not isinstance(ver, int) or isinstance(ver, bool)): + msg = "invalid argument, expected int" + raise ValueError(msg) + + if ver is None: + version = 0 + else: + version = ver + + return _is_ip(val, version) + + +def _is_ip(string: str, version: int) -> bool: + """Internal implementation""" + valid = False + if version == 6: + valid = Ipv6(string).address() + elif version == 4: + valid = Ipv4(string).address() + elif version == 0: + valid = Ipv4(string).address() or Ipv6(string).address() + + return valid + + +def cel_is_ip_prefix(val: object, *args) -> bool: + """Return True if the string is a valid IP with prefix length, optionally + limited to a specific version (v4 or v6), and optionally requiring the host + portion to be all zeros. + + An address prefix divides an IP address into a network portion, and a host portion. 
+ The prefix length specifies how many bits the network portion has. + For example, the IPv6 prefix "2001:db8:abcd:0012::0/64" designates the + left-most 64 bits as the network prefix. The range of the network is 2**64 + addresses, from 2001:db8:abcd:0012::0 to 2001:db8:abcd:0012:ffff:ffff:ffff:ffff. + + An address prefix may include a specific host address, for example + "2001:db8:abcd:0012::1f/64". With strict = true, this is not permitted. The + host portion must be all zeros, as in "2001:db8:abcd:0012::0/64". + + The same principle applies to IPv4 addresses. "192.168.1.0/24" designates + the first 24 bits of the 32-bit IPv4 as the network prefix. + + """ + + if not isinstance(val, str): + msg = "invalid argument, expected string or bytes" + raise ValueError(msg) + version = 0 + strict = False + if len(args) == 1 and isinstance(args[0], bool): + strict = bool(args[0]) + elif len(args) == 1 and isinstance(args[0], int): + version = args[0] + elif len(args) == 1: + msg = "invalid argument, expected bool or int" + raise ValueError(msg) + elif len(args) == 2 and isinstance(args[0], int) and not isinstance(args[0], bool) and isinstance(args[1], bool): + version = args[0] + strict = bool(args[1]) + elif len(args) == 2: + msg = "invalid argument, expected int and bool" + raise ValueError(msg) + + return _is_ip_prefix(val, version, strict=strict) + + +def _is_ip_prefix(string: str, version: int, *, strict=False) -> bool: + """Internal implementation""" + valid = False + if version == 6: + v6 = Ipv6(string) + valid = v6.address_prefix() and (not strict or v6.is_prefix_only()) + elif version == 4: + v4 = Ipv4(string) + valid = v4.address_prefix() and (not strict or v4.is_prefix_only()) + elif version == 0: + valid = _is_ip_prefix(string, 6, strict=strict) or _is_ip_prefix(string, 4, strict=strict) + + return valid + + +def cel_is_email(string: object) -> bool: + """Return True if the string is an email address, for example "foo@example.com". 
+ + Conforms to the definition for a valid email address from the HTML standard. + Note that this standard willfully deviates from RFC 5322, which allows many + unexpected forms of email addresses and will easily match a typographical + error. + + """ + if not isinstance(string, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + return _email_regex.fullmatch(string) is not None + + +def cel_is_uri(string: object) -> bool: + """Return True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". + + URI is defined in the internet standard RFC 3986. + Zone Identifiers in IPv6 address literals are supported (RFC 6874). + + """ + if not isinstance(string, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + return Uri(str(string)).uri() + + +def cel_is_uri_ref(string: object) -> bool: + """Return True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or + a Relative Reference such as "./foo/bar?query". + + URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. + Zone Identifiers in IPv6 address literals are supported (RFC 6874). + + """ + if not isinstance(string, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + return Uri(str(string)).uri_reference() + + +def cel_is_hostname(val: object) -> bool: + """Returns True if the string is a valid hostname, for example "foo.example.com". + + A valid hostname follows the rules below: + - The name consists of one or more labels, separated by a dot ("."). + - Each label can be 1 to 63 alphanumeric characters. + - A label can contain hyphens ("-"), but must not start or end with a hyphen. + - The right-most label must not be digits only. + - The name can have a trailing dot, for example "foo.example.com.". + - The name can be 253 characters at most, excluding the optional trailing dot. 
+ + """ + if not isinstance(val, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + return _is_hostname(val) + + +def _is_hostname(val: str) -> bool: + """Internal implementation""" + if len(val) > 253: + return False + + if val.endswith("."): + string = val[0 : len(val) - 1].lower() + else: + string = val.lower() + + all_digits = False + parts = string.lower().split(sep=".") + + # split hostname on '.' and validate each part + for part in parts: + all_digits = True + + # if part is empty, longer than 63 chars, or starts/ends with '-', it is invalid + part_len = len(part) + + if part_len == 0 or part_len > 63 or part.startswith("-") or part.endswith("-"): + return False + + for c in part: + # if the character is not a-z, 0-9, or '-', it is invalid + if (c < "a" or c > "z") and (c < "0" or c > "9") and c != "-": + return False + + all_digits = all_digits and "0" <= c <= "9" + + # the last part cannot be all numbers + return not all_digits + + +def _is_port(val: str) -> bool: + if len(val) == 0: + return False + if len(val) > 1 and val[0] == "0": + return False + for c in val: + if c < "0" or c > "9": + return False + try: + return int(val) <= 65535 + except ValueError: + # Error converting to number + return False + + +def cel_is_host_and_port(string: object, port_required: object) -> bool: + """Return True if the string is a valid host/port pair, for example "example.com:8080". + + If the argument `port_required` is True, the port is required. If the argument + is False, the port is optional. + + The host can be one of: + - An IPv4 address in dotted decimal format, for example "192.168.0.1". + - An IPv6 address enclosed in square brackets, for example "[::1]". + - A hostname, for example "example.com". + + The port is separated by a colon. It must be non-empty, with a decimal number in the range of 0-65535, inclusive. 
+ """ + if not isinstance(string, str): + msg = "invalid argument, expected string" + raise ValueError(msg) + if not isinstance(port_required, bool): + msg = "invalid argument, expected bool" + raise ValueError(msg) + return _is_host_and_port(string, port_required=bool(port_required)) + + +def _is_host_and_port(val: str, *, port_required=False) -> bool: + if len(val) == 0: + return False + + split_idx = val.rfind(":") + + if val[0] == "[": + end = val.rfind("]") + end_plus = end + 1 + + if end_plus == len(val): + return not port_required and _is_ip(val[1:end], 6) + elif end_plus == split_idx: + return _is_ip(val[1:end], 6) and _is_port(val[split_idx + 1 :]) + else: + # malformed + return False + + if split_idx < 0: + return not port_required and (_is_hostname(val) or _is_ip(val, 4)) + + host = val[0:split_idx] + port = val[split_idx + 1 :] + + return (_is_hostname(host) or _is_ip(host, 4)) and _is_port(port) + + +def cel_is_nan(val: object) -> bool: + if not isinstance(val, float): + msg = "invalid argument, expected double" + raise ValueError(msg) + return math.isnan(val) + + +def cel_is_inf(val: object, sign: object | None = None) -> bool: + if not isinstance(val, float): + msg = "invalid argument, expected double" + raise ValueError(msg) + if sign is None: + return math.isinf(val) + + if not isinstance(sign, int) or isinstance(sign, bool): + msg = "invalid argument, expected int" + raise ValueError(msg) + if sign > 0: + return math.isinf(val) and val > 0 + elif sign < 0: + return math.isinf(val) and val < 0 + else: + return math.isinf(val) + + +def cel_unique(val: object) -> bool: + if not isinstance(val, list): + msg = "invalid argument, expected list" + raise ValueError(msg) + # Track seen values keyed by (type, value) so that distinct CEL types that + # are equal in Python (notably bool vs int: ``True == 1``) are not treated + # as duplicates, and so that bytes are never confused with strings. 
+ seen: set = set() + for item in val: + # The runtime hands bytes values to Python as (unhashable) bytearrays. + hashable = bytes(item) if isinstance(item, bytearray) else item + key = (type(hashable), hashable) + if key in seen: + return False + seen.add(key) + return True + + +class Ipv4: + """Ipv4 is a class used to parse a given string to determine if it is a valid IPv4 address or address prefix.""" + + _string: str + _index: int + _octets: bytearray + _prefix_len: int + + def __init__(self, string: str): + """Initialize an Ipv4 validation class with a given string.""" + + super().__init__() + self._string = string + self._index = 0 + self._octets = bytearray() + self._prefix_len = 0 + + def address(self) -> bool: + """Parses an IPv4 Address in dotted decimal notation.""" + return self.__address_part() and self._index == len(self._string) + + def address_prefix(self) -> bool: + """Parses an IPv4 Address prefix.""" + return ( + self.__address_part() and self.__take("/") and self.__prefix_length() and self._index == len(self._string) + ) + + def get_bits(self) -> int: + """Return the 32-bit value of an address parsed through address() or address_prefix(). + + Return -1 if no address was parsed successfully. + + """ + if len(self._octets) != 4: + return -1 + + return (self._octets[0] << 24) | (self._octets[1] << 16) | (self._octets[2] << 8) | self._octets[3] + + def is_prefix_only(self) -> bool: + """Return True if all bits to the right of the prefix-length are all zeros. + + Behavior is undefined if address_prefix() has not been called before, or has returned False. 
+ + """ + bits = self.get_bits() + + mask: int + if self._prefix_len == 32: + mask = 0xFFFFFFFF + else: + mask = ~(0xFFFFFFFF >> self._prefix_len) + + masked = bits & mask + + return bits == masked + + def __prefix_length(self) -> bool: + """Store value in prefix_len""" + + start = self._index + + while self.__digit(): + if self._index - start > 2: + # max prefix-length is 32 bits, so anything more than 2 digits is invalid + return False + + string = self._string[start : self._index] + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 32: + # max 32 bits + return False + + self._prefix_len = value + + return True + + except ValueError: + # Error converting to number + return False + + def __address_part(self) -> bool: + start = self._index + + if ( + self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + ): + return True + + self._index = start + return False + + def __dec_octet(self) -> bool: + start = self._index + + while self.__digit(): + if self._index - start > 3: + # decimal octet can be three characters at most + return False + + string = self._string[start : self._index] + + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 255: + return False + + self._octets.append(value) + + return True + + except ValueError: + # Error converting to number + return False + + def __digit(self) -> bool: + """Report whether the current position is a digit. 
+ + Parses the rule: + + DIGIT = %x30-39 ; 0-9 + + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current position, incrementing the index if necessary.""" + + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + + +class Ipv6: + """Ipv6 is a class used to parse a given string to determine if it is a IPv6 address or address prefix.""" + + _string: str + _index: int + _pieces: list[int] # 16-bit pieces found + _double_colon_at: int # Number of 16-bit pieces found when double colon was found. + _double_colon_seen: bool + _dotted_raw: str # Dotted notation for right-most 32 bits. + _dotted_addr: Ipv4 | None # Dotted notation successfully parsed as Ipv4. + _zone_id_found: bool + _prefix_len: int # 0 -128 + + def __init__(self, string: str): + """Initialize a URI validation class with a given string.""" + + super().__init__() + self._string = string + self._index = 0 + self._pieces = [] + self._double_colon_at = -1 + self._double_colon_seen = False + self._dotted_raw = "" + self._dotted_addr = None + self._zone_id_found = False + + def get_bits(self) -> int: + """Return the 128-bit value of an address parsed through address() or address_prefix(). + + Return 0 if no address was parsed successfully. 
+ + """ + p16 = self._pieces + + # Handle dotted decimal, add to p16 + if self._dotted_addr is not None: + # Right-most 32 bits + dotted32 = self._dotted_addr.get_bits() + # High 16 bits + p16.append(dotted32 >> 16) + # Low 16 bits + p16.append(dotted32) + + # Handle double colon, fill pieces with 0 + if self._double_colon_seen: + while len(p16) < 8: + # Delete 0 entries at pos, insert a 0 + p16.insert(self._double_colon_at, 0x00000000) + + if len(p16) != 8: + return 0 + + return ( + p16[0] << 112 + | p16[1] << 96 + | p16[2] << 80 + | p16[3] << 64 + | p16[4] << 48 + | p16[5] << 32 + | p16[6] << 16 + | p16[7] + ) + + def is_prefix_only(self) -> bool: + """Return True if all bits to the right of the prefix-length are all zeros. + + Behavior is undefined if address_prefix() has not been called before, or has returned False. + + """ + bits = self.get_bits() + mask: int + if self._prefix_len >= 128: + mask = 0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF + elif self._prefix_len < 0: + mask = 0x00000000_00000000_00000000_00000000 + else: + mask = ~(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF >> self._prefix_len) + + masked = bits & mask + if bits != masked: + return False + + return True + + def address(self) -> bool: + """Parse an IPv6 Address following RFC 4291, with optional zone id following RFC 4007.""" + + return self.__address_part() and self._index == len(self._string) + + def address_prefix(self) -> bool: + """Parse an IPv6 Address Prefix following RFC 4291. 
Zone id is not permitted.""" + + return ( + self.__address_part() + and not self._zone_id_found + and self.__take("/") + and self.__prefix_length() + and self._index == len(self._string) + ) + + def __prefix_length(self) -> bool: + """Store value in prefix_len.""" + start = self._index + + while self.__digit(): + if self._index - start > 3: + return False + + string = self._string[start : self._index] + + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 128: + # max 128 bits + return False + + self._prefix_len = value + + return True + + except ValueError: + # Error converting to number + return False + + def __address_part(self) -> bool: + """Store dotted notation for right-most 32 bits in dotted_raw / dotted_addr if found.""" + + while self._index < len(self._string): + # dotted notation for right-most 32 bits, e.g. 0:0:0:0:0:ffff:192.1.56.10 + if (self._double_colon_seen or len(self._pieces) == 6) and self.__dotted(): + dotted = Ipv4(self._dotted_raw) + + if dotted.address(): + self._dotted_addr = dotted + return True + + return False + + try: + if self.__h16(): + continue + except ValueError: + return False + + if self.__take(":"): + if self.__take(":"): + if self._double_colon_seen: + return False + + self._double_colon_seen = True + self._double_colon_at = len(self._pieces) + + if self.__take(":"): + return False + elif self._index == 1 or self._index == len(self._string): + # invalid - string cannot start or end on single colon + return False + + continue + + if self._string[self._index] == "%" and not self.__zone_id(): + return False + + break + + if self._double_colon_seen: + return len(self._pieces) < 8 + return len(self._pieces) == 8 + + def __zone_id(self) -> bool: + """Determine whether the current position is a zoneID. + + There is no definition for the character set allowed in the zone + identifier. 
RFC 4007 permits basically any non-null string. + + RFC 6874: ZoneID = 1*( unreserved / pct-encoded ) + + """ + start = self._index + + if self.__take("%"): + if len(self._string) - self._index > 0: + # permit any non-null string + self._index = len(self._string) + self._zone_id_found = True + + return True + + self._index = start + self._zone_id_found = False + return False + + def __dotted(self) -> bool: + """Determine whether the current position is a dotted address. + + Parses the rule: + + 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + + Stores match in _dotted_raw. + """ + + start = self._index + self._dotted_raw = "" + + while self.__digit() or self.__take("."): + pass + + if self._index - start >= 7: + self._dotted_raw = self._string[start : self._index] + return True + + self._index = start + return False + + def __h16(self) -> bool: + """Determine whether the current position is a h16. + + Parses the rule: + + h16 = 1*4HEXDIG + + If 1-4 hex digits are found, the parsed 16-bit unsigned integer is stored + in pieces and True is returned. + If 0 hex digits are found, returns False. + If more than 4 hex digits are found or the found hex digits cannot be + converted to an int, a ValueError is raised. + """ + + start = self._index + + while self.__hex_dig(): + pass + + string = self._string[start : self._index] + + if len(string) == 0: + # too short, just return false + # this is not an error condition, it just means we didn't find any + # hex digits at the current position. + return False + + if len(string) > 4: + # too long + # this is an error condition, it means we found a string of more than + # four valid hex digits, which is invalid in ipv6 addresses. + raise ValueError + + # Note that this will raise a ValueError also if string cannot be + # converted to an int. + value = int(string, 16) + + self._pieces.append(value) + + return True + + def __hex_dig(self) -> bool: + """Determine whether the current position is a hex digit. 
+ + Parses the rule: + + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + + """ + if self._index >= len(self._string): + return False + + c = self._string[self._index] + + if ("0" <= c <= "9") or ("a" <= c <= "f") or ("A" <= c <= "F"): + self._index += 1 + + return True + + return False + + def __digit(self) -> bool: + """Determine whether the current position is a digit. + + Parses the rule: + + DIGIT = %x30-39 ; 0-9 + + """ + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current index. + + If char is at the current index, increment the index. + + """ + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + + +class Uri: + """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference.""" + + _string: str + _index: int + _pct_encoded_found: bool + + def __init__(self, string: str): + """Initialize a URI validation class with a given string.""" + super().__init__() + self._string = string + self._index = 0 + + def uri(self) -> bool: + """Determine whether _string is a URI. + + Parses the rule: + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + """ + start = self._index + if not (self.__scheme() and self.__take(":") and self.__hier_part()): + self._index = start + return False + + if self.__take("?") and not self.__query(): + return False + + if self.__take("#") and not self.__fragment(): + return False + + if self._index != len(self._string): + self._index = start + return False + + return True + + def uri_reference(self) -> bool: + """Determine whether _string is a URI reference. 
+ + Parses the rule: + + URI-reference = URI / relative-ref + + """ + return self.uri() or self.__relative_ref() + + def __hier_part(self) -> bool: + """Determine whether the current position is a hier-part. + + Parses the rule: + + hier-part = "//" authority path-abempty. + / path-absolute + / path-rootless + / path-empty + + """ + start = self._index + if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): + return True + + self._index = start + return self.__path_absolute() or self.__path_rootless() or self.__path_empty() + + def __relative_ref(self) -> bool: + """Determine whether the current position is a relative reference. + + Parses the rule: + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + """ + start = self._index + if not self.__relative_part(): + return False + + if self.__take("?") and not self.__query(): + self._index = start + return False + + if self.__take("#") and not self.__fragment(): + self._index = start + return False + + if self._index != len(self._string): + self._index = start + return False + + return True + + def __relative_part(self) -> bool: + """Determine whether the current position is a relative part. + + Parses the rule: + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + """ + start = self._index + if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): + return True + + self._index = start + return self.__path_absolute() or self.__path_noscheme() or self.__path_empty() + + def __scheme(self) -> bool: + """Determine whether the current position is a scheme. + + Parses the rule: + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + Terminated by ":". 
+ + """ + start = self._index + if self.__alpha(): + while self.__alpha() or self.__digit() or self.__take("+") or self.__take("-") or self.__take("."): + pass + + if self.__peek(":"): + return True + + self._index = start + return False + + def __authority(self) -> bool: + """Determine whether the current position is an authority. + + Parses the rule: + + authority = [ userinfo "@" ] host [ ":" port ] + + Lead by double slash ("") and terminated by "/", "?", "#", or end of URI. + + """ + start = self._index + if self.__userinfo(): + if not self.__take("@"): + self._index = start + return False + + if not self.__host(): + self._index = start + return False + + if self.__take(":"): + if not self.__port(): + self._index = start + return False + + if not self.__is_authority_end(): + self._index = start + return False + + return True + + def __is_authority_end(self) -> bool: + """Report whether the current position is the end of the authority. + + The authority component [...] is terminated by the next slash ("/"), + question mark ("?"), or number sign ("#") character, or by the + end of the URI. + + """ + return ( + self._index >= len(self._string) + or self._string[self._index] == "?" + or self._string[self._index] == "#" + or self._string[self._index] == "/" + ) + + def __userinfo(self) -> bool: + """Determine whether the current position is a userinfo. + + Parses the rule: + + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + Terminated by "@" in authority. + + """ + start = self._index + while self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":"): + pass + + if self.__peek("@"): + return True + + self._index = start + return False + + @staticmethod + def __check_host_pct_encoded(string: str) -> bool: + """Verify that string is correctly percent-encoded.""" + try: + # unquote defaults to 'UTF-8' encoding. 
+ urlparse.unquote(string, errors="strict") + except UnicodeError: + return False + + return True + + def __host(self) -> bool: + """Determine whether the current position is a host. + + Parses the rule: + + host = IP-literal / IPv4address / reg-name. + + """ + start = self._index + self._pct_encoded_found = False + + # Note: IPv4address is a subset of reg-name + if (self.__peek("[") and self.__ip_literal()) or self.__reg_name(): + if self._pct_encoded_found: + raw_host = self._string[start : self._index] + # RFC 3986: + # > URI producing applications must not use percent-encoding in host + # > unless it is used to represent a UTF-8 character sequence. + if not self.__check_host_pct_encoded(raw_host): + return False + + return True + + return False + + def __port(self) -> bool: + """Determine whether the current position is a port. + + Parses the rule: + + port = *DIGIT + + Terminated by end of authority. + + """ + start = self._index + while self.__digit(): + pass + + if self.__is_authority_end(): + return True + + self._index = start + return False + + def __ip_literal(self) -> bool: + """Determine whether the current position is a IP-literal. + + Parses the rule from RFC 6874: + + IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" + + """ + start = self._index + + if self.__take("["): + curr_idx = self._index + if self.__ipv6_address() and self.__take("]"): + return True + + self._index = curr_idx + + if self.__ipv6_addrz() and self.__take("]"): + return True + + self._index = curr_idx + + if self.__ip_vfuture() and self.__take("]"): + return True + + self._index = start + return False + + def __ipv6_address(self) -> bool: + """Determine whether the current position is a IPv6address. + + Parses the rule "IPv6address". + + Relies on the implementation of _is_ip. 
+ + """ + start = self._index + while self.__hex_dig() or self.__take(":"): + pass + + if _is_ip(self._string[start : self._index], 6): + return True + + self._index = start + return False + + def __ipv6_addrz(self) -> bool: + """Determine whether the current position is a IPv6addrz. + + Parses the rule from RFC 6874: + + IPv6addrz = IPv6address "%25" ZoneID + + """ + start = self._index + if self.__ipv6_address() and self.__take("%") and self.__take("2") and self.__take("5") and self.__zone_id(): + return True + + self._index = start + + return False + + def __zone_id(self) -> bool: + """Determine whether the current position is a ZoneID. + + Parses the rule from RFC 6874: + + ZoneID = 1*( unreserved / pct-encoded ) + + """ + start = self._index + while self.__unreserved() or self.__pct_encoded(): + pass + + if self._index - start > 0: + return True + + self._index = start + + return False + + def __ip_vfuture(self) -> bool: + """Determine whether the current position is a IPvFuture. + + Parses the rule: + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + """ + start = self._index + + if self.__take("v") and self.__hex_dig(): + while self.__hex_dig(): + pass + + if self.__take("."): + j = 0 + while self.__unreserved() or self.__sub_delims() or self.__take(":"): + j += 1 + + if j >= 1: + return True + + self._index = start + + return False + + def __reg_name(self) -> bool: + """Determine whether the current position is a reg-name. + + Parses the rule: + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + Terminates on start of port (":") or end of authority. + + """ + start = self._index + while self.__unreserved() or self.__pct_encoded() or self.__sub_delims(): + pass + + if self.__is_authority_end(): + # End of authority + return True + + if self.__peek(":"): + return True + + self._index = start + return False + + def __is_path_end(self) -> bool: + """Determine whether the current index has reached the end of path. 
+ + > The path is terminated by the first question mark ("?") or + > number sign ("#") character, or by the end of the URI. + + """ + return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" + + def __path_abempty(self) -> bool: + """Determine whether the current position is a path-abempty. + + Parses the rule: + + path-abempty = *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. + + """ + start = self._index + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + return False + + def __path_absolute(self) -> bool: + """Determine whether the current position is a path-absolute. + + Parses the rule: + + path-absolute = "/" [ segment-nz *( "/" segment ) ] + + Terminated by end of path: "?", "#", or end of URI. + + """ + start = self._index + + if self.__take("/"): + if self.__segment_nz(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + return False + + def __path_noscheme(self) -> bool: + """Determine whether the current position is a path-noscheme. + + Parses the rule: + + path-noscheme = segment-nz-nc *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. + + """ + start = self._index + if self.__segment_nz_nc(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + return True + + def __path_rootless(self) -> bool: + """Determine whether the current position is a path-rootless. + + Parses the rule: + + path-rootless = segment-nz *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. 
+ + """ + start = self._index + + if self.__segment_nz(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + return True + + def __path_empty(self) -> bool: + """Determine whether the current position is a path-empty. + + Parses the rule: + + path-empty = 0 + + Terminated by end of path: "?", "#", or end of URI. + + """ + return self.__is_path_end() + + def __segment(self) -> bool: + """Determine whether the current position is a segment. + + Parses the rule: + + segment = *pchar + + """ + while self.__pchar(): + pass + + return True + + def __segment_nz(self) -> bool: + """Determine whether the current position is a segment-nz. + + Parses the rule: + + segment-nz = 1*pchar + + """ + start = self._index + + if self.__pchar(): + while self.__pchar(): + pass + + return True + + self._index = start + return False + + def __segment_nz_nc(self) -> bool: + """Determine whether the current position is a segment-nz-nc. + + Parses the rule: + + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + """ + start = self._index + + while self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take("@"): + pass + + if self._index - start > 0: + return True + + self._index = start + return False + + def __pchar(self) -> bool: + """Determine whether the current position is a pchar. + + Parses the rule: + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + """ + return ( + self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":") or self.__take("@") + ) + + def __query(self) -> bool: + """Determine whether the current position is a valid query. + + Parses the rule: + + query = *( pchar / "/" / "?" ) + + Terminated by "#" or end of URI. 
+ + """ + start = self._index + + while self.__pchar() or self.__take("/") or self.__take("?"): + pass + + if self._index == len(self._string) or self.__peek("#"): + return True + + self._index = start + return False + + def __fragment(self) -> bool: + """Determine whether the current position is a fragment. + + Parses the rule: + + fragment = *( pchar / "/" / "?" ) + + Terminated by end of URI. + + """ + start = self._index + + while self.__pchar() or self.__take("/") or self.__take("?"): + pass + + if self._index == len(self._string): + return True + + self._index = start + return False + + def __pct_encoded(self) -> bool: + """Determine whether the current position is a pct-encoded. + + Parses the rule: + + pct-encoded = "%" HEXDIG HEXDIG + + Sets `_pct_encoded_found` to True if a valid triplet was found + + """ + start = self._index + + if self.__take("%") and self.__hex_dig() and self.__hex_dig(): + self._pct_encoded_found = True + return True + + self._index = start + + return False + + def __unreserved(self) -> bool: + """Determine whether the current position is a unreserved character. + + Parses the rule: + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + + """ + return ( + self.__alpha() + or self.__digit() + or self.__take("-") + or self.__take("_") + or self.__take(".") + or self.__take("~") + ) + + def __sub_delims(self) -> bool: + """Determine whether the current position is a sub-delim. + + Parses the rule: + + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + """ + return ( + self.__take("!") + or self.__take("$") + or self.__take("&") + or self.__take("'") + or self.__take("(") + or self.__take(")") + or self.__take("*") + or self.__take("+") + or self.__take(",") + or self.__take(";") + or self.__take("=") + ) + + def __alpha(self) -> bool: + """Determine whether the current position is an alpha character. 
+ + Parses the rule: + + ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + + """ + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if ("A" <= c <= "Z") or ("a" <= c <= "z"): + self._index += 1 + return True + + return False + + def __digit(self) -> bool: + """Determine whether the current position is a digit. + + Parses the rule: + + DIGIT = %x30-39 ; 0-9 + + """ + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __hex_dig(self) -> bool: + """Determine whether the current position is a hex digit. + + Parses the rule: + + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + + """ + if self._index >= len(self._string): + return False + + c = self._string[self._index] + + if ("0" <= c <= "9") or ("a" <= c <= "f") or ("A" <= c <= "F"): + self._index += 1 + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current index. + + If char is at the current index, increment the index. + + """ + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + + def __peek(self, char: str) -> bool: + return self._index < len(self._string) and self._string[self._index] == char + + +def _bytes_starts_with(value: object, prefix: object) -> bool: + return bytes(value).startswith(bytes(prefix)) # ty: ignore[invalid-argument-type] + + +def _bytes_ends_with(value: object, suffix: object) -> bool: + return bytes(value).endswith(bytes(suffix)) # ty: ignore[invalid-argument-type] + + +def _bytes_contains(value: object, sub: object) -> bool: + return bytes(sub) in bytes(value) # ty: ignore[invalid-argument-type] + + +def make_extension() -> cel.CelExtension: + """Build the CEL extension with protovalidate's custom functions. 
+ + ``matches`` is not registered: the cel-cpp runtime already evaluates it + with RE2, which is the engine the protovalidate spec requires. ``format`` + comes from the bundled strings extension. The bytes overloads of + ``startsWith``/``endsWith``/``contains`` are protovalidate additions to + the standard string-only functions. + """ + _b, _s, _i, _d, _l, _dyn = ( + cel.Type.BOOL, + cel.Type.STRING, + cel.Type.INT, + cel.Type.DOUBLE, + cel.Type.LIST, + cel.Type.DYN, + ) + return cel.CelExtension( + "protovalidate", + [ + cel.FunctionDecl("getField", [cel.Overload("get_field", _dyn, [_dyn, _s], impl=cel_get_field)]), + cel.FunctionDecl("isNan", [cel.Overload("double_is_nan", _b, [_d], is_member=True, impl=cel_is_nan)]), + cel.FunctionDecl( + "isInf", + [ + cel.Overload("double_is_inf", _b, [_d], is_member=True, impl=cel_is_inf), + cel.Overload("double_int_is_inf", _b, [_d, _i], is_member=True, impl=cel_is_inf), + ], + ), + cel.FunctionDecl( + "isIp", + [ + cel.Overload("string_is_ip", _b, [_s], is_member=True, impl=cel_is_ip), + cel.Overload("string_int_is_ip", _b, [_s, _i], is_member=True, impl=cel_is_ip), + ], + ), + cel.FunctionDecl( + "isIpPrefix", + [ + cel.Overload("string_is_ip_prefix", _b, [_s], is_member=True, impl=cel_is_ip_prefix), + cel.Overload("string_int_is_ip_prefix", _b, [_s, _i], is_member=True, impl=cel_is_ip_prefix), + cel.Overload("string_bool_is_ip_prefix", _b, [_s, _b], is_member=True, impl=cel_is_ip_prefix), + cel.Overload( + "string_int_bool_is_ip_prefix", _b, [_s, _i, _b], is_member=True, impl=cel_is_ip_prefix + ), + ], + ), + cel.FunctionDecl("isEmail", [cel.Overload("string_is_email", _b, [_s], is_member=True, impl=cel_is_email)]), + cel.FunctionDecl("isUri", [cel.Overload("string_is_uri", _b, [_s], is_member=True, impl=cel_is_uri)]), + cel.FunctionDecl( + "isUriRef", [cel.Overload("string_is_uri_ref", _b, [_s], is_member=True, impl=cel_is_uri_ref)] + ), + cel.FunctionDecl( + "isHostname", [cel.Overload("string_is_hostname", _b, [_s], 
is_member=True, impl=cel_is_hostname)] + ), + cel.FunctionDecl( + "isHostAndPort", + [cel.Overload("string_bool_is_host_and_port", _b, [_s, _b], is_member=True, impl=cel_is_host_and_port)], + ), + cel.FunctionDecl("unique", [cel.Overload("list_unique", _b, [_l], is_member=True, impl=cel_unique)]), + cel.FunctionDecl( + "startsWith", + [ + cel.Overload( + "bytes_starts_with", + _b, + [cel.Type.BYTES, cel.Type.BYTES], + is_member=True, + impl=_bytes_starts_with, + ) + ], + ), + cel.FunctionDecl( + "endsWith", + [ + cel.Overload( + "bytes_ends_with", _b, [cel.Type.BYTES, cel.Type.BYTES], is_member=True, impl=_bytes_ends_with + ) + ], + ), + cel.FunctionDecl( + "contains", + [ + cel.Overload( + "bytes_contains", _b, [cel.Type.BYTES, cel.Type.BYTES], is_member=True, impl=_bytes_contains + ) + ], + ), + ], + ) diff --git a/protovalidate/internal/celexpr/rules.py b/protovalidate/internal/celexpr/rules.py new file mode 100644 index 0000000..46ae3c3 --- /dev/null +++ b/protovalidate/internal/celexpr/rules.py @@ -0,0 +1,1077 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The rule engine. 
+ +Rules are *discovered* from protobuf-py descriptors: the message structure is +read off the google mirror (the validated types are registered in google's pool +by the bridge anyway), but the ``buf.validate`` *options* are read off the +relocatable protobuf-py stub (``validate_pb``), so nothing needs ``buf.validate`` +in google's global pool for discovery. Rules are *evaluated* by cel-expr-python, +which only ingests google messages, so the message under validation and the rule +messages bound as ``rules``/``rule`` are bridged to google (see +``celexpr.bridge.GoogleBridge``). Output ``Violation``\\s are protobuf-py +``validate_pb`` messages — the public type, shared with the celpy engine via +``protovalidate.internal._core``. +""" + +import dataclasses +import datetime +import functools +import typing +from collections.abc import Container + +import protobuf +from cel_expr_python import cel +from cel_expr_python.ext import ext_strings +from google.protobuf import any_pb2, descriptor, descriptor_pool, message, wrappers_pb2 +from protobuf import Oneof +from protobuf.wkt import FieldDescriptorProto + +from protovalidate._gen.buf.validate import validate_pb + +# Backend-agnostic primitives shared with the celpy engine (protovalidate.internal.rules). +from protovalidate.internal._core import CompilationError, RuleContext, Rules, Violation +from protovalidate.internal.celexpr.bridge import GoogleBridge + +# protobuf 7+ removed FieldDescriptor.label / LABEL_REPEATED in favour of is_repeated. 
+_FieldDescriptorClass = descriptor.FieldDescriptor +if hasattr(_FieldDescriptorClass, "is_repeated"): + + def _is_repeated(field: descriptor.FieldDescriptor) -> bool: + return field.is_repeated + +else: + + def _is_repeated(field: descriptor.FieldDescriptor) -> bool: + return field.label == descriptor.FieldDescriptor.LABEL_REPEATED + + +_FIELD_TYPE_NAMES: dict[int, str] = { + descriptor.FieldDescriptor.TYPE_MESSAGE: "message", + descriptor.FieldDescriptor.TYPE_GROUP: "group", + descriptor.FieldDescriptor.TYPE_ENUM: "enum", + descriptor.FieldDescriptor.TYPE_BOOL: "bool", + descriptor.FieldDescriptor.TYPE_BYTES: "bytes", + descriptor.FieldDescriptor.TYPE_STRING: "string", + descriptor.FieldDescriptor.TYPE_FLOAT: "float", + descriptor.FieldDescriptor.TYPE_DOUBLE: "double", + descriptor.FieldDescriptor.TYPE_INT32: "int32", + descriptor.FieldDescriptor.TYPE_INT64: "int64", + descriptor.FieldDescriptor.TYPE_SINT32: "sint32", + descriptor.FieldDescriptor.TYPE_SINT64: "sint64", + descriptor.FieldDescriptor.TYPE_SFIXED32: "sfixed32", + descriptor.FieldDescriptor.TYPE_SFIXED64: "sfixed64", + descriptor.FieldDescriptor.TYPE_UINT32: "uint32", + descriptor.FieldDescriptor.TYPE_UINT64: "uint64", + descriptor.FieldDescriptor.TYPE_FIXED32: "fixed32", + descriptor.FieldDescriptor.TYPE_FIXED64: "fixed64", +} + + +def _get_type_name(fd: typing.Any) -> str: + return _FIELD_TYPE_NAMES.get(fd, "unknown") + + +def _proto_message_has_field(msg: message.Message, field: descriptor.FieldDescriptor) -> typing.Any: + if field.is_extension: + return msg.HasExtension(field) # ty: ignore[invalid-argument-type] + return msg.HasField(field.name) + + +def _proto_message_get_field(msg: message.Message, field: descriptor.FieldDescriptor) -> typing.Any: + if field.is_extension: + return msg.Extensions[field] # ty: ignore[invalid-argument-type] + return getattr(msg, field.name) + + +_UNSIGNED_FIELD_TYPES = frozenset( + ( + descriptor.FieldDescriptor.TYPE_UINT32, + 
descriptor.FieldDescriptor.TYPE_UINT64, + descriptor.FieldDescriptor.TYPE_FIXED32, + descriptor.FieldDescriptor.TYPE_FIXED64, + ) +) + + +def _scalar_field_value_to_cel(val: typing.Any, field: descriptor.FieldDescriptor) -> typing.Any: + # The runtime converts Python scalars and messages (including the + # well-known wrapper, timestamp, and duration types) to CEL values + # natively, so most values pass through unchanged. The exceptions ride in + # on well-known wrapper messages, which the runtime unwraps natively: + # + # - Bytes: raw Python bytes trigger a runtime bug where values reaching + # custom functions or equality against message fields are corrupted. + # - Unsigned ints: a Python int always converts to a CEL int; the wrapper + # is the only way to produce a CEL uint for uint-typed fields. + # - Strings containing NUL: the conversion truncates at the first NUL. + if field.type == descriptor.FieldDescriptor.TYPE_BYTES: + return wrappers_pb2.BytesValue(value=val) + if field.type in _UNSIGNED_FIELD_TYPES: + if field.type in (descriptor.FieldDescriptor.TYPE_UINT32, descriptor.FieldDescriptor.TYPE_FIXED32): + return wrappers_pb2.UInt32Value(value=val) + return wrappers_pb2.UInt64Value(value=val) + if field.type == descriptor.FieldDescriptor.TYPE_STRING and "\x00" in val: + return wrappers_pb2.StringValue(value=val) + return val + + +def _field_value_to_cel(val: typing.Any, field: descriptor.FieldDescriptor) -> typing.Any: + if _is_repeated(field): + if field.message_type is not None and field.message_type.GetOptions().map_entry: + return dict(val) + return list(val) + return _scalar_field_value_to_cel(val, field) + + +def _is_empty_field(msg: message.Message, field: descriptor.FieldDescriptor) -> bool: + if field.has_presence: + return not _proto_message_has_field(msg, field) + if _is_repeated(field): + return len(_proto_message_get_field(msg, field)) == 0 + return _proto_message_get_field(msg, field) == field.default_value + + +def field_to_cel(msg: 
message.Message, field: descriptor.FieldDescriptor) -> typing.Any: + return _field_value_to_cel(_proto_message_get_field(msg, field), field) + + +# ----- protobuf-py validate_pb path construction (output is always validate_pb) ----- + + +def _ftype(google_type: int) -> FieldDescriptorProto.Type: + """Maps a google FieldDescriptor.type int to the protobuf-py enum value.""" + return FieldDescriptorProto.Type(google_type) + + +def _field_to_element(field: descriptor.FieldDescriptor) -> validate_pb.FieldPathElement: + """A FieldPathElement for a (google) field of the message being validated.""" + return validate_pb.FieldPathElement( + field_number=field.number, + field_name=field.name if not field.is_extension else f"[{field.full_name}]", + field_type=_ftype(field.type), + ) + + +def _indexed_field_element(field: descriptor.FieldDescriptor, index: int) -> validate_pb.FieldPathElement: + return validate_pb.FieldPathElement( + field_number=field.number, + field_name=field.name if not field.is_extension else f"[{field.full_name}]", + field_type=_ftype(field.type), + subscript=Oneof(field="index", value=index), + ) + + +def _oneof_to_element(oneof: descriptor.OneofDescriptor) -> validate_pb.FieldPathElement: + return validate_pb.FieldPathElement(field_name=oneof.name) + + +_INT_KEY_TYPES = frozenset( + ( + descriptor.FieldDescriptor.TYPE_INT32, + descriptor.FieldDescriptor.TYPE_SFIXED32, + descriptor.FieldDescriptor.TYPE_INT64, + descriptor.FieldDescriptor.TYPE_SFIXED64, + descriptor.FieldDescriptor.TYPE_SINT32, + descriptor.FieldDescriptor.TYPE_SINT64, + ) +) +_UINT_KEY_TYPES = frozenset( + ( + descriptor.FieldDescriptor.TYPE_UINT32, + descriptor.FieldDescriptor.TYPE_FIXED32, + descriptor.FieldDescriptor.TYPE_UINT64, + descriptor.FieldDescriptor.TYPE_FIXED64, + ) +) + + +def _map_key_element( + field: descriptor.FieldDescriptor, + key: typing.Any, + key_field: descriptor.FieldDescriptor, + value_field: descriptor.FieldDescriptor, +) -> validate_pb.FieldPathElement: + 
subscript: Oneof + if key_field.type == descriptor.FieldDescriptor.TYPE_BOOL: + subscript = Oneof(field="bool_key", value=key) + elif key_field.type in _INT_KEY_TYPES: + subscript = Oneof(field="int_key", value=key) + elif key_field.type in _UINT_KEY_TYPES: + subscript = Oneof(field="uint_key", value=key) + elif key_field.type == descriptor.FieldDescriptor.TYPE_STRING: + subscript = Oneof(field="string_key", value=key) + else: + msg = "unexpected map type" + raise CompilationError(msg) + return validate_pb.FieldPathElement( + field_number=field.number, + field_name=field.name if not field.is_extension else f"[{field.full_name}]", + field_type=_ftype(field.type), + key_type=_ftype(key_field.type), + value_type=_ftype(value_field.type), + subscript=subscript, + ) + + +# Rule-spec path elements reference the buf.validate rule messages themselves, +# whose descriptors come from the bundled protobuf-py stub. +def _spec_element(pb_field: protobuf.DescField) -> validate_pb.FieldPathElement: + return validate_pb.FieldPathElement( + field_number=pb_field.number, + field_name=pb_field.name, + field_type=pb_field.proto.type, + ) + + +def _indexed_spec_element(pb_field: protobuf.DescField, index: int) -> validate_pb.FieldPathElement: + return validate_pb.FieldPathElement( + field_number=pb_field.number, + field_name=pb_field.name, + field_type=pb_field.proto.type, + subscript=Oneof(field="index", value=index), + ) + + +def _spec_field(rules_cls: typing.Any, name: str) -> protobuf.DescField: + return rules_cls.desc()._fields_by_name[name] + + +def _which_type(field_level: typing.Any) -> str | None: + """The set sub-field name of a FieldRules ``type`` oneof, or None.""" + return field_level.type.field if field_level.type is not None else None + + +@functools.lru_cache(maxsize=1) +def _google_predefined_ext() -> typing.Any: + """The google ``buf.validate.predefined`` extension descriptor. 
+ + Resolved lazily off the global pool, into which the bridge mirrors + ``buf.validate`` (and the user's files, which define any custom predefined + rule extensions) when it bridges rule messages. + """ + return descriptor_pool.Default().FindExtensionByName("buf.validate.predefined") + + +def _message_child(pb_field: protobuf.DescField) -> protobuf.DescMessage | None: + """The protobuf-py descriptor of a message-typed field, list item, or map value.""" + value = pb_field.value + if isinstance(value, protobuf.DescFieldValueMessage): + return value.message + if isinstance(value, protobuf.DescFieldValueList) and isinstance(value.element, protobuf.DescMessage): + return value.element + if isinstance(value, protobuf.DescFieldValueMap) and isinstance(value.value, protobuf.DescMessage): + return value.value + return None + + +@dataclasses.dataclass +class CelRunner: + runner: cel.Expression + rule: typing.Any + rule_value: typing.Any | None = None + rule_cel: typing.Any | None = None + rule_path: validate_pb.FieldPath | None = None + + +class CelRules(Rules): + """A rule that has rules written in CEL. + + ``_rules`` holds the *google* rule message (the buf.validate rules bridged + from protobuf-py), so it can be bound as the ``rules`` CEL variable. 
+ """ + + _cel: list[CelRunner] + _rules: message.Message | None = None + _uses_now: bool = False + + def __init__(self, rules_google: message.Message | None): + self._cel = [] + self._rules = rules_google + + def _validate_cel( + self, + ctx: RuleContext, + *, + this_value: typing.Any | None = None, + this_cel: typing.Any | None = None, + for_key: bool = False, + ): + if not self._cel: + return + activation: dict[str, typing.Any] = {} + if this_cel is not None: + activation["this"] = this_cel + activation["rules"] = self._rules + if self._uses_now: + activation["now"] = datetime.datetime.now(tz=datetime.timezone.utc) + for runner in self._cel: + activation["rule"] = runner.rule_cel + result = runner.runner.eval(data=activation) + result_type = result.type() + if result_type == cel.Type.BOOL: + if not result.plain_value(): + msg = runner.rule.message + if len(msg) == 0: + msg = f'"{runner.rule.expression}" returned false' + ctx.add( + Violation( + field_value=this_value, + rule=runner.rule_path, + rule_value=runner.rule_value, + rule_id=runner.rule.id, + message=msg, + for_key=for_key, + ), + ) + elif result_type == cel.Type.STRING: + # Formatting bytes with %s can yield a CEL string that is not + # valid UTF-8, which cannot convert to a Python str. 
+ try: + result_message = result.plain_value() + except TypeError: + result_message = f'"{runner.rule.expression}" returned false' + if result_message: + ctx.add( + Violation( + field_value=this_value, + rule=runner.rule_path, + rule_value=runner.rule_value, + rule_id=runner.rule.id, + message=result_message, # ty: ignore[invalid-argument-type] + for_key=for_key, + ), + ) + elif result_type == cel.Type.ERROR: + raise RuntimeError(str(result.plain_value())) + + def add_rule( + self, + env: cel.Env, + rules: typing.Any, + *, + rule_field: descriptor.FieldDescriptor | None = None, + rule_path: validate_pb.FieldPath | None = None, + ): + if isinstance(rules, str): + expression = rules + rules = validate_pb.Rule(id=expression, expression=expression) + if "now" in rules.expression: + self._uses_now = True + try: + prog = env.compile(rules.expression) + except RuntimeError as ex: + raise CompilationError(str(ex)) from ex + rule_value = None + rule_cel = None + if rule_field is not None and self._rules is not None: + rule_value = _proto_message_get_field(self._rules, rule_field) + rule_cel = field_to_cel(self._rules, rule_field) + self._cel.append( + CelRunner( + runner=prog, + rule=rules, + rule_value=rule_value, + rule_cel=rule_cel, + rule_path=rule_path, + ) + ) + + +class MessageOneofRule(Rules): + """Validates a single buf.validate.MessageOneofRule given via the message option (buf.validate.message).oneof""" + + def __init__(self, fields: list[descriptor.FieldDescriptor], *, required: bool): + self._fields = fields + self._required = required + + def validate(self, ctx: RuleContext, message: message.Message): + num_set_fields = sum(1 for field in self._fields if not _is_empty_field(message, field)) + if num_set_fields > 1: + ctx.add( + Violation( + rule_id="message.oneof", + message=f"only one of {', '.join(field.name for field in self._fields)} can be set", + ) + ) + if self._required and num_set_fields == 0: + ctx.add( + Violation( + rule_id="message.oneof", + 
message=f"one of {', '.join(field.name for field in self._fields)} must be set", + ) + ) + + +class MessageRules(CelRules): + """Message-level rules.""" + + _oneofs: list[MessageOneofRule] + + def __init__(self, rules_google: message.Message | None, desc: descriptor.Descriptor): + super().__init__(rules_google) + self._oneofs = [] + self._desc = desc + + def validate(self, ctx: RuleContext, message: message.Message): + if self._cel: + self._validate_cel(ctx, this_cel=message) + if ctx.done: + return + for oneof in self._oneofs: + oneof.validate(ctx, message) + if ctx.done: + return + + def add_oneof(self, rule: typing.Any): + fields = [] + seen = set() + if len(rule.fields) == 0: + msg = f"at least one field must be specified in oneof rule for the message {self._desc.full_name}" + raise CompilationError(msg) + for name in rule.fields: + if name in self._desc.fields_by_name: + if name in seen: + msg = f"duplicate {name} in oneof rule for the message {self._desc.full_name}" + raise CompilationError(msg) + fields.append(self._desc.fields_by_name[name]) + seen.add(name) + else: + msg = f'field "{name}" not found in message {self._desc.full_name}' + raise CompilationError(msg) + self._oneofs.append(MessageOneofRule(fields, required=rule.required)) + + +def check_field_type(field: descriptor.FieldDescriptor, expected: int, wrapper_name: str | None = None): + if field.type != expected and ( + field.type != descriptor.FieldDescriptor.TYPE_MESSAGE or field.message_type.full_name != wrapper_name + ): + field_type_str = _get_type_name(field.type) + if expected == 0: + if wrapper_name is not None: + expected_type_str = wrapper_name + else: + expected_type_str = _get_type_name(descriptor.FieldDescriptor.TYPE_MESSAGE) + else: + expected_type_str = _get_type_name(expected) + msg = f"field {field.name} has type {field_type_str} but expected {expected_type_str}" + raise CompilationError(msg) + + +def _is_map(field: descriptor.FieldDescriptor): + return _is_repeated(field) and 
field.message_type is not None and field.message_type.GetOptions().map_entry + + +def _is_list(field: descriptor.FieldDescriptor): + return _is_repeated(field) and not _is_map(field) + + +class FieldRules(CelRules): + """Field-level rules.""" + + _ignore_empty = False + _required = False + + _required_rule_path: typing.ClassVar[validate_pb.FieldPath] = validate_pb.FieldPath( + elements=[_spec_element(_spec_field(validate_pb.FieldRules, "required"))] + ) + + def __init__( + self, + env: cel.Env, + bridge: GoogleBridge, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + *, + for_items: bool = False, + force_ignore_empty: bool = False, + ): + type_case = _which_type(field_level) + rules_pb = field_level.type.value if type_case is not None else None + super().__init__(bridge.to_google(rules_pb) if rules_pb is not None else None) + self._field = field + self._ignore_empty = ( + field_level.ignore == validate_pb.Ignore.IF_ZERO_VALUE + or force_ignore_empty + or (field.has_presence and not for_items) + ) + self._required = field_level.required + if rules_pb is not None: + # Each set rule sub-field (standard rules like `gt`, and custom + # extension rules alike) may carry a private predefined-rule + # extension whose CEL implements it. Read these off the bridged + # google rules message: it is parsed against the global pool, into + # which the bridge has mirrored both buf.validate and the user's + # files, so custom extension rules decode (they would otherwise + # remain undecoded on the relocatable protobuf-py stub). 
+ g_rules = self._rules + assert g_rules is not None # rules_pb set implies the bridged rules # noqa: S101 + assert type_case is not None # rules_pb set implies a type oneof # noqa: S101 + type_field = _spec_field(validate_pb.FieldRules, type_case) + predefined_ext = _google_predefined_ext() + for rule_field, _value in g_rules.ListFields(): + options = rule_field.GetOptions() + if not options.HasExtension(predefined_ext): + continue + for cel_rule in options.Extensions[predefined_ext].cel: + self.add_rule( + env, + cel_rule, + rule_field=rule_field, + rule_path=validate_pb.FieldPath( + elements=[_field_to_element(rule_field), _spec_element(type_field)] + ), + ) + cel_expression_field = _spec_field(validate_pb.FieldRules, "cel_expression") + for i, cel_rule in enumerate(field_level.cel_expression): + self.add_rule( + env, + cel_rule, + rule_path=validate_pb.FieldPath(elements=[_indexed_spec_element(cel_expression_field, i)]), + ) + cel_field = _spec_field(validate_pb.FieldRules, "cel") + for i, cel_rule in enumerate(field_level.cel): + self.add_rule( + env, + cel_rule, + rule_path=validate_pb.FieldPath(elements=[_indexed_spec_element(cel_field, i)]), + ) + + def validate(self, ctx: RuleContext, message: message.Message): + if _is_empty_field(message, self._field): + if self._required: + ctx.add( + Violation( + field=validate_pb.FieldPath(elements=[_field_to_element(self._field)]), + rule=FieldRules._required_rule_path, + rule_value=self._required, + rule_id="required", + message="value is required", + ), + ) + return + if self._ignore_empty: + return + val = _proto_message_get_field(message, self._field) + cel_val = _field_value_to_cel(val, self._field) + sub_ctx = ctx.sub_context() + self._validate_value(sub_ctx, val) + self._validate_cel(sub_ctx, this_value=val, this_cel=cel_val) + if sub_ctx.has_errors(): + element = _field_to_element(self._field) + sub_ctx.add_field_path_element(element) + ctx.add_errors(sub_ctx) + + def validate_item(self, ctx: RuleContext, 
value: typing.Any, *, for_key: bool = False): + self._validate_value(ctx, value, for_key=for_key) + self._validate_cel( + ctx, this_value=value, this_cel=_scalar_field_value_to_cel(value, self._field), for_key=for_key + ) + + def _validate_value(self, ctx: RuleContext, value: typing.Any, *, for_key: bool = False): + pass + + +class AnyRules(FieldRules): + """Rules for an Any field.""" + + _in_rule_path: typing.ClassVar[validate_pb.FieldPath] = validate_pb.FieldPath( + elements=[ + _spec_element(_spec_field(validate_pb.AnyRules, "in")), + _spec_element(_spec_field(validate_pb.FieldRules, "any")), + ], + ) + + _not_in_rule_path: typing.ClassVar[validate_pb.FieldPath] = validate_pb.FieldPath( + elements=[ + _spec_element(_spec_field(validate_pb.AnyRules, "not_in")), + _spec_element(_spec_field(validate_pb.FieldRules, "any")), + ], + ) + + def __init__( + self, + env: cel.Env, + bridge: GoogleBridge, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + ): + super().__init__(env, bridge, field, field_level) + any_rules = field_level.type.value + self._in = list(any_rules.in_) or [] + self._not_in: Container[str] = list(any_rules.not_in) or [] + + def _validate_value(self, ctx: RuleContext, value: any_pb2.Any, *, for_key: bool = False): + if len(self._in) > 0 and value.type_url not in self._in: + ctx.add( + Violation( + rule=AnyRules._in_rule_path, + rule_value=self._in, + rule_id="any.in", + message="type URL must be in the allow list", + for_key=for_key, + ) + ) + if value.type_url in self._not_in: + ctx.add( + Violation( + rule=AnyRules._not_in_rule_path, + rule_value=self._not_in, + rule_id="any.not_in", + message="type URL must not be in the block list", + for_key=for_key, + ) + ) + + +class EnumRules(FieldRules): + """Rules for an enum field.""" + + _defined_only = False + + _defined_only_rule_path: typing.ClassVar[validate_pb.FieldPath] = validate_pb.FieldPath( + elements=[ + _spec_element(_spec_field(validate_pb.EnumRules, "defined_only")), + 
_spec_element(_spec_field(validate_pb.FieldRules, "enum")), + ], + ) + + def __init__( + self, + env: cel.Env, + bridge: GoogleBridge, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + *, + for_items: bool = False, + force_ignore_empty: bool = False, + ): + super().__init__(env, bridge, field, field_level, for_items=for_items, force_ignore_empty=force_ignore_empty) + if field_level.type.value.defined_only: + self._defined_only = True + + def validate(self, ctx: RuleContext, message: message.Message): + super().validate(ctx, message) + if ctx.done: + return + if self._defined_only and getattr(message, self._field.name) not in self._field.enum_type.values_by_number: + ctx.add( + Violation( + field=validate_pb.FieldPath(elements=[_field_to_element(self._field)]), + rule=EnumRules._defined_only_rule_path, + rule_value=self._defined_only, + rule_id="enum.defined_only", + message="value must be one of the defined enum values", + ), + ) + + +class RepeatedRules(FieldRules): + """Rules for a repeated field.""" + + _item_rules: FieldRules | None = None + + _items_rules_suffix: typing.ClassVar[list[validate_pb.FieldPathElement]] = [ + _spec_element(_spec_field(validate_pb.RepeatedRules, "items")), + _spec_element(_spec_field(validate_pb.FieldRules, "repeated")), + ] + + def __init__( + self, + env: cel.Env, + bridge: GoogleBridge, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + item_rules: FieldRules | None, + ): + super().__init__(env, bridge, field, field_level) + if item_rules is not None: + self._item_rules = item_rules + + def validate(self, ctx: RuleContext, message: message.Message): + super().validate(ctx, message) + if ctx.done: + return + value = getattr(message, self._field.name) + if self._item_rules is not None: + for i, item in enumerate(value): + if self._item_rules._ignore_empty and not item: + continue + sub_ctx = ctx.sub_context() + self._item_rules.validate_item(sub_ctx, item) + if sub_ctx.has_errors(): + 
sub_ctx.add_field_path_element(_indexed_field_element(self._field, i)) + sub_ctx.add_rule_path_elements(RepeatedRules._items_rules_suffix) + ctx.add_errors(sub_ctx) + if ctx.done: + return + + +class MapRules(FieldRules): + """Rules for a map field.""" + + _key_rules: FieldRules | None = None + _value_rules: FieldRules | None = None + + _key_rules_suffix: typing.ClassVar[list[validate_pb.FieldPathElement]] = [ + _spec_element(_spec_field(validate_pb.MapRules, "keys")), + _spec_element(_spec_field(validate_pb.FieldRules, "map")), + ] + + _value_rules_suffix: typing.ClassVar[list[validate_pb.FieldPathElement]] = [ + _spec_element(_spec_field(validate_pb.MapRules, "values")), + _spec_element(_spec_field(validate_pb.FieldRules, "map")), + ] + + def __init__( + self, + env: cel.Env, + bridge: GoogleBridge, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + key_rules: FieldRules | None, + value_rules: FieldRules | None, + ): + super().__init__(env, bridge, field, field_level) + if key_rules is not None: + self._key_rules = key_rules + if value_rules is not None: + self._value_rules = value_rules + + def validate(self, ctx: RuleContext, message: message.Message): + super().validate(ctx, message) + if ctx.done: + return + value = getattr(message, self._field.name) + key_field = self._field.message_type.fields_by_name["key"] + value_field = self._field.message_type.fields_by_name["value"] + for k, v in value.items(): + key_ctx = ctx.sub_context() + if self._key_rules is not None: + if not self._key_rules._ignore_empty or k: + self._key_rules.validate_item(key_ctx, k, for_key=True) + if key_ctx.has_errors(): + key_ctx.add_rule_path_elements(MapRules._key_rules_suffix) + map_ctx = ctx.sub_context() + if self._value_rules is not None: + if not self._value_rules._ignore_empty or v: + self._value_rules.validate_item(map_ctx, v) + if map_ctx.has_errors(): + map_ctx.add_rule_path_elements(MapRules._value_rules_suffix) + map_ctx.add_errors(key_ctx) + if 
map_ctx.has_errors(): + map_ctx.add_field_path_element(_map_key_element(self._field, k, key_field, value_field)) + ctx.add_errors(map_ctx) + + +class OneofRules(Rules): + """Rules for a oneof definition.""" + + required = True + + def __init__(self, oneof: descriptor.OneofDescriptor, rules: typing.Any): + self._oneof = oneof + if not rules.required: + self.required = False + + def validate(self, ctx: RuleContext, message: message.Message): + if not message.WhichOneof(self._oneof.name): + if self.required: + ctx.add( + Violation( + field=validate_pb.FieldPath(elements=[_oneof_to_element(self._oneof)]), + rule_id="required", + message="exactly one field is required in oneof", + ) + ) + return + + +class RuleFactory: + """Factory for creating and caching rules, keyed on protobuf-py descriptors.""" + + _env: cel.Env + + def __init__(self, extension: cel.CelExtensionBase, bridge: GoogleBridge): + self._bridge = bridge + # cel-expr-python evaluates against — and type-checks bindings against — + # the global pool the bridge populates. 
+ self._env = cel.NewEnv( + descriptor_pool=descriptor_pool.Default(), + variables={ + "this": cel.Type.DYN, + "rules": cel.Type.DYN, + "rule": cel.Type.DYN, + "now": cel.Type.TIMESTAMP, + }, + extensions=[ext_strings.ExtStrings(), extension], + ) + self._cache: dict[str, list[Rules] | Exception] = {} + + def get(self, desc: protobuf.DescMessage) -> list[Rules]: + key = desc.type_name + if key not in self._cache: + try: + self._cache[key] = self._new_rules(desc) + except Exception as e: + self._cache[key] = e + result = self._cache[key] + if isinstance(result, Exception): + raise result + return result + + def _new_message_rule(self, rules: typing.Any, desc: descriptor.Descriptor) -> MessageRules: + result = MessageRules(self._bridge.to_google(rules), desc) + for oneof in rules.oneof: + result.add_oneof(oneof) + for expr in rules.cel_expression: + result.add_rule(self._env, expr) + for cel_rule in rules.cel: + result.add_rule(self._env, cel_rule) + return result + + def _new_scalar_field_rule( + self, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + *, + for_items: bool = False, + force_ignore_empty: bool = False, + ): + if field_level.ignore == validate_pb.Ignore.ALWAYS: + return None + type_case = _which_type(field_level) + kw = {"for_items": for_items, "force_ignore_empty": force_ignore_empty} + if type_case is None: + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "duration": + check_field_type(field, 0, "google.protobuf.Duration") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "field_mask": + check_field_type(field, 0, "google.protobuf.FieldMask") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "timestamp": + check_field_type(field, 0, "google.protobuf.Timestamp") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "enum": + check_field_type(field, descriptor.FieldDescriptor.TYPE_ENUM) 
+ return EnumRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "bool": + check_field_type(field, descriptor.FieldDescriptor.TYPE_BOOL, "google.protobuf.BoolValue") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "bytes": + check_field_type(field, descriptor.FieldDescriptor.TYPE_BYTES, "google.protobuf.BytesValue") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "fixed32": + check_field_type(field, descriptor.FieldDescriptor.TYPE_FIXED32) + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "fixed64": + check_field_type(field, descriptor.FieldDescriptor.TYPE_FIXED64) + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "float": + check_field_type(field, descriptor.FieldDescriptor.TYPE_FLOAT, "google.protobuf.FloatValue") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "double": + check_field_type(field, descriptor.FieldDescriptor.TYPE_DOUBLE, "google.protobuf.DoubleValue") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "int32": + check_field_type(field, descriptor.FieldDescriptor.TYPE_INT32, "google.protobuf.Int32Value") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "int64": + check_field_type(field, descriptor.FieldDescriptor.TYPE_INT64, "google.protobuf.Int64Value") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "sfixed32": + check_field_type(field, descriptor.FieldDescriptor.TYPE_SFIXED32) + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "sfixed64": + check_field_type(field, descriptor.FieldDescriptor.TYPE_SFIXED64) + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "sint32": + check_field_type(field, descriptor.FieldDescriptor.TYPE_SINT32) + 
return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "sint64": + check_field_type(field, descriptor.FieldDescriptor.TYPE_SINT64) + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "uint32": + check_field_type(field, descriptor.FieldDescriptor.TYPE_UINT32, "google.protobuf.UInt32Value") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "uint64": + check_field_type(field, descriptor.FieldDescriptor.TYPE_UINT64, "google.protobuf.UInt64Value") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "string": + check_field_type(field, descriptor.FieldDescriptor.TYPE_STRING, "google.protobuf.StringValue") + return FieldRules(self._env, self._bridge, field, field_level, **kw) + if type_case == "any": + check_field_type(field, 0, "google.protobuf.Any") + return AnyRules(self._env, self._bridge, field, field_level) + msg = f"unknown rule type {type_case!r}" + raise CompilationError(msg) + + def _new_field_rule( + self, + field: descriptor.FieldDescriptor, + field_level: typing.Any, + *, + force_ignore_empty: bool = False, + ) -> FieldRules: + if not _is_repeated(field): + return self._new_scalar_field_rule(field, field_level, force_ignore_empty=force_ignore_empty) + type_case = _which_type(field_level) + if field.message_type is not None and field.message_type.GetOptions().map_entry: + map_rules = field_level.type.value if type_case == "map" else None + key_rules = None + value_rules = None + if map_rules is not None and map_rules.keys is not None: + key_field = field.message_type.fields_by_name["key"] + key_rules = self._new_scalar_field_rule(key_field, map_rules.keys, for_items=True) + if map_rules is not None and map_rules.values is not None: + value_field = field.message_type.fields_by_name["value"] + value_rules = self._new_scalar_field_rule(value_field, map_rules.values, for_items=True) + return MapRules(self._env, 
self._bridge, field, field_level, key_rules, value_rules) + item_rule = None + rep_rules = field_level.type.value if type_case == "repeated" else None + if rep_rules is not None and rep_rules.items is not None: + item_rule = self._new_scalar_field_rule(field, rep_rules.items) + return RepeatedRules(self._env, self._bridge, field, field_level, item_rule) + + def _new_rules(self, pb_desc: protobuf.DescMessage) -> list[Rules]: + gdesc = typing.cast(descriptor.Descriptor, self._bridge.google_class(pb_desc).DESCRIPTOR) + result: list[Rules] = [] + all_msg_oneof_fields: set[str] = set() + + msg_opts = pb_desc.proto.options + if msg_opts is not None and validate_pb.ext_message in msg_opts: + message_level: typing.Any = msg_opts[validate_pb.ext_message] + for oneof in message_level.oneof: + all_msg_oneof_fields.update(oneof.fields) + if rule := self._new_message_rule(message_level, gdesc): + result.append(rule) + + pb_oneofs = {o.name: o for o in pb_desc.oneofs} + for goneof in gdesc.oneofs: + pb_oneof = pb_oneofs.get(goneof.name) + if pb_oneof is None: + continue + oneof_opts = pb_oneof.proto.options + if oneof_opts is not None and validate_pb.ext_oneof in oneof_opts: + result.append(OneofRules(goneof, oneof_opts[validate_pb.ext_oneof])) + + ignore_field = _spec_field(validate_pb.FieldRules, "ignore") + for gfield in gdesc.fields: + pb_field = pb_desc._fields_by_name.get(gfield.name) + field_level: typing.Any = None + if pb_field is not None: + field_opts = pb_field.proto.options + if field_opts is not None and validate_pb.ext_field in field_opts: + field_level = field_opts[validate_pb.ext_field] + if field_level is not None: + # A field in a message-level oneof rule that does not set its own + # ignore behaviour is treated as ignore-if-zero-value. 
+ force_ignore_empty = ignore_field not in field_level and gfield.name in all_msg_oneof_fields + if field_level.ignore == validate_pb.Ignore.ALWAYS: + continue + result.append(self._new_field_rule(gfield, field_level, force_ignore_empty=force_ignore_empty)) + if _which_type(field_level) == "repeated": + rep: typing.Any = field_level.type.value # ty: ignore[unresolved-attribute] + if rep.items is not None and rep.items.ignore == validate_pb.Ignore.ALWAYS: + continue + if gfield.message_type is None or pb_field is None: + continue + sub_desc = _message_child(pb_field) + if sub_desc is None: + continue + if gfield.message_type.GetOptions().map_entry: + key_field = gfield.message_type.fields_by_name["key"] + value_field = gfield.message_type.fields_by_name["value"] + result.append(MapValMsgRule(self, gfield, key_field, value_field, sub_desc)) + elif _is_repeated(gfield): + result.append(RepeatedMsgRule(self, gfield, sub_desc)) + else: + result.append(SubMsgRule(self, gfield, sub_desc)) + return result + + +class SubMsgRule(Rules): + def __init__(self, factory: RuleFactory, field: descriptor.FieldDescriptor, sub_desc: protobuf.DescMessage): + self._factory = factory + self._field = field + self._sub_desc = sub_desc + + def validate(self, ctx: RuleContext, message: message.Message): + if not message.HasField(self._field.name): + return + rules = self._factory.get(self._sub_desc) + if not rules: + return + val = getattr(message, self._field.name) + sub_ctx = ctx.sub_context() + for rule in rules: + rule.validate(sub_ctx, val) + if sub_ctx.has_errors(): + sub_ctx.add_field_path_element(_field_to_element(self._field)) + ctx.add_errors(sub_ctx) + + +class MapValMsgRule(Rules): + def __init__( + self, + factory: RuleFactory, + field: descriptor.FieldDescriptor, + key_field: descriptor.FieldDescriptor, + value_field: descriptor.FieldDescriptor, + sub_desc: protobuf.DescMessage, + ): + self._factory = factory + self._field = field + self._key_field = key_field + 
self._value_field = value_field + self._sub_desc = sub_desc + + def validate(self, ctx: RuleContext, message: message.Message): + val = getattr(message, self._field.name) + if not val: + return + rules = self._factory.get(self._sub_desc) + if not rules: + return + for k, v in val.items(): + sub_ctx = ctx.sub_context() + for rule in rules: + rule.validate(sub_ctx, v) + if sub_ctx.has_errors(): + sub_ctx.add_field_path_element(_map_key_element(self._field, k, self._key_field, self._value_field)) + ctx.add_errors(sub_ctx) + + +class RepeatedMsgRule(Rules): + def __init__(self, factory: RuleFactory, field: descriptor.FieldDescriptor, sub_desc: protobuf.DescMessage): + self._factory = factory + self._field = field + self._sub_desc = sub_desc + + def validate(self, ctx: RuleContext, message: message.Message): + val = getattr(message, self._field.name) + if not val: + return + rules = self._factory.get(self._sub_desc) + if not rules: + return + for idx, item in enumerate(val): + sub_ctx = ctx.sub_context() + for rule in rules: + rule.validate(sub_ctx, item) + if sub_ctx.has_errors(): + sub_ctx.add_field_path_element(_indexed_field_element(self._field, idx)) + ctx.add_errors(sub_ctx) diff --git a/protovalidate/internal/rules.py b/protovalidate/internal/rules.py index ebf0ec3..eca5179 100644 --- a/protovalidate/internal/rules.py +++ b/protovalidate/internal/rules.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import abc import dataclasses import datetime import typing @@ -41,12 +40,11 @@ from protobuf.wkt import Duration, FieldDescriptorProto, Timestamp from protovalidate._gen.buf.validate import validate_pb -from protovalidate.internal.cel_field_presence import InterpretedRunner, in_has - - -class CompilationError(Exception): - pass +# Backend-agnostic primitives shared with the cel-expr-python engine. Re-exported +# here so ``protovalidate.internal.rules.Violation`` (etc.) stays importable. 
+from protovalidate.internal._core import CompilationError, RuleContext, Rules, Violation +from protovalidate.internal.cel_field_presence import InterpretedRunner, in_has _TYPE_CTORS: dict[ScalarType, Callable[..., celtypes.Value]] = { ScalarType.BOOL: celtypes.BoolType, @@ -321,102 +319,6 @@ def _which_type(field_level: validate_pb.FieldRules) -> str | None: return field_level.type.field if field_level.type is not None else None -class Violation: - """A singular rule violation.""" - - field_value: typing.Any - rule_value: typing.Any - - def __init__( - self, - *, - field_value: typing.Any = None, - rule_value: typing.Any = None, - field: validate_pb.FieldPath | None = None, - rule: validate_pb.FieldPath | None = None, - rule_id: str = "", - message: str = "", - for_key: bool = False, - ): - self.field_value = field_value - self.rule_value = rule_value - self._field_elements: list[validate_pb.FieldPathElement] = list(field.elements) if field is not None else [] - self._rule_elements: list[validate_pb.FieldPathElement] = list(rule.elements) if rule is not None else [] - self._rule_id = rule_id - self._message = message - self._for_key = for_key - - @property - def proto(self) -> validate_pb.Violation: - kwargs: dict[str, typing.Any] = { - "rule_id": self._rule_id, - "message": self._message, - "for_key": self._for_key, - } - if self._field_elements: - kwargs["field"] = validate_pb.FieldPath(elements=list(self._field_elements)) - if self._rule_elements: - kwargs["rule"] = validate_pb.FieldPath(elements=list(self._rule_elements)) - return validate_pb.Violation(**kwargs) - - def _finalize_paths(self) -> None: - self._field_elements.reverse() - self._rule_elements.reverse() - - def _append_field_element(self, element: validate_pb.FieldPathElement) -> None: - self._field_elements.append(element) - - def _extend_rule_elements(self, elements: list[validate_pb.FieldPathElement]) -> None: - self._rule_elements.extend(elements) - - -class RuleContext: - """The state 
associated with a single rule evaluation.""" - - _violations: list[Violation] - - def __init__(self, *, fail_fast: bool = False): - self._fail_fast = fail_fast - self._violations = [] - - @property - def violations(self) -> list[Violation]: - return self._violations - - def add(self, violation: Violation): - self._violations.append(violation) - - def add_errors(self, other_ctx: "RuleContext"): - self._violations.extend(other_ctx.violations) - - def add_field_path_element(self, element: validate_pb.FieldPathElement): - for violation in self._violations: - violation._append_field_element(element) - - def add_rule_path_elements(self, elements: list[validate_pb.FieldPathElement]): - for violation in self._violations: - violation._extend_rule_elements(elements) - - @property - def done(self) -> bool: - return self._fail_fast and self.has_errors() - - def has_errors(self) -> bool: - return len(self._violations) > 0 - - def sub_context(self) -> "RuleContext": - return RuleContext(fail_fast=self._fail_fast) - - -class Rules(abc.ABC): - """The rules associated with a single 'rules' message.""" - - @abc.abstractmethod - def validate(self, ctx: RuleContext, message: Message) -> None: - """Validate the message against the rules in this rule.""" - ... 
- - @dataclasses.dataclass class CelRunner: runner: celpy.Runner diff --git a/protovalidate/validator.py b/protovalidate/validator.py index ac88c1f..e78e785 100644 --- a/protovalidate/validator.py +++ b/protovalidate/validator.py @@ -14,21 +14,74 @@ from __future__ import annotations +import typing from typing import TYPE_CHECKING from protobuf import Message, Registry from protovalidate._gen.buf.validate import validate_pb -from protovalidate.internal import extra_func +from protovalidate.internal import backend, extra_func from protovalidate.internal import rules as _rules +from protovalidate.internal._core import CompilationError, RuleContext, Violation from protovalidate.internal.legacy import LegacyMessageConverter if TYPE_CHECKING: from google.protobuf import message as google_message -CompilationError = _rules.CompilationError +# Re-exported on this module (protovalidate/__init__.py binds them by name). +__all__ = ["CompilationError", "ValidationError", "Validator", "Violation", "Violations"] Violations = validate_pb.Violations -Violation = _rules.Violation + + +class _Engine(typing.Protocol): + """A CEL backend: turns a protobuf-py message into a list of violations.""" + + def collect_violations(self, message: Message, *, fail_fast: bool) -> list[Violation]: ... + + +class _CelpyEngine: + """The pure-Python celpy engine, evaluating directly over protobuf-py.""" + + def __init__(self, registry: Registry | None): + self._factory = _rules.RuleFactory(extra_func.make_extra_funcs(), registry) + + def collect_violations(self, message: Message, *, fail_fast: bool) -> list[Violation]: + ctx = RuleContext(fail_fast=fail_fast) + for rule in self._factory.get(type(message).desc()): + rule.validate(ctx, message) + if ctx.done: + break + for violation in ctx.violations: + violation.finalize_paths() + return ctx.violations + + +class _CelExprEngine: + """The cel-expr-python (cel-cpp) engine. 
+ + Rules are discovered from protobuf-py descriptors, but the CEL runtime only + ingests google.protobuf, so every validated message is bridged to a google + dynamic message (a serialize/parse round trip) before evaluation. The user's + ``registry`` is not needed: custom predefined-rule extensions resolve off the + global google pool the bridge mirrors into. + """ + + def __init__(self, registry: Registry | None): # noqa: ARG002 - accepted for API symmetry + from protovalidate.internal import celexpr # noqa: PLC0415 - optional dependency + + self._bridge = celexpr.GoogleBridge() + self._factory = celexpr.RuleFactory(celexpr.make_extension(), self._bridge) + + def collect_violations(self, message: Message, *, fail_fast: bool) -> list[Violation]: + bridged = self._bridge.to_google(message) + ctx = RuleContext(fail_fast=fail_fast) + for rule in self._factory.get(type(message).desc()): + rule.validate(ctx, bridged) + if ctx.done: + break + for violation in ctx.violations: + violation.finalize_paths() + return ctx.violations class Validator: @@ -40,27 +93,36 @@ class Validator: protobuf-py message of the same type; violation field values then refer to that copy. + CEL is evaluated by the cel-expr-python engine when that optional dependency + is installed, and otherwise by the pure-Python celpy engine (see + :mod:`protovalidate.internal.backend`). The choice is automatic; the engines + agree except on nanosecond-precision timestamp/duration rules, where celpy is nonconforming. + Each validator instance caches internal state generated from the static rules, so reusing the same instance for multiple validations significantly improves performance. """ - _factory: _rules.RuleFactory + _engine: _Engine def __init__(self, registry: Registry | None = None): """ Parameters: registry: An optional Registry used to resolve custom predefined-rule extensions. If omitted, only standard rules are applied. + (Only consulted by the celpy engine; the cel-expr engine resolves + custom rules through its google descriptor pool.)
""" - funcs = extra_func.make_extra_funcs() try: import google.protobuf.message # noqa: F401, PLC0415 self._legacy = LegacyMessageConverter() except ImportError: self._legacy = None - self._factory = _rules.RuleFactory(funcs, registry) + if backend.CEL_EXPR_AVAILABLE: + self._engine = _CelExprEngine(registry) + else: + self._engine = _CelpyEngine(registry) def validate(self, message: Message | google_message.Message, *, fail_fast: bool = False): """ @@ -75,10 +137,10 @@ def validate(self, message: Message | google_message.Message, *, fail_fast: bool ValidationError: If the message is invalid. The violations raised as part of this error should always be equal to the list of violations returned by `collect_violations`. """ - message = self._coerce(message) - violations = self.collect_violations(message, fail_fast=fail_fast) + coerced = self._coerce(message) + violations = self.collect_violations(coerced, fail_fast=fail_fast) if len(violations) > 0: - msg = f"invalid {type(message).desc().name}" + msg = f"invalid {type(coerced).desc().name}" raise ValidationError(msg, violations) def collect_violations( @@ -102,15 +164,8 @@ def collect_violations( Raises: CompilationError: If the static rules could not be compiled. """ - message = self._coerce(message) - ctx = _rules.RuleContext(fail_fast=fail_fast) - for rule in self._factory.get(type(message).desc()): - rule.validate(ctx, message) - if ctx.done: - break - for violation in ctx.violations: - violation._finalize_paths() - return ctx.violations + coerced = self._coerce(message) + return self._engine.collect_violations(coerced, fail_fast=fail_fast) def _coerce(self, message: Message | google_message.Message) -> Message: if self._legacy: @@ -124,9 +179,9 @@ class ValidationError(ValueError): An error raised when a message fails to validate. 
""" - _violations: list[_rules.Violation] + _violations: list[Violation] - def __init__(self, msg: str, violations: list[_rules.Violation]): + def __init__(self, msg: str, violations: list[Violation]): super().__init__(msg) self._violations = violations diff --git a/pyproject.toml b/pyproject.toml index ee96c01..0433db3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,29 @@ dependencies = [ ] dynamic = ["version"] +# cel-expr-python (cel-cpp) is an optional, faster CEL backend. It is fully +# opt-in: it ships only version-specific compiled wheels for CPython 3.11-3.14 +# on manylinux (glibc) / macOS / Windows (x86-64 + arm64) — no musllinux, no +# 3.10, no free-threaded (cp314t), no abi3/pure-python wheel. PEP 508 markers +# cannot express those distinctions (no free-threaded marker; manylinux and +# musllinux are both `sys_platform == 'linux'`), so it cannot be gated onto the +# required dependency list without risking a source build. When installed, the +# validator selects it automatically and otherwise falls back to cel-python. +[project.optional-dependencies] +cel-expr = [ + # google.protobuf is required alongside it: cel-expr-python only ingests + # google.protobuf descriptor pools and messages, so protobuf-py values are + # bridged through it at the validation boundary. + # No 3.10 release exists (earliest wheel is cp311), so a floor marker is + # required for the lock to resolve on our 3.10-supported base. + "cel-expr-python>=0.1.3; python_version >= '3.11'", + "protobuf>=5; python_version >= '3.11'", + # We need at least this version, which fixed Python 3.14 compatibility (tp_new metaclass issue). + # The fix was never backported to the 5.x series. 
+ # Ref: https://github.com/protocolbuffers/protobuf/pull/20594 + "protobuf>=6.31.0; python_version >= '3.14'", +] + [project.urls] Documentation = "https://protovalidate.com" Homepage = "https://github.com/bufbuild/protovalidate-python" @@ -49,6 +72,11 @@ dev = [ "ty==0.0.51", "types-protobuf==6.32.1.20260221", + # The optional cel-expr backend, installed for local/CI testing of both + # engines. Not pinned to an exact version, so the resolver picks the wheel for the active platform; + # limited to Python >= 3.11 (the earliest version it ships wheels for) so the lock resolves on 3.10. + "cel-expr-python>=0.1.3; python_version >= '3.11'", + # Unlike most dev dependencies, we don't pin to allow testing old and new versions "protobuf>=5", # We need at least this version, which fixed Python 3.14 compatibility (tp_new metaclass issue). diff --git a/test/conformance/nonconforming.cel-expr.yaml b/test/conformance/nonconforming.cel-expr.yaml new file mode 100644 index 0000000..a0bdbc5 --- /dev/null +++ b/test/conformance/nonconforming.cel-expr.yaml @@ -0,0 +1,5 @@ +# Expected conformance failures for the cel-expr-python backend. +# +# cel-expr-python (cel-cpp) evaluates nanosecond-precision duration and +# timestamp comparisons correctly, so the cases celpy skips in +# nonconforming.yaml all pass here. All known cases conform. diff --git a/test/conformance/runner.py b/test/conformance/runner.py index 8c47a7a..7e3dee9 100644 --- a/test/conformance/runner.py +++ b/test/conformance/runner.py @@ -25,6 +25,7 @@ from protobuf import wkt as pb_wkt import protovalidate +from protovalidate.internal import backend from ..gen.buf.validate import validate_pb # noqa: TID252 from ..gen.buf.validate.conformance.harness.harness_pb import ( # noqa: TID252 @@ -36,6 +37,13 @@ # Set to test google.protobuf messages instead of protobuf-py _LEGACY = os.environ.get("PROTOVALIDATE_CONFORMANCE_LEGACY") == "1" +# Pin the CEL backend for this run.
A test-harness detail (like _LEGACY), not a +# public switch: the parent process can't monkeypatch into this subprocess, so +# it passes the choice by env. "celpy" forces the fallback by clearing the +# auto-detect flag before any Validator is built; anything else keeps auto. +if os.environ.get("PROTOVALIDATE_CONFORMANCE_BACKEND") == "celpy": + backend.CEL_EXPR_AVAILABLE = False + def build_google_pool(fdset: pb_wkt.FileDescriptorSet) -> google_descriptor_pool.DescriptorPool: pool = google_descriptor_pool.DescriptorPool() @@ -71,10 +79,14 @@ def run_test_case(validator: protovalidate.Validator, tc: protobuf.Message | goo ) else: return TestResult(result=Oneof(field="success", value=True)) - except celpy.CELEvalError as e: - return TestResult(result=Oneof(field="runtime_error", value=str(e))) except protovalidate.CompilationError as e: return TestResult(result=Oneof(field="compilation_error", value=str(e))) + except celpy.CELEvalError as e: + # celpy surfaces evaluation failures as CELEvalError; cel-expr-python + # surfaces them as RuntimeError. + return TestResult(result=Oneof(field="runtime_error", value=str(e))) + except RuntimeError as e: + return TestResult(result=Oneof(field="runtime_error", value=str(e))) except Exception as e: return TestResult(result=Oneof(field="unexpected_error", value=str(e))) diff --git a/test/conformance/test_conformance.py b/test/conformance/test_conformance.py index 59fe5e0..7429a6a 100644 --- a/test/conformance/test_conformance.py +++ b/test/conformance/test_conformance.py @@ -22,8 +22,16 @@ import pytest +from test.conftest import BACKENDS from test.versions import PROTOVALIDATE_VERSION +# Per-backend expected-failure lists: celpy skips nanosecond timestamp/duration +# cases that cel-expr-python handles correctly. 
+_EXPECTED_FAILURES = { + "celpy": "nonconforming.yaml", + "cel-expr": "nonconforming.cel-expr.yaml", +} + def maybe_patch_args_with_debug(args: list[str]) -> list[str]: # Do a best effort to invoke the child with debugging. @@ -39,8 +47,9 @@ def maybe_patch_args_with_debug(args: list[str]) -> list[str]: return args +@pytest.mark.parametrize("cel_backend", BACKENDS) @pytest.mark.parametrize("legacy", [False, True], ids=["py", "legacy"]) -def test_conformance(*, legacy: bool) -> None: +def test_conformance(*, legacy: bool, cel_backend: str) -> None: # Workaround pydevd monkeypatching of -m invocation not being compatible # with Python 3.14 yet by executing a script that uses runpy itself. # pydevd does monkeypatch -c form correctly. @@ -60,6 +69,7 @@ def test_conformance(*, legacy: bool) -> None: env = os.environ.copy() if legacy: env["PROTOVALIDATE_CONFORMANCE_LEGACY"] = "1" + env["PROTOVALIDATE_CONFORMANCE_BACKEND"] = cel_backend subprocess.run( # noqa: S603 [ # noqa: S607 @@ -67,7 +77,7 @@ def test_conformance(*, legacy: bool) -> None: "run", f"github.com/bufbuild/protovalidate/tools/protovalidate-conformance@{PROTOVALIDATE_VERSION}", "--strict_message", - f"--expected_failures={Path(__file__).parent / 'nonconforming.yaml'}", + f"--expected_failures={Path(__file__).parent / _EXPECTED_FAILURES[cel_backend]}", "--timeout", "10s", *command, diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..3fc3dae --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,45 @@ +# Copyright 2023-2026 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared test helpers for exercising both CEL backends in one run. + +The backend is normally auto-detected. Since there is no public switch, tests +pin a Validator to a specific engine by toggling ``backend.CEL_EXPR_AVAILABLE`` +around construction (the engine binds once in ``Validator.__init__``, so the +flag can be restored immediately afterwards). +""" + +from __future__ import annotations + +import protovalidate +from protovalidate.internal import backend + +# Backend ids present in this environment. celpy is always available; cel-expr +# only when its optional wheel is installed. 
+BACKENDS: list[str] = ["celpy", *(["cel-expr"] if backend.CEL_EXPR_AVAILABLE else [])] + + +def make_validator(cel_backend: str, **kwargs) -> protovalidate.Validator: + """A Validator pinned to ``cel_backend`` ("celpy" or "cel-expr").""" + original = backend.CEL_EXPR_AVAILABLE + backend.CEL_EXPR_AVAILABLE = cel_backend == "cel-expr" + try: + return protovalidate.Validator(**kwargs) + finally: + backend.CEL_EXPR_AVAILABLE = original + + +def backend_validators(**kwargs) -> list[protovalidate.Validator]: + """One Validator per CEL backend available in this environment.""" + return [make_validator(name, **kwargs) for name in BACKENDS] diff --git a/test/test_benchmark.py b/test/test_benchmark.py index 4d59ca9..b78e41f 100644 --- a/test/test_benchmark.py +++ b/test/test_benchmark.py @@ -32,6 +32,7 @@ import protovalidate +from .conftest import BACKENDS, make_validator from .gen.bench.v1.bench_pb import ( BenchComplexSchema, BenchEnum, @@ -107,7 +108,11 @@ def gen_complex(depth: int) -> BenchComplexSchema: ) -validator = protovalidate.Validator() +@pytest.fixture(params=BACKENDS) +def validator(request: pytest.FixtureRequest) -> protovalidate.Validator: + """A Validator per available CEL backend, so `bench-compare` (grouped on + _id) lines celpy up against cel-expr-python for each case.""" + return make_validator(request.param) def param(*args, id: str) -> pytest.param: # noqa: A002 @@ -189,6 +194,11 @@ def param(*args, id: str) -> pytest.param: # noqa: A002 @pytest.mark.parametrize(("_id", "message_factory"), cases) -def test_benchmark(_id: str, message_factory: Callable[[], Message], benchmark: BenchmarkFixture): +def test_benchmark( + _id: str, + message_factory: Callable[[], Message], + benchmark: BenchmarkFixture, + validator: protovalidate.Validator, +): message = message_factory() benchmark(validator.collect_violations, message) diff --git a/test/test_validate.py b/test/test_validate.py index 0408cbc..f716ff8 100644 --- a/test/test_validate.py +++ 
b/test/test_validate.py @@ -19,11 +19,14 @@ import protovalidate from protovalidate.internal import rules +from .conftest import backend_validators from .gen.tests.example.v1 import validations_pb, validations_pb2 +# The module singleton (auto-detected backend) plus one Validator per available +# CEL backend, so the suite exercises celpy and cel-expr-python in a single run. validators: list[protovalidate.Validator] = [ protovalidate, # global module singleton - protovalidate.Validator(), # via constructor + *backend_validators(), ] diff --git a/uv.lock b/uv.lock index aa40d79..142f7ee 100644 --- a/uv.lock +++ b/uv.lock @@ -6,7 +6,8 @@ resolution-markers = [ "python_full_version == '3.14.*'", "python_full_version == '3.13.*'", "python_full_version == '3.12.*'", - "python_full_version < '3.12'", + "python_full_version == '3.11.*'", + "python_full_version < '3.11'", ] [[package]] @@ -26,6 +27,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/01/77553cba45fe4a769fb6fa44ee25961580b87a95fb47c95f2638bd8cf145/buf_bin-1.71.0-py3-none-win_arm64.whl", hash = "sha256:5172e2068819c2954fe5626fa02aaeeb1b631f0e154ef24dadfeff14f12f547a", size = 14617019, upload-time = "2026-06-16T18:25:15.78Z" }, ] +[[package]] +name = "cel-expr-python" +version = "0.1.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/42/8a9ee05d86317fd57b8f20853c84076745ef34e38f0ab969c930e6ba8b86/cel_expr_python-0.1.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:5b2ecfc4f4e8120928b446262b13977ee3326b535c1684bdc3eb2fb990298dba", size = 12607175, upload-time = "2026-06-25T23:02:58.642Z" }, + { url = "https://files.pythonhosted.org/packages/43/6d/3b89f7b3473c2c83220a6478b30dda7f9e960f074284e507128ded568c84/cel_expr_python-0.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11e4cf0a3b2bf7b4231df4ed1f359de006001f2680676c99631df86744f9c1f2", size = 12030643, upload-time = "2026-06-25T23:03:00.966Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/34/d27fbf56e4f844cff649f980534fc358a2210f61ef19a021ef4c9b4c4ca0/cel_expr_python-0.1.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc1d80c53affcaf7f31b09dd3e10d87add48833b1de51f037b8fcfb549e7362f", size = 15158230, upload-time = "2026-06-25T23:03:03.273Z" }, + { url = "https://files.pythonhosted.org/packages/72/bd/03fb13876057deb3028cd9bf80748f362958944f4a89565b239ada75931b/cel_expr_python-0.1.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:292ae505727bfcfc1986c8bf653c2580faa06f46af56b9087060eafdf2a08652", size = 16017630, upload-time = "2026-06-25T23:03:05.712Z" }, + { url = "https://files.pythonhosted.org/packages/50/e3/fc514c4d9884ae773f987186957e3b0fe57b8abb5a46d44daf58b91ba9db/cel_expr_python-0.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:8b56ebf586df6ec4dae0449d90bd9f6da0027bef407f2ffbb92701a9886ee43c", size = 7870074, upload-time = "2026-06-25T23:03:07.97Z" }, + { url = "https://files.pythonhosted.org/packages/b9/e7/b7cad8b31394c2123b19559d08caef14130cf8d17245005623acd7727e87/cel_expr_python-0.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5ab17a336e7f01e3868fc454630c05596fb8a16293833fa34a7a2f946faa835f", size = 12615855, upload-time = "2026-06-25T23:03:09.963Z" }, + { url = "https://files.pythonhosted.org/packages/23/39/585a1b08d634346cb7dbe158c24a2c4015a8b5f7221fddb471087f570338/cel_expr_python-0.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4e2694afe68a0484ee7c99a79887309511f6738e562a5d2c4ff566dc791e7ec6", size = 12033466, upload-time = "2026-06-25T23:03:12.437Z" }, + { url = "https://files.pythonhosted.org/packages/70/1a/cd8a88eccbca175e89d3f55c61e16c1e8f1ed407f3ac1d6b05019eeb4320/cel_expr_python-0.1.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4170d1e6a3cf359b0ec7c1fb50317daa3e3e3eb12b61311260714a1e02313fbe", size = 16450147, upload-time = "2026-06-25T23:03:14.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/5c/fadbe060d821652909f7fbace3106c92dbc71bfad1faf06e8401c768c69e/cel_expr_python-0.1.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab3b0a418b3911177a77ee69fef1f4199b4a053e2ddd1c339119e58c9cd72824", size = 17313058, upload-time = "2026-06-25T23:03:17.136Z" }, + { url = "https://files.pythonhosted.org/packages/3f/b3/721ba61eaca18a8d4abe8c9fe5942fa3281e53a6db6335e80236703f8719/cel_expr_python-0.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:925bbf323fdf2743491ddf411e40b5c4a12ab102e3bc7e0bfec0fed1c6defa3b", size = 15040270, upload-time = "2026-06-25T23:03:19.981Z" }, + { url = "https://files.pythonhosted.org/packages/a3/50/572b7f65ad961f1227ff58d2c39256b72ce5fcf5c86436a581060eeda867/cel_expr_python-0.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ac60fdb981d1435faadc536c0582ada321a3a392c18ba70e94e3f3caa0f37437", size = 12616486, upload-time = "2026-06-25T23:03:22.613Z" }, + { url = "https://files.pythonhosted.org/packages/88/ba/ca4dbc50ed3c97454f7b4eb3a6552ea3065cd4ef934a17481c897091765b/cel_expr_python-0.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8eb8d3c92b11e99fffddb846ba68f8b1ba12a3bf1bc0ccdd8c2914c7458f3a4", size = 12034024, upload-time = "2026-06-25T23:03:24.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/b0/9c3830b5d105204a2ff2775d8e8f89e1cf68f62c28dec736dcdb468c9fb1/cel_expr_python-0.1.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ea4679e74567c145c824e1ef049ae340af5b2a1a4451324d215e58ed5db20969", size = 16449714, upload-time = "2026-06-25T23:03:26.919Z" }, + { url = "https://files.pythonhosted.org/packages/a9/65/65ebe0e73c700724b2f8b74ed65309f6326aaf9c2e907d9bc237ab139ae8/cel_expr_python-0.1.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa80370ed20cee201a7671634f9c58dc17fd93f15addf680d7e0393969bc51b3", size = 17313510, upload-time = "2026-06-25T23:03:29.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/cf/3532944395d9e4eba7157c2c0414ad7a621e0e3757e654f166190313782b/cel_expr_python-0.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:cd6ff40b4a89af6b62ca24d431893d8fcce5cca6149f06926a0e88211a370645", size = 13724593, upload-time = "2026-06-25T23:03:32.332Z" }, + { url = "https://files.pythonhosted.org/packages/c7/33/ceba4fdc652d8f3e692cafd065e90eb35e51fd20141abcc0f13d235be995/cel_expr_python-0.1.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:665fbb261c4733e69a4022763c16f7655cfe6e057cd394ad9f36dfee92b9df8c", size = 12617454, upload-time = "2026-06-25T23:03:34.947Z" }, + { url = "https://files.pythonhosted.org/packages/9f/6e/3f36105e268e4dd0d8ad1ec233ef8ebca826eee8ad9cb22702bfd5631a34/cel_expr_python-0.1.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b3e1ba34748796624b2cf8c9bc664b79baccc0662c97299a1991278ef45d3aed", size = 12038419, upload-time = "2026-06-25T23:03:37.644Z" }, + { url = "https://files.pythonhosted.org/packages/89/02/65ab9b3a9dc2b73e798ce237c21811675e1acd898638036a829ccf90c77b/cel_expr_python-0.1.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d2035831cfb9c0ae36c91e3de421828f8fa27173680e86676a7d6e0f68300a0", size = 16452974, upload-time = "2026-06-25T23:03:40.028Z" }, + { url = "https://files.pythonhosted.org/packages/c6/25/f352cdffd78175fe56ef50967a3a9e33d7770d46a0bd3294cbca9738df1e/cel_expr_python-0.1.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b36c7a36ca865b147e1fc68f3f6faaae5b113e9de4d77e7eb751c95e1547a99", size = 17314171, upload-time = "2026-06-25T23:03:42.633Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f5/71742755695bfbfbe55632e8f83a9e84cac65cb8aefa102353975df8209c/cel_expr_python-0.1.3-cp314-cp314-win_amd64.whl", hash = "sha256:4711f59f0dd3fcabf68a617685ab2857ed931cc10e2f1595abe7d4d99f921bba", size = 13902879, upload-time = "2026-06-25T23:03:45.135Z" }, +] + [[package]] name = "cel-python" version = "0.5.0" 
@@ -69,7 +97,7 @@ name = "exceptiongroup" version = "1.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } wheels = [ @@ -370,9 +398,16 @@ dependencies = [ { name = "protobuf-py" }, ] +[package.optional-dependencies] +cel-expr = [ + { name = "cel-expr-python", marker = "python_full_version >= '3.11'" }, + { name = "protobuf", marker = "python_full_version >= '3.11'" }, +] + [package.dev-dependencies] dev = [ { name = "buf-bin" }, + { name = "cel-expr-python", marker = "python_full_version >= '3.11'" }, { name = "fix-protobuf-imports" }, { name = "google-re2-stubs" }, { name = "poethepoet" }, @@ -387,17 +422,22 @@ dev = [ [package.metadata] requires-dist = [ + { name = "cel-expr-python", marker = "python_full_version >= '3.11' and extra == 'cel-expr'", specifier = ">=0.1.3" }, { name = "cel-python", specifier = ">=0.5" }, { name = "google-re2", specifier = ">=1" }, { name = "google-re2", marker = "python_full_version == '3.12.*'", specifier = ">=1.1" }, { name = "google-re2", marker = "python_full_version == '3.13.*'", specifier = ">=1.1.20250722" }, { name = "google-re2", marker = "python_full_version == '3.14.*'", specifier = ">=1.1.20251105" }, + { name = "protobuf", marker = "python_full_version >= '3.11' and extra == 'cel-expr'", specifier = ">=5" }, + { name = "protobuf", marker = "python_full_version >= '3.14' and extra == 'cel-expr'", specifier = ">=6.31.0" }, { name = "protobuf-py", specifier = ">=0.1.1" }, ] +provides-extras = ["cel-expr"] [package.metadata.requires-dev] dev = [ { name = "buf-bin", 
specifier = "==1.71.0" }, + { name = "cel-expr-python", marker = "python_full_version >= '3.11'", specifier = ">=0.1.3" }, { name = "fix-protobuf-imports", specifier = "==0.1.7" }, { name = "google-re2-stubs", specifier = "==0.1.1" }, { name = "poethepoet", specifier = "==0.46.0" }, From 23dcce207653fce055e83a0980261f45797d21f2 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 3 Jul 2026 14:11:30 +0900 Subject: [PATCH 4/5] drift --- protovalidate/internal/_core.py | 18 ++---------------- protovalidate/internal/backend.py | 14 ++++++++++---- protovalidate/internal/celexpr/rules.py | 5 ++--- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/protovalidate/internal/_core.py b/protovalidate/internal/_core.py index 921b050..5f45f6b 100644 --- a/protovalidate/internal/_core.py +++ b/protovalidate/internal/_core.py @@ -12,15 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Backend-agnostic rule-engine primitives shared by both CEL backends. - -``Violation``, ``RuleContext``, ``CompilationError``, and the ``Rules`` ABC do -not depend on the CEL implementation (celpy vs cel-expr-python) or the -descriptor/value model (protobuf-py vs google.protobuf) — they only speak the -public ``validate_pb`` path types. Both the celpy engine (``rules.py``) and the -cel-expr-python engine (``celexpr/rules.py``) import them from here so violation -output and path bookkeeping are identical across backends. -""" +"""Backend-agnostic rule-engine primitives shared by both CEL backends.""" import abc import typing @@ -33,12 +25,7 @@ class CompilationError(Exception): class Violation: - """A singular rule violation. - - Field and rule paths accumulate as element lists during recursion - (messages are immutable and do not auto-vivify), then materialize into a - ``validate_pb.Violation`` lazily via :attr:`proto`. 
- """ + """A singular rule violation.""" field_value: typing.Any rule_value: typing.Any @@ -69,7 +56,6 @@ def extend_rule_elements(self, elements: list[validate_pb.FieldPathElement]) -> self._rule_elements.extend(elements) def finalize_paths(self) -> None: - """Reverses the accumulated leaf-to-root paths into root-to-leaf order.""" self._field_elements.reverse() self._rule_elements.reverse() diff --git a/protovalidate/internal/backend.py b/protovalidate/internal/backend.py index f933d5e..fbef799 100644 --- a/protovalidate/internal/backend.py +++ b/protovalidate/internal/backend.py @@ -22,11 +22,17 @@ ``CEL_EXPR_AVAILABLE`` to ``False`` before constructing a ``Validator``. """ -import importlib.util - - def _detect() -> bool: - return all(importlib.util.find_spec(name) is not None for name in ("cel_expr_python", "google.protobuf")) + # Actually import (rather than importlib.util.find_spec) so an installed but + # broken wheel — cel-expr-python ships only native wheels with spotty + # coverage, so a failed extension load is a real possibility — falls back to + # celpy instead of being reported available and then crashing at use. + try: + import cel_expr_python # noqa: F401, PLC0415 + import google.protobuf.message # noqa: F401, PLC0415 + except ImportError: + return False + return True CEL_EXPR_AVAILABLE: bool = _detect() diff --git a/protovalidate/internal/celexpr/rules.py b/protovalidate/internal/celexpr/rules.py index 46ae3c3..3ede4db 100644 --- a/protovalidate/internal/celexpr/rules.py +++ b/protovalidate/internal/celexpr/rules.py @@ -21,9 +21,8 @@ in google's global pool for discovery. Rules are *evaluated* by cel-expr-python, which only ingests google messages, so the message under validation and the rule messages bound as ``rules``/``rule`` are bridged to google (see -``celexpr.bridge.GoogleBridge``). Output ``Violation``\\s are protobuf-py -``validate_pb`` messages — the public type, shared with the celpy engine via -``protovalidate.internal._core``. 
+``_bridge.GoogleBridge``). Output ``Violation``\\s are protobuf-py ``validate_pb`` +messages — the public type. """ import dataclasses From a3941ce125f8cc537a9264b4d2b09893daab079a Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 3 Jul 2026 14:49:49 +0900 Subject: [PATCH 5/5] Raise protobuf floor --- protovalidate/internal/backend.py | 13 +----- protovalidate/internal/celexpr/__init__.py | 14 ++----- protovalidate/internal/celexpr/bridge.py | 11 ------ protovalidate/internal/celexpr/rules.py | 17 +++----- protovalidate/validator.py | 7 ---- pyproject.toml | 46 +++++++++------------- uv.lock | 39 +++++++++--------- 7 files changed, 47 insertions(+), 100 deletions(-) diff --git a/protovalidate/internal/backend.py b/protovalidate/internal/backend.py index fbef799..f38fb4f 100644 --- a/protovalidate/internal/backend.py +++ b/protovalidate/internal/backend.py @@ -13,20 +13,9 @@ # limitations under the License. """Which CEL backend is available. - -The cel-expr-python engine needs both ``cel_expr_python`` (the cel-cpp binding) -and ``google.protobuf`` (the pool/message model it evaluates against). When both -import, ``Validator`` selects it automatically; otherwise it falls back to the -pure-Python celpy engine, which is always present. There is no public switch — -this is pure auto-detect. Tests force the fallback by monkeypatching -``CEL_EXPR_AVAILABLE`` to ``False`` before constructing a ``Validator``. """ def _detect() -> bool: - # Actually import (rather than importlib.util.find_spec) so an installed but - # broken wheel — cel-expr-python ships only native wheels with spotty - # coverage, so a failed extension load is a real possibility — falls back to - # celpy instead of being reported available and then crashing at use. 
try: import cel_expr_python # noqa: F401, PLC0415 import google.protobuf.message # noqa: F401, PLC0415 @@ -35,4 +24,4 @@ def _detect() -> bool: return True -CEL_EXPR_AVAILABLE: bool = _detect() +CEL_EXPR_AVAILABLE = _detect() diff --git a/protovalidate/internal/celexpr/__init__.py b/protovalidate/internal/celexpr/__init__.py index ebb05be..7652840 100644 --- a/protovalidate/internal/celexpr/__init__.py +++ b/protovalidate/internal/celexpr/__init__.py @@ -12,17 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""The optional cel-expr-python (cel-cpp) validation engine. - -Importing this package requires ``cel_expr_python`` and ``google.protobuf`` to be -installed; the validator only reaches for it when it imports successfully (see -``protovalidate.internal.backend``). Everything below the engine boundary speaks -google.protobuf descriptors and messages — protobuf-py values cross in through -``GoogleBridge``. +"""The cel-expr-python (cel-cpp) validation engine. """ -from protovalidate.internal.celexpr.bridge import GoogleBridge -from protovalidate.internal.celexpr.extra_func import make_extension -from protovalidate.internal.celexpr.rules import RuleFactory +from .bridge import GoogleBridge +from .extra_func import make_extension +from .rules import RuleFactory __all__ = ["GoogleBridge", "RuleFactory", "make_extension"] diff --git a/protovalidate/internal/celexpr/bridge.py b/protovalidate/internal/celexpr/bridge.py index 3399c01..a747b3b 100644 --- a/protovalidate/internal/celexpr/bridge.py +++ b/protovalidate/internal/celexpr/bridge.py @@ -13,17 +13,6 @@ # limitations under the License. """Bridges protobuf-py messages into google.protobuf for cel-expr-python. - -protovalidate's public API is protobuf-py, but the CEL engine (cel-expr-python / -cel-cpp) only ingests google.protobuf messages. 
This bridge lazily mirrors the -protobuf-py descriptors it encounters into google's global descriptor pool — the -same pool cel-expr-python evaluates against — and re-creates message values as -google dynamic messages by a binary round trip. - -Mirroring uses the process-wide default pool (the pool cel-cpp resolves binding -types against), registering each file once and tolerating files already present. -The relocatable / no-conflict property protovalidate offers lives at the -protobuf-py public layer; this google pool is an internal evaluation detail. """ from __future__ import annotations diff --git a/protovalidate/internal/celexpr/rules.py b/protovalidate/internal/celexpr/rules.py index 3ede4db..8d689e4 100644 --- a/protovalidate/internal/celexpr/rules.py +++ b/protovalidate/internal/celexpr/rules.py @@ -14,15 +14,11 @@ """The rule engine. -Rules are *discovered* from protobuf-py descriptors: the message structure is -read off the google mirror (the validated types are registered in google's pool -by the bridge anyway), but the ``buf.validate`` *options* are read off the -relocatable protobuf-py stub (``validate_pb``), so nothing needs ``buf.validate`` -in google's global pool for discovery. Rules are *evaluated* by cel-expr-python, -which only ingests google messages, so the message under validation and the rule -messages bound as ``rules``/``rule`` are bridged to google (see -``_bridge.GoogleBridge``). Output ``Violation``\\s are protobuf-py ``validate_pb`` -messages — the public type. +Rules are *discovered* by walking the google descriptor mirror (the validated +types the bridge registers in google's pool), while the ``buf.validate`` +*options* that define them are read off the relocatable protobuf-py stub +(``validate_pb``), so nothing needs ``buf.validate`` in google's global pool for +discovery. 
""" import dataclasses @@ -147,9 +143,6 @@ def field_to_cel(msg: message.Message, field: descriptor.FieldDescriptor) -> typ return _field_value_to_cel(_proto_message_get_field(msg, field), field) -# ----- protobuf-py validate_pb path construction (output is always validate_pb) ----- - - def _ftype(google_type: int) -> FieldDescriptorProto.Type: """Maps a google FieldDescriptor.type int to the protobuf-py enum value.""" return FieldDescriptorProto.Type(google_type) diff --git a/protovalidate/validator.py b/protovalidate/validator.py index e78e785..324920e 100644 --- a/protovalidate/validator.py +++ b/protovalidate/validator.py @@ -93,11 +93,6 @@ class Validator: protobuf-py message of the same type; violation field values then refer to that copy. - CEL is evaluated by the cel-expr-python engine when that optional dependency - is installed, and otherwise by the pure-Python celpy engine (see - :mod:`protovalidate.internal.backend`). The choice is transparent: both - engines produce the same violations. - Each validator instance caches internal state generated from the static rules, so reusing the same instance for multiple validations significantly improves performance. @@ -110,8 +105,6 @@ def __init__(self, registry: Registry | None = None): Parameters: registry: An optional Registry used to resolve custom predefined-rule extensions. If omitted, only standard rules are applied. - (Only consulted by the celpy engine; the cel-expr engine resolves - custom rules through its google descriptor pool.) """ try: import google.protobuf.message # noqa: F401, PLC0415 diff --git a/pyproject.toml b/pyproject.toml index 0433db3..8b95d6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,35 +30,27 @@ dependencies = [ ] dynamic = ["version"] -# cel-expr-python (cel-cpp) is an optional, faster CEL backend. 
It is fully -# opt-in: it ships only version-specific compiled wheels for CPython 3.11-3.14 -# on manylinux (glibc) / macOS / Windows (x86-64 + arm64) — no musllinux, no -# 3.10, no free-threaded (cp314t), no abi3/pure-python wheel. PEP 508 markers -# cannot express those distinctions (no free-threaded marker; manylinux and -# musllinux are both `sys_platform == 'linux'`), so it cannot be gated onto the -# required dependency list without risking a source build. When installed, the -# validator selects it automatically and otherwise falls back to cel-python. +[project.urls] +Documentation = "https://protovalidate.com" +Homepage = "https://github.com/bufbuild/protovalidate-python" +Issues = "https://github.com/bufbuild/protovalidate-python/issues" +Source = "https://github.com/bufbuild/protovalidate-python" + +# cel-expr has limited wheel support and no sdist for on-the-fly installation, so +# we can't add it as a primary dependency. Users can opt-in if their platform supports it. [project.optional-dependencies] cel-expr = [ - # google.protobuf is required alongside it: cel-expr-python only ingests - # google.protobuf descriptor pools and messages, so protobuf-py values are - # bridged through it at the validation boundary. # No 3.10 release exists (earliest wheel is cp311), so a floor marker is # required for the lock to resolve on our 3.10-supported base. "cel-expr-python>=0.1.3; python_version >= '3.11'", - "protobuf>=5; python_version >= '3.11'", - # We need at least this version, which fixed Python 3.14 compatibility (tp_new metaclass issue). - # The fix was never backported to the 5.x series. + # cel-expr-python's cel-cpp can't resolve nested descriptors of editions + # messages with delimited (group) encoding when the pool is built by protobuf + # < 6.31; earlier versions fail conformance. 6.31 also fixed Python 3.14 + # compatibility (tp_new metaclass issue), never backported to 5.x. 
# Ref: https://github.com/protocolbuffers/protobuf/pull/20594 - "protobuf>=6.31.0; python_version >= '3.14'", + "protobuf>=6.31.0; python_version >= '3.11'", ] -[project.urls] -Documentation = "https://protovalidate.com" -Homepage = "https://github.com/bufbuild/protovalidate-python" -Issues = "https://github.com/bufbuild/protovalidate-python/issues" -Source = "https://github.com/bufbuild/protovalidate-python" - [dependency-groups] dev = [ "buf-bin==1.71.0", @@ -77,12 +69,12 @@ dev = [ # floored at 3.11 (its earliest release) so the lock resolves on 3.10. "cel-expr-python>=0.1.3; python_version >= '3.11'", - # Unlike most dev dependencies, we don't pin to allow testing old and new versions - "protobuf>=5", - # We need at least this version, which fixed Python 3.14 compatibility (tp_new metaclass issue). - # The fix was never backported to the 5.x series. - # Ref: https://github.com/protocolbuffers/protobuf/pull/20594 - "protobuf>=6.31.0; python_version >= '3.14'", + # Unlike most dev dependencies, we don't pin to allow testing old and new + # versions. Floored at 6.31.0: the cel-expr engine (installed above on 3.11+) + # needs it for editions/delimited descriptor resolution in cel-cpp, and it + # also fixed Python 3.14 compatibility (tp_new metaclass issue), never + # backported to 5.x. 
Ref: https://github.com/protocolbuffers/protobuf/pull/20594 + "protobuf>=6.31.0", ] [build-system] diff --git a/uv.lock b/uv.lock index 142f7ee..4a4110c 100644 --- a/uv.lock +++ b/uv.lock @@ -2,11 +2,10 @@ version = 1 revision = 3 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.15'", "python_full_version == '3.14.*'", "python_full_version == '3.13.*'", "python_full_version == '3.12.*'", - "python_full_version == '3.11.*'", + "python_full_version == '3.11.*' or python_full_version >= '3.15'", "python_full_version < '3.11'", ] @@ -73,14 +72,14 @@ wheels = [ [[package]] name = "click" -version = "8.4.1" +version = "8.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } +sdist = { url = "https://files.pythonhosted.org/packages/76/d4/81420972a676e8ffea40450d8c8c92943e7218a78fe9b64359836cc9876b/click-8.4.2.tar.gz", hash = "sha256:9a6cea6e60b17ebe0a44c5cc636d94f09bd66142c1cd7d8b4cd731c4917a15f6", size = 338000, upload-time = "2026-06-24T17:45:15.148Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e2/79c688af8b210d232694e31e59da9f6ec747bae31c3f5946e4e9b98860d5/click-8.4.2-py3-none-any.whl", hash = "sha256:e6f9f66136c816745b9d65817da91d61d957fb16e02e4dcd0552553c5a197b76", size = 119243, upload-time = "2026-06-24T17:45:13.73Z" }, ] [[package]] @@ -329,17 +328,17 @@ wheels = [ 
[[package]] name = "protobuf" -version = "7.35.0" +version = "7.35.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/60/fd/5b1491d9e4b586d621c54f4c36b888714164b6875f8d6afa3f9072906a51/protobuf-7.35.0.tar.gz", hash = "sha256:a2efd84605f41e559f1881b0912b44099d0a2ac9bf46b3474823f10fb393b0e6", size = 458677, upload-time = "2026-05-19T23:02:29.197Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/01/9ef0afd7999eb9badb3a768b4aedd78c86d4c65cfaf1958ab276199e76b4/protobuf-7.35.1.tar.gz", hash = "sha256:ce115a26fe0c39a2c29973d914d327e516a6455464489fe3cd1e51a1b354f81a", size = 458717, upload-time = "2026-06-11T21:55:40.257Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/ee/93d06e358a4aa32280b00e722d3ea0a1f25fc3cc5778d80581c9cca2c10e/protobuf-7.35.0-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:66be6c513931c794fa92c080ffee41671390da3d79da219cf9c0c0907f035dda", size = 433225, upload-time = "2026-05-19T23:02:19.884Z" }, - { url = "https://files.pythonhosted.org/packages/8b/39/1c76c2da93f3c507e958e0aecee2391cc44d4625de6c728bbc555195b5a8/protobuf-7.35.0-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:fcbe42a4ac09d3ec9c987ddfcd956afd0b15f1ff613bd8371bde9405ffd5c8e5", size = 328847, upload-time = "2026-05-19T23:02:22.3Z" }, - { url = "https://files.pythonhosted.org/packages/91/1a/39f7ce90a238c1a987a4d81ec26379e02ca0aff367de68e4a1fa474215b9/protobuf-7.35.0-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:4cbf5cc286130e06a6c9bbefac442431173906dfcc979712183d4adcc01b37ee", size = 344030, upload-time = "2026-05-19T23:02:23.591Z" }, - { url = "https://files.pythonhosted.org/packages/70/5b/6baf9008817964454055ff3fe65f1de0b5f1e26c80c82f7fb108b7cd4ea3/protobuf-7.35.0-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:6c0f98f10c8a05ea30f8993dfef2de093d27b490fdae78bb60c8343795d55011", size = 327130, upload-time = "2026-05-19T23:02:24.637Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/e5/e46adb0badc388bfb84877a5f9f026aff63f60e611016cf64dbe77e05446/protobuf-7.35.0-cp310-abi3-win32.whl", hash = "sha256:4c4617b83ade0e279d1d2bfe04025a1adb87f9ed657de038620dc0ff959357f6", size = 428946, upload-time = "2026-05-19T23:02:25.741Z" }, - { url = "https://files.pythonhosted.org/packages/a7/ab/547fbd9e16d879dd13c167478f8ae0a83a428008ca07a5e06acdc23ad473/protobuf-7.35.0-cp310-abi3-win_amd64.whl", hash = "sha256:f05bcadf9a2a6b8dda047007075135fb7d08c73d9177aabc067e1be46881a201", size = 439996, upload-time = "2026-05-19T23:02:26.808Z" }, - { url = "https://files.pythonhosted.org/packages/b8/ef/50433d346c56657a70d27f156c7b349ac59a068b01de4eb796e747eecc43/protobuf-7.35.0-py3-none-any.whl", hash = "sha256:c13f325cf242bad135c350629eeb5d54b24228eb472fb3e2e9ebbd4c5dc20ca0", size = 171659, upload-time = "2026-05-19T23:02:27.842Z" }, + { url = "https://files.pythonhosted.org/packages/10/03/8aeeb7458d22546bf64b5250ca1daeb5ff757d900e8e4a7476c6f0db843e/protobuf-7.35.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:24f857477359a85c0c235261b8ba905fd51b2562f4a64ca1df5473f29850cbf6", size = 433226, upload-time = "2026-06-11T21:55:31.719Z" }, + { url = "https://files.pythonhosted.org/packages/37/4b/dfb89eb0e652a1ff073c39a59fb5e3a83cfe9b57a2c83fa6d78270101767/protobuf-7.35.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:11d6b0ec246892d85215b0a13ca6e0233cf5284b68f0ac02646427f4ff88a799", size = 328847, upload-time = "2026-06-11T21:55:34.035Z" }, + { url = "https://files.pythonhosted.org/packages/0f/58/dc12f2cd484951524af6e3382c785869b9b3fb5e52ee95ae23add53ee8f9/protobuf-7.35.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:b73f9489a4b8b1c9cb1f8ed951c736392592edb24b9d6819f36d2e10b171d5b4", size = 344030, upload-time = "2026-06-11T21:55:34.941Z" }, + { url = "https://files.pythonhosted.org/packages/e4/be/5b3cfe508bfab6761414ff944e3366eb13be4fd71efcd69450f89ba39f43/protobuf-7.35.1-cp310-abi3-manylinux2014_x86_64.whl", 
hash = "sha256:74758715c53d7158fb76caf4f0cfdacc5329a4b1bb994f865d6cf302d413a1c4", size = 327130, upload-time = "2026-06-11T21:55:35.921Z" }, + { url = "https://files.pythonhosted.org/packages/d8/bc/6d6c7ba8709c85f8f2c390b2b118d6fb08a783676a572271851bf45a7d22/protobuf-7.35.1-cp310-abi3-win32.whl", hash = "sha256:353652e4efd0bca5b5fc2656abf8307ef351f0cf938c9eba09f0e09c20a25c30", size = 428945, upload-time = "2026-06-11T21:55:37.034Z" }, + { url = "https://files.pythonhosted.org/packages/0a/19/8d0cb6f20a1ef7b18f1c8986ad5783f22f84cce39c6ce9a6e645ea55192e/protobuf-7.35.1-cp310-abi3-win_amd64.whl", hash = "sha256:230a75ddfc2de4806e56696ce9640c1cdfdb6543b7cfce98d42a4c0a0e7bdb87", size = 439996, upload-time = "2026-06-11T21:55:38.123Z" }, + { url = "https://files.pythonhosted.org/packages/19/c7/5f7c636ec43e0c545e28d1f1db71990108306f7bdcb89f069ba97e428e7f/protobuf-7.35.1-py3-none-any.whl", hash = "sha256:4bc97768d8fe4ad6743c8a19403e314511ed9f6d13205b687e52421c023ac1b9", size = 171659, upload-time = "2026-06-11T21:55:39.155Z" }, ] [[package]] @@ -428,8 +427,7 @@ requires-dist = [ { name = "google-re2", marker = "python_full_version == '3.12.*'", specifier = ">=1.1" }, { name = "google-re2", marker = "python_full_version == '3.13.*'", specifier = ">=1.1.20250722" }, { name = "google-re2", marker = "python_full_version == '3.14.*'", specifier = ">=1.1.20251105" }, - { name = "protobuf", marker = "python_full_version >= '3.11' and extra == 'cel-expr'", specifier = ">=5" }, - { name = "protobuf", marker = "python_full_version >= '3.14' and extra == 'cel-expr'", specifier = ">=6.31.0" }, + { name = "protobuf", marker = "python_full_version >= '3.11' and extra == 'cel-expr'", specifier = ">=6.31.0" }, { name = "protobuf-py", specifier = ">=0.1.1" }, ] provides-extras = ["cel-expr"] @@ -441,8 +439,7 @@ dev = [ { name = "fix-protobuf-imports", specifier = "==0.1.7" }, { name = "google-re2-stubs", specifier = "==0.1.1" }, { name = "poethepoet", specifier = "==0.46.0" }, - { name = 
"protobuf", specifier = ">=5" }, - { name = "protobuf", marker = "python_full_version >= '3.14'", specifier = ">=6.31.0" }, + { name = "protobuf", specifier = ">=6.31.0" }, { name = "pytest", specifier = "==9.1.1" }, { name = "pytest-benchmark", specifier = "==5.2.3" }, { name = "ruff", specifier = "==0.15.18" }, @@ -722,11 +719,11 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.15.0" +version = "4.16.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/cc/6253133b5bb138fc3306cebfbda2c520f545d36b5be2c7255cc528bb45d6/typing_extensions-4.16.0.tar.gz", hash = "sha256:dc983d19a509c94dba722ee6abd33940f7c05a89e243c47e907eb4db6f1a43e5", size = 113555, upload-time = "2026-07-02T08:40:05.92Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, + { url = "https://files.pythonhosted.org/packages/49/d3/b8441a820a491ddfc024b0b0cf0393375b75ea13866d9c66727e54c2fc80/typing_extensions-4.16.0-py3-none-any.whl", hash = "sha256:481caa481374e813c1b176ada14e97f1f67a4539ce9cfeb3f350d78d6370c2e8", size = 45571, upload-time = "2026-07-02T08:40:04.659Z" }, ] [[package]]