diff --git a/Doc/library/tomllib.rst b/Doc/library/tomllib.rst index 30d7ff50a1acc1..237881e5f14545 100644 --- a/Doc/library/tomllib.rst +++ b/Doc/library/tomllib.rst @@ -13,10 +13,14 @@ -------------- -This module provides an interface for parsing TOML 1.0.0 (Tom's Obvious Minimal +This module provides an interface for parsing TOML 1.1.0 (Tom's Obvious Minimal Language, `https://toml.io `_). This module does not support writing TOML. +.. versionchanged:: next + Module updated to support TOML 1.1.0. Initially the module supported TOML 1.0.0. + + .. seealso:: The :pypi:`Tomli-W package ` diff --git a/Lib/test/test_tomllib/burntsushi.py b/Lib/test/test_tomllib/burntsushi.py index 71228c65369572..0ec50eb1a98a45 100644 --- a/Lib/test/test_tomllib/burntsushi.py +++ b/Lib/test/test_tomllib/burntsushi.py @@ -7,19 +7,8 @@ import datetime from typing import Any -# Aliases for converting TOML compliance format [1] to BurntSushi format [2] -# [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501 -# [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501 -_aliases = { - "boolean": "bool", - "offset datetime": "datetime", - "local datetime": "datetime-local", - "local date": "date-local", - "local time": "time-local", -} - - -def convert(obj): # noqa: C901 + +def convert(obj): if isinstance(obj, str): return {"type": "string", "value": obj} elif isinstance(obj, bool): @@ -53,31 +42,25 @@ def convert(obj): # noqa: C901 def normalize(obj: Any) -> Any: """Normalize test objects. - This normalizes primitive values (e.g. floats), and also converts from - TOML compliance format [1] to BurntSushi format [2]. - - [1] https://github.com/toml-lang/compliance/blob/db7c3211fda30ff9ddb10292f4aeda7e2e10abc4/docs/json-encoding.md # noqa: E501 - [2] https://github.com/BurntSushi/toml-test/blob/4634fdf3a6ecd6aaea5f4cdcd98b2733c2694993/README.md # noqa: E501 - """ + This normalizes primitive values (e.g. floats).""" if isinstance(obj, list): return [normalize(item) for item in obj] if isinstance(obj, dict): if "type" in obj and "value" in obj: type_ = obj["type"] - norm_type = _aliases.get(type_, type_) value = obj["value"] - if norm_type == "float": + if type_ == "float": norm_value = _normalize_float_str(value) - elif norm_type in {"datetime", "datetime-local"}: + elif type_ in {"datetime", "datetime-local"}: norm_value = _normalize_datetime_str(value) - elif norm_type == "time-local": + elif type_ == "time-local": norm_value = _normalize_localtime_str(value) else: norm_value = value - if norm_type == "array": + if type_ == "array": return [normalize(item) for item in value] - return {"type": norm_type, "value": norm_value} + return {"type": type_, "value": norm_value} return {k: normalize(v) for k, v in obj.items()} raise AssertionError("Burntsushi fixtures should be dicts/lists only") diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json index 99aca873480ec3..09a7c083d14f88 100644 --- a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.json @@ -1,4 +1,5 @@ { "local-dt": {"type":"datetime-local","value":"1988-10-27t01:01:01"}, + "local-dt-no-seconds": {"type":"datetime-local","value":"2025-04-18t20:05:00"}, "zulu-dt": {"type":"datetime","value":"1988-10-27t01:01:01z"} } diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml index cf84159de46fd8..5dc4b318256198 100644 --- a/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/datetimes.toml @@ -1,2 +1,3 @@ local-dt=1988-10-27t01:01:01 +local-dt-no-seconds=2025-04-18T20:05 zulu-dt=1988-10-27t01:01:01z diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json index 4d96abcbc799e6..1f66348b237161 100644 --- a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.json @@ -1,2 +1,4 @@ {"t": - {"type":"time-local","value":"00:00:00.999999"}} + {"type":"time-local","value":"00:00:00.999999"}, +"t2": + {"type":"time-local","value":"00:00:00"}} diff --git a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml index 87547c1cf3bd89..6579b30c94f8d6 100644 --- a/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml +++ b/Lib/test/test_tomllib/data/valid/dates-and-times/localtime.toml @@ -1 +1,2 @@ -t=00:00:00.99999999999999 \ No newline at end of file +t=00:00:00.99999999999999 +t2=00:00 \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/empty-inline-table.json b/Lib/test/test_tomllib/data/valid/inline-table/empty-inline-table.json similarity index 100% rename from Lib/test/test_tomllib/data/valid/empty-inline-table.json rename to Lib/test/test_tomllib/data/valid/inline-table/empty-inline-table.json diff --git a/Lib/test/test_tomllib/data/valid/empty-inline-table.toml b/Lib/test/test_tomllib/data/valid/inline-table/empty-inline-table.toml similarity index 100% rename from Lib/test/test_tomllib/data/valid/empty-inline-table.toml rename to Lib/test/test_tomllib/data/valid/inline-table/empty-inline-table.toml diff --git a/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.json b/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.json new file mode 100644 index 00000000000000..d253884fbac9f0 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.json @@ -0,0 +1,35 @@ +{ + "multiline": { + "a": { + "type": "integer", + "value": "1" + }, + "b": { + "type": "integer", + "value": "2" + }, + "c": [ + { + "type": "integer", + "value": "1" + }, + { + "type": "integer", + "value": "2" + }, + { + "type": "integer", + "value": "3" + } + ], + "d": { + "type": "integer", + "value": "3" + }, + "e": { + "type": "integer", + "value": "4" + }, + "f": {} + } +} \ No newline at end of file diff --git a/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.toml b/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.toml new file mode 100644 index 00000000000000..6a98a08a576a06 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/inline-table/multiline-inline-table.toml @@ -0,0 +1,12 @@ +multiline = { + "a" = 1, "b" = 2, + c = [ + 1, + 2, + 3, + ],# comment + d = 3, + e = 4, f = { + # comment + }, +} diff --git a/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.json b/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.json new file mode 100644 index 00000000000000..699f556248d880 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.json @@ -0,0 +1,6 @@ +{ + "escape": {"type":"string","value":"\u001B"}, + "tab": {"type":"string","value":"\t"}, + "upper-j": {"type":"string","value":"J"}, + "upper-j-2": {"type":"string","value":"J"} +} diff --git a/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.toml b/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.toml new file mode 100644 index 00000000000000..fa5647e5938ee4 --- /dev/null +++ b/Lib/test/test_tomllib/data/valid/multiline-basic-str/replacements.toml @@ -0,0 +1,4 @@ +escape = "\e" +tab = "\x09" +upper-j = "\x4a" +upper-j-2 = "\x4A" diff --git a/Lib/test/test_tomllib/test_data.py b/Lib/test/test_tomllib/test_data.py index 3483d93022b01b..9db1a37466e7bf 100644 --- a/Lib/test/test_tomllib/test_data.py +++ b/Lib/test/test_tomllib/test_data.py @@ -8,12 +8,6 @@ from . import burntsushi, tomllib - -class MissingFile: - def __init__(self, path: Path): - self.path = path - - DATA_DIR = Path(__file__).parent / "data" VALID_FILES = tuple((DATA_DIR / "valid").glob("**/*.toml")) @@ -22,10 +16,7 @@ def __init__(self, path: Path): _expected_files = [] for p in VALID_FILES: json_path = p.with_suffix(".json") - try: - text = json.loads(json_path.read_bytes().decode()) - except FileNotFoundError: - text = MissingFile(json_path) + text = json.loads(json_path.read_bytes().decode()) _expected_files.append(text) VALID_FILES_EXPECTED = tuple(_expected_files) @@ -49,14 +40,6 @@ def test_invalid(self): def test_valid(self): for valid, expected in zip(VALID_FILES, VALID_FILES_EXPECTED): with self.subTest(msg=valid.stem): - if isinstance(expected, MissingFile): - # For a poor man's xfail, assert that this is one of the - # test cases where expected data is known to be missing. - assert valid.stem in { - "qa-array-inline-nested-1000", - "qa-table-inline-nested-1000", - } - continue toml_str = valid.read_bytes().decode() actual = tomllib.loads(toml_str) actual = burntsushi.convert(actual) diff --git a/Lib/tomllib/_parser.py b/Lib/tomllib/_parser.py index 3ee47aa9e0afba..b59d0f7d54bdc3 100644 --- a/Lib/tomllib/_parser.py +++ b/Lib/tomllib/_parser.py @@ -18,39 +18,40 @@ TYPE_CHECKING = False if TYPE_CHECKING: from collections.abc import Iterable - from typing import IO, Any + from typing import IO, Any, Final from ._types import Key, ParseFloat, Pos -ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) +ASCII_CTRL: Final = frozenset(chr(i) for i in range(32)) | frozenset(chr(127)) # Neither of these sets include quotation mark or backslash. They are # currently handled as separate cases in the parser functions. -ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t") -ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n") +ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t") +ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t\n") -ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS -ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS +ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS +ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS -ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS +ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS -TOML_WS = frozenset(" \t") -TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n") -BARE_KEY_CHARS = frozenset( +TOML_WS: Final = frozenset(" \t") +TOML_WS_AND_NEWLINE: Final = TOML_WS | frozenset("\n") +BARE_KEY_CHARS: Final = frozenset( "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" "-_" ) -KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'") -HEXDIGIT_CHARS = frozenset("abcdef" "ABCDEF" "0123456789") +KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'") +HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789") -BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType( +BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType( { "\\b": "\u0008", # backspace "\\t": "\u0009", # tab - "\\n": "\u000A", # linefeed - "\\f": "\u000C", # form feed - "\\r": "\u000D", # carriage return + "\\n": "\u000a", # linefeed + "\\f": "\u000c", # form feed + "\\r": "\u000d", # carriage return + "\\e": "\u001b", # escape '\\"': "\u0022", # quote - "\\\\": "\u005C", # backslash + "\\\\": "\u005c", # backslash } ) @@ -133,7 +134,7 @@ def load(fp: IO[bytes], /, *, parse_float: ParseFloat = float) -> dict[str, Any] return loads(s, parse_float=parse_float) -def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901 +def loads(s: str, /, *, parse_float: ParseFloat = float) -> dict[str, Any]: """Parse TOML from a string.""" # The spec allows converting "\r\n" to "\n", even in string @@ -208,10 +209,10 @@ class Flags: """Flags that map to parsed keys/namespaces.""" # Marks an immutable namespace (inline array or inline table). - FROZEN = 0 + FROZEN: Final = 0 # Marks a nest that has been explicitly created and can no longer # be opened using the "[table]" syntax. - EXPLICIT_NEST = 1 + EXPLICIT_NEST: Final = 1 def __init__(self) -> None: self._flags: dict[str, dict[Any, Any]] = {} @@ -257,8 +258,8 @@ def is_(self, key: Key, flag: int) -> bool: cont = inner_cont["nested"] key_stem = key[-1] if key_stem in cont: - cont = cont[key_stem] - return flag in cont["flags"] or flag in cont["recursive_flags"] + inner_cont = cont[key_stem] + return flag in inner_cont["flags"] or flag in inner_cont["recursive_flags"] return False @@ -515,7 +516,7 @@ def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos nested_dict = NestedDict() flags = Flags() - pos = skip_chars(src, pos, TOML_WS) + pos = skip_comments_and_array_ws(src, pos) if src.startswith("}", pos): return pos + 1, nested_dict.dict while True: @@ -530,16 +531,18 @@ def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos if key_stem in nest: raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos) nest[key_stem] = value - pos = skip_chars(src, pos, TOML_WS) + pos = skip_comments_and_array_ws(src, pos) c = src[pos : pos + 1] if c == "}": return pos + 1, nested_dict.dict if c != ",": raise TOMLDecodeError("Unclosed inline table", src, pos) + pos += 1 + pos = skip_comments_and_array_ws(src, pos) + if src.startswith("}", pos): + return pos + 1, nested_dict.dict if isinstance(value, (dict, list)): flags.set(key, Flags.FROZEN, recursive=True) - pos += 1 - pos = skip_chars(src, pos, TOML_WS) def parse_basic_str_escape( @@ -561,6 +564,8 @@ def parse_basic_str_escape( pos += 1 pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE) return pos, "" + if escape_id == "\\x": + return parse_hex_char(src, pos, 2) if escape_id == "\\u": return parse_hex_char(src, pos, 4) if escape_id == "\\U": @@ -660,7 +665,7 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]: pos += 1 -def parse_value( # noqa: C901 +def parse_value( src: str, pos: Pos, parse_float: ParseFloat ) -> tuple[Pos, Any]: try: diff --git a/Lib/tomllib/_re.py b/Lib/tomllib/_re.py index eb8beb19747288..fc374ed63d3e37 100644 --- a/Lib/tomllib/_re.py +++ b/Lib/tomllib/_re.py @@ -10,16 +10,20 @@ TYPE_CHECKING = False if TYPE_CHECKING: - from typing import Any + from typing import Any, Final from ._types import ParseFloat -# E.g. -# - 00:32:00.999999 -# - 00:32:00 -_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?" +_TIME_RE_STR: Final = r""" +([01][0-9]|2[0-3]) # hours +:([0-5][0-9]) # minutes +(?: + :([0-5][0-9]) # optional seconds + (?:\.([0-9]{1,6})[0-9]*)? # optional fractions of a second +)? +""" -RE_NUMBER = re.compile( +RE_NUMBER: Final = re.compile( r""" 0 (?: @@ -38,8 +42,8 @@ """, flags=re.VERBOSE, ) -RE_LOCALTIME = re.compile(_TIME_RE_STR) -RE_DATETIME = re.compile( +RE_LOCALTIME: Final = re.compile(_TIME_RE_STR, flags=re.VERBOSE) +RE_DATETIME: Final = re.compile( rf""" ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27 (?: @@ -74,7 +78,8 @@ def match_to_datetime(match: re.Match[str]) -> datetime | date: year, month, day = int(year_str), int(month_str), int(day_str) if hour_str is None: return date(year, month, day) - hour, minute, sec = int(hour_str), int(minute_str), int(sec_str) + hour, minute = int(hour_str), int(minute_str) + sec = int(sec_str) if sec_str else 0 micros = int(micros_str.ljust(6, "0")) if micros_str else 0 if offset_sign_str: tz: tzinfo | None = cached_tz( @@ -103,8 +108,9 @@ def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone: def match_to_localtime(match: re.Match[str]) -> time: hour_str, minute_str, sec_str, micros_str = match.groups() + sec = int(sec_str) if sec_str else 0 micros = int(micros_str.ljust(6, "0")) if micros_str else 0 - return time(int(hour_str), int(minute_str), int(sec_str), micros) + return time(int(hour_str), int(minute_str), sec, micros) def match_to_number(match: re.Match[str], parse_float: ParseFloat) -> Any: diff --git a/Makefile.pre.in b/Makefile.pre.in index 8531162943ae35..9410dd6eaf6ca5 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2741,6 +2741,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tomllib/data/valid \ test/test_tomllib/data/valid/array \ test/test_tomllib/data/valid/dates-and-times \ + test/test_tomllib/data/valid/inline-table \ test/test_tomllib/data/valid/multiline-basic-str \ test/test_tools \ test/test_tools/i18n_data \ diff --git a/Misc/NEWS.d/next/Library/2026-01-26-12-30-57.gh-issue-142956.X9CS8J.rst b/Misc/NEWS.d/next/Library/2026-01-26-12-30-57.gh-issue-142956.X9CS8J.rst new file mode 100644 index 00000000000000..27f104fa0b62f9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-26-12-30-57.gh-issue-142956.X9CS8J.rst @@ -0,0 +1 @@ +Updated :mod:`tomllib` to parse TOML 1.1.0.