diff --git a/.claude/skills/testing-hashql/references/mir-builder-guide.md b/.claude/skills/testing-hashql/references/mir-builder-guide.md index 1967b7e4af7..feb6f086642 100644 --- a/.claude/skills/testing-hashql/references/mir-builder-guide.md +++ b/.claude/skills/testing-hashql/references/mir-builder-guide.md @@ -158,6 +158,7 @@ let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { | `x = tuple , ;` | Create tuple | `Assign(x, Aggregate(Tuple, [a, b]))` | | `x = struct a: , b: ;` | Create struct | `Assign(x, Aggregate(Struct, [v1, v2]))` | | `x = closure ;` | Create closure | `Assign(x, Aggregate(Closure, [def, env]))` | +| `x = opaque (), ;` | Create opaque wrapper | `Assign(x, Aggregate(Opaque(name), [value]))` | | `x = bin. ;` | Binary operation | `Assign(x, Binary(lhs, op, rhs))` | | `x = un. ;` | Unary operation | `Assign(x, Unary(op, operand))` | | `x = input.load! "name";` | Load required input | `Assign(x, Input(Load { required: true }, "name"))` | @@ -276,6 +277,27 @@ let body = body!(interner, env; [graph::read::filter]@0/2 -> Bool { }); ``` +### Opaque Construction and Projection + +Construct opaque-wrapped values with `opaque (), `. The name must +be wrapped in parentheses because it is a multi-token path. 
+ +```rust +use hashql_core::symbol::sym; + +let body = body!(interner, env; fn@0/0 -> Int { + decl inner: (x: Int, y: Int), wrapped: [Opaque sym::path::Entity; ?], result: Int; + @proj y_field = wrapped.y: Int; + + bb0() { + inner = struct x: 100, y: 200; + wrapped = opaque (sym::path::Entity), inner; + result = load y_field; + return result; + } +}); +``` + ### Direct Function Calls Use a `DefId` variable directly: diff --git a/Cargo.lock b/Cargo.lock index 93b49cdd483..d550ed232d1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -357,6 +357,22 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "astral-tokio-tar" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec179a06c1769b1e42e1e2cbe74c7dcdb3d6383c838454d063eaac5bbb7ebbe5" +dependencies = [ + "filetime", + "futures-core", + "libc", + "portable-atomic", + "rustc-hash", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "async-event" version = "0.2.1" @@ -396,6 +412,28 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-stream" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -441,7 +479,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e2cdb6d5ed835199484bb92bb8b3edd526effe995c61732580439c1a67e2e9" dependencies = [ - "base64", + "base64 0.22.1", "http 1.4.0", "log", "url", @@ -976,6 +1014,12 @@ dependencies = [ "match-lookup", ] +[[package]] +name = "base64" +version = 
"0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -1088,6 +1132,80 @@ dependencies = [ "objc2", ] +[[package]] +name = "bollard" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee04c4c84f1f811b017f2fbb7dd8815c976e7ca98593de9c1e2afad0f636bff4" +dependencies = [ + "async-stream", + "base64 0.22.1", + "bitflags 2.11.0", + "bollard-buildkit-proto", + "bollard-stubs", + "bytes", + "futures-core", + "futures-util", + "hex", + "home", + "http 1.4.0", + "http-body-util", + "hyper", + "hyper-named-pipe", + "hyper-rustls", + "hyper-util", + "hyperlocal", + "log", + "num", + "pin-project-lite", + "rand 0.9.2", + "rustls", + "rustls-native-certs", + "rustls-pki-types", + "serde", + "serde_derive", + "serde_json", + "serde_urlencoded", + "thiserror 2.0.18", + "time", + "tokio", + "tokio-stream", + "tokio-util", + "tonic 0.14.5", + "tower-service", + "url", + "winapi", +] + +[[package]] +name = "bollard-buildkit-proto" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a885520bf6249ab931a764ffdb87b0ceef48e6e7d807cfdb21b751e086e1ad" +dependencies = [ + "prost 0.14.3", + "prost-types", + "tonic 0.14.5", + "tonic-prost", + "ureq", +] + +[[package]] +name = "bollard-stubs" +version = "1.52.1-rc.29.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0a8ca8799131c1837d1282c3f81f31e76ceb0ce426e04a7fe1ccee3287c066" +dependencies = [ + "base64 0.22.1", + "bollard-buildkit-proto", + "bytes", + "prost 0.14.3", + "serde", + "serde_json", + "serde_repr", + "time", +] + [[package]] name = "bon" version = "3.9.1" @@ -2215,6 +2333,17 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aeda16ab4059c5fd2a83f2b9c9e9c981327b18aa8e3b313f7e6563799d4f093e" +[[package]] 
+name = "docker_credential" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d89dfcba45b4afad7450a99b39e751590463e45c04728cf555d36bb66940de8" +dependencies = [ + "base64 0.21.7", + "serde", + "serde_json", +] + [[package]] name = "document-features" version = "0.2.12" @@ -2493,6 +2622,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" +[[package]] +name = "etcetera" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de48cc4d1c1d97a20fd819def54b890cadde72ed3ad0c614822a0a433361be96" +dependencies = [ + "cfg-if", + "windows-sys 0.61.2", +] + [[package]] name = "euclid" version = "0.22.14" @@ -2544,6 +2683,17 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "ferroid" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb330bbd4cb7a5b9f559427f06f98a4f853a137c8298f3bd3f8ca57663e21986" +dependencies = [ + "portable-atomic", + "rand 0.9.2", + "web-time", +] + [[package]] name = "fiat-crypto" version = "0.2.9" @@ -2561,6 +2711,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -2739,7 +2900,10 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" dependencies = [ + "fastrand", "futures-core", + "futures-io", + "parking", "pin-project-lite", ] @@ -3152,7 +3316,7 @@ dependencies = [ name 
= "harpc-wire-protocol" version = "0.0.0" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "enumflags2", "error-stack", @@ -3858,17 +4022,36 @@ dependencies = [ name = "hashql-eval" version = "0.0.0" dependencies = [ + "bytes", "derive_more", + "error-stack", + "futures-lite", + "hash-graph-authorization", "hash-graph-postgres-store", "hash-graph-store", + "hash-graph-test-data", "hashql-compiletest", "hashql-core", "hashql-diagnostics", "hashql-hir", "hashql-mir", "insta", + "libtest-mimic", + "postgres-protocol", + "postgres-types", + "regex", + "serde", + "serde_json", + "similar-asserts", "simple-mermaid", + "testcontainers", + "testcontainers-modules", + "tokio", + "tokio-postgres", + "tokio-util", "type-system", + "url", + "uuid", ] [[package]] @@ -4022,6 +4205,15 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "hostname" version = "0.4.2" @@ -4147,6 +4339,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -4184,7 +4391,7 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-util", @@ -4201,6 +4408,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "hyperlocal" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" +dependencies = [ + "hex", + "http-body-util", + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -4653,7 +4875,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ "aws-lc-rs", - "base64", + "base64 0.22.1", "getrandom 0.2.17", "js-sys", "serde", @@ -5187,7 +5409,10 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" dependencies = [ + "bitflags 2.11.0", "libc", + "plain", + "redox_syscall 0.7.3", ] [[package]] @@ -5800,6 +6025,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -5810,6 +6049,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.2.0" @@ -5846,12 +6094,34 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-modular" version = "0.6.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "17bb261bf36fa7d83f4c294f834e91256769097b3cb505d44831e0a179ac647f" +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -6514,11 +6784,36 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.5.18", "smallvec 1.15.1", "windows-link", ] +[[package]] +name = "parse-display" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06af5f9333eb47bd9ba8462d612e37a8328a5cb80b13f0af4de4c3b89f52dee5" +dependencies = [ + "parse-display-derive", + "regex", + "regex-syntax", +] + +[[package]] +name = "parse-display-derive" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc9252f259500ee570c75adcc4e317fa6f57a1e47747d622e0bf838002a7b790" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "regex-syntax", + "structmeta", + "syn 2.0.117", +] + [[package]] name = "paste" version = "1.0.15" @@ -6540,7 +6835,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64", + "base64 0.22.1", "serde_core", ] @@ -6757,13 +7052,19 @@ dependencies = [ "spki", ] +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = "plist" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"740ebea15c5d1428f910cd1a5f52cebf8d25006245ed8ade92702f4943d91e07" dependencies = [ - "base64", + "base64 0.22.1", "indexmap 2.13.0", "quick-xml 0.38.4", "serde", @@ -6859,7 +7160,7 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" dependencies = [ - "base64", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator", @@ -7555,6 +7856,15 @@ dependencies = [ "bitflags 2.11.0", ] +[[package]] +name = "redox_syscall" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16" +dependencies = [ + "bitflags 2.11.0", +] + [[package]] name = "ref-cast" version = "1.0.25" @@ -7665,7 +7975,7 @@ version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", @@ -8285,6 +8595,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_spanned" version = "1.0.4" @@ -8312,7 +8633,7 @@ version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" dependencies = [ - "base64", + "base64 0.22.1", "chrono", "hex", "indexmap 1.9.3", @@ -8893,7 +9214,7 @@ dependencies = [ "anyhow", "async-trait", "backoff", - "base64", + "base64 0.22.1", "bon", "bytes", "derive_more", @@ -8924,7 +9245,7 @@ source = "git+https://github.com/temporalio/sdk-core?rev=231e21c#231e21cadb80041 dependencies = [ "anyhow", "async-trait", - 
"base64", + "base64 0.22.1", "bon", "derive_more", "prost 0.14.3", @@ -8998,7 +9319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7" dependencies = [ "anyhow", - "base64", + "base64 0.22.1", "bitflags 2.11.0", "fancy-regex", "filedescriptor", @@ -9067,6 +9388,46 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "testcontainers" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c0624faaa317c56d6d19136580be889677259caf5c897941c6f446b4655068" +dependencies = [ + "astral-tokio-tar", + "async-trait", + "bollard", + "bytes", + "docker_credential", + "either", + "etcetera", + "ferroid", + "futures", + "http 1.4.0", + "itertools 0.14.0", + "log", + "memchr", + "parse-display", + "pin-project-lite", + "serde", + "serde_json", + "serde_with", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tokio-util", + "url", +] + +[[package]] +name = "testcontainers-modules" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5985fde5befe4ffa77a052e035e16c2da86e8bae301baa9f9904ad3c494d357" +dependencies = [ + "testcontainers", +] + [[package]] name = "text-size" version = "1.1.1" @@ -9390,7 +9751,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "http 1.4.0", "http-body 1.0.1", @@ -9419,7 +9780,7 @@ checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", - "base64", + "base64 0.22.1", "bytes", "h2", "http 1.4.0", @@ -9977,7 +10338,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc" dependencies = [ - "base64", + "base64 0.22.1", "log", 
"percent-encoding", "rustls", @@ -9993,7 +10354,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" dependencies = [ - "base64", + "base64 0.22.1", "http 1.4.0", "httparse", "log", @@ -10981,6 +11342,16 @@ dependencies = [ "time", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "xml-rs" version = "0.8.28" diff --git a/Cargo.toml b/Cargo.toml index 8150da1dc6a..cb15f937744 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -257,6 +257,8 @@ temporalio-client = { git = "https://github.com/temporalio/sdk- temporalio-common = { git = "https://github.com/temporalio/sdk-core", rev = "231e21c" } test-log = { version = "0.2.18", default-features = false } test-strategy = { version = "0.4.3", default-features = false } +testcontainers = { version = "0.27.1", default-features = false } +testcontainers-modules = { version = "0.15.0", default-features = false } text-size = { version = "1.1.1", default-features = false } thiserror = { version = "2.0.17", default-features = false } time = { version = "0.3.44", default-features = false } diff --git a/libs/@local/graph/postgres-store/src/store/postgres/query/expression/conditional.rs b/libs/@local/graph/postgres-store/src/store/postgres/query/expression/conditional.rs index 5845722d49d..27538fd0600 100644 --- a/libs/@local/graph/postgres-store/src/store/postgres/query/expression/conditional.rs +++ b/libs/@local/graph/postgres-store/src/store/postgres/query/expression/conditional.rs @@ -22,6 +22,7 @@ pub enum Function { JsonExtractAsText(Box, PathToken<'static>), JsonExtractPath(Vec), JsonContains(Box, Box), + JsonScalar(Box), JsonBuildArray(Vec), JsonBuildObject(Vec<(Expression, Expression)>), JsonPathQueryFirst(Box, 
Box), @@ -32,8 +33,26 @@ pub enum Function { elements: Vec, element_type: PostgresType, }, + /// Converts any SQL value to jsonb. + /// + /// Transpiles to `to_jsonb()` in PostgreSQL. Passes through jsonb + /// values unchanged; wraps text, uuid, integer, boolean, etc. as jsonb + /// scalars. + ToJson(Box), + /// Returns the first non-NULL argument. + /// + /// Transpiles to `COALESCE(expr, fallback)`. + Coalesce(Box, Box), Lower(Box), Upper(Box), + LowerInc(Box), + UpperInc(Box), + LowerInf(Box), + UpperInf(Box), + /// Extracts the epoch as milliseconds since Unix epoch from a timestamp expression. + /// + /// Transpiles to `(extract(epoch from ) * 1000)::int8` in PostgreSQL. + ExtractEpochMs(Box), Unnest(Vec), Now, } @@ -60,6 +79,11 @@ impl Transpile for Function { expression.transpile(fmt)?; fmt.write_char(')') } + Self::JsonScalar(expression) => { + fmt.write_str("json_scalar(")?; + expression.transpile(fmt)?; + fmt.write_char(')') + } Self::JsonExtractPath(paths) => { fmt.write_str("jsonb_extract_path(")?; for (i, expression) in paths.iter().enumerate() { @@ -112,6 +136,18 @@ impl Transpile for Function { fmt.write_char(')') } Self::Now => fmt.write_str("now()"), + Self::ToJson(expression) => { + fmt.write_str("to_jsonb(")?; + expression.transpile(fmt)?; + fmt.write_char(')') + } + Self::Coalesce(expression, fallback) => { + fmt.write_str("COALESCE(")?; + expression.transpile(fmt)?; + fmt.write_str(", ")?; + fallback.transpile(fmt)?; + fmt.write_char(')') + } Self::Lower(expression) => { fmt.write_str("lower(")?; expression.transpile(fmt)?; @@ -122,6 +158,31 @@ impl Transpile for Function { expression.transpile(fmt)?; fmt.write_char(')') } + Self::LowerInc(expression) => { + fmt.write_str("lower_inc(")?; + expression.transpile(fmt)?; + fmt.write_char(')') + } + Self::UpperInc(expression) => { + fmt.write_str("upper_inc(")?; + expression.transpile(fmt)?; + fmt.write_char(')') + } + Self::LowerInf(expression) => { + fmt.write_str("lower_inf(")?; + 
expression.transpile(fmt)?; + fmt.write_char(')') + } + Self::UpperInf(expression) => { + fmt.write_str("upper_inf(")?; + expression.transpile(fmt)?; + fmt.write_char(')') + } + Self::ExtractEpochMs(expression) => { + fmt.write_str("(extract(epoch from ")?; + expression.transpile(fmt)?; + fmt.write_str(") * 1000)::int8") + } Self::Unnest(expression) => { fmt.write_str("UNNEST(")?; @@ -209,6 +270,7 @@ pub enum PostgresType { Int, BigInt, Boolean, + TimestampTzRange, } impl Transpile for PostgresType { @@ -227,6 +289,7 @@ impl Transpile for PostgresType { Self::Int => fmt.write_str("int"), Self::BigInt => fmt.write_str("bigint"), Self::Boolean => fmt.write_str("boolean"), + Self::TimestampTzRange => fmt.write_str("tstzrange"), } } } @@ -550,6 +613,11 @@ impl Expression { Self::Grouped(Box::new(self)) } + #[must_use] + pub fn coalesce(self, fallback: Self) -> Self { + Self::Function(Function::Coalesce(Box::new(self), Box::new(fallback))) + } + #[must_use] pub fn starts_with(lhs: Self, rhs: Self) -> Self { Self::StartsWith(Box::new(lhs), Box::new(rhs)) @@ -569,6 +637,11 @@ impl Expression { pub fn cast(self, r#type: PostgresType) -> Self { Self::Cast(Box::new(self), r#type) } + + #[must_use] + pub fn json_scalar(self) -> Self { + Self::Function(Function::JsonScalar(Box::new(self))) + } } impl Transpile for Expression { diff --git a/libs/@local/hashql/compiletest/Cargo.toml b/libs/@local/hashql/compiletest/Cargo.toml index d44b37a3d41..65b99360e5e 100644 --- a/libs/@local/hashql/compiletest/Cargo.toml +++ b/libs/@local/hashql/compiletest/Cargo.toml @@ -8,18 +8,18 @@ version.workspace = true [dependencies] # Public workspace dependencies +hashql-ast = { workspace = true, public = true } +hashql-core = { workspace = true, public = true } +hashql-diagnostics = { workspace = true, features = ["render"], public = true } +hashql-eval = { workspace = true, features = ["graph"], public = true } +hashql-hir = { workspace = true, public = true } +hashql-mir = { workspace = true, 
public = true } +hashql-syntax-jexpr = { workspace = true, public = true } # Public third-party dependencies # Private workspace dependencies -error-stack = { workspace = true } -hashql-ast = { workspace = true } -hashql-core = { workspace = true } -hashql-diagnostics = { workspace = true, features = ["render"] } -hashql-eval = { workspace = true, features = ["graph"] } -hashql-hir = { workspace = true } -hashql-mir = { workspace = true } -hashql-syntax-jexpr = { workspace = true } +error-stack = { workspace = true } # Private third-party dependencies ansi-to-tui = { workspace = true } diff --git a/libs/@local/hashql/compiletest/src/lib.rs b/libs/@local/hashql/compiletest/src/lib.rs index 6258e2ec2f7..51e9b74cf78 100644 --- a/libs/@local/hashql/compiletest/src/lib.rs +++ b/libs/@local/hashql/compiletest/src/lib.rs @@ -16,6 +16,7 @@ string_from_utf8_lossy_owned, try_trait_v2, vec_from_fn, + macro_metavar_expr )] extern crate alloc; @@ -32,6 +33,7 @@ use self::{ mod annotation; mod harness; +pub mod pipeline; pub mod runner; mod suite; mod ui; diff --git a/libs/@local/hashql/compiletest/src/pipeline.rs b/libs/@local/hashql/compiletest/src/pipeline.rs new file mode 100644 index 00000000000..9b78b3d465a --- /dev/null +++ b/libs/@local/hashql/compiletest/src/pipeline.rs @@ -0,0 +1,277 @@ +//! Staged compilation pipeline from J-Expr source to prepared SQL queries. +//! +//! [`Pipeline`] drives the full HashQL compilation sequence: parsing J-Expr +//! source into an AST, lowering through HIR and MIR, running optimization and +//! execution analysis passes, and finally compiling to [`PreparedQueries`] +//! ready for PostgreSQL execution. +//! +//! Each stage is exposed as a separate method so callers can inspect or test +//! intermediate results. Diagnostics (warnings, advisories) accumulate in +//! [`Pipeline::diagnostics`] across all stages; fatal errors are returned +//! immediately as [`BoxedDiagnostic`]. +//! +//! 
Intended for use by the compiletest harness and integration test binaries +//! that need the full compilation pipeline without assembling it from +//! individual crate APIs. + +use hashql_core::{ + heap::{Heap, ResetAllocator as _, Scratch}, + module::ModuleRegistry, + span::{SpanId, SpanTable}, + r#type::environment::Environment, +}; +use hashql_diagnostics::{ + Diagnostic, DiagnosticCategory, Failure, Status, Success, diagnostic::BoxedDiagnostic, + issues::BoxedDiagnosticIssues, source::SourceId, +}; +use hashql_hir::context::HirContext; +use hashql_mir::{ + body::Body, + context::MirContext, + def::{DefId, DefIdSlice, DefIdVec}, + pass::{ + Changed, GlobalAnalysisPass as _, GlobalTransformPass as _, GlobalTransformState, + analysis::SizeEstimationAnalysis, + execution::{ExecutionAnalysis, ExecutionAnalysisResidual}, + transform::{Inline, InlineConfig, PostInline, PreInline}, + }, + reify::ReifyContext, +}; +use hashql_syntax_jexpr::span::Span; + +/// Unwraps a [`Status`] into its success value, draining advisories and +/// secondary diagnostics into the shared accumulator. +/// +/// On failure, secondary diagnostics are drained and the primary diagnostic +/// is returned as the error. +fn process_status( + diagnostics: &mut BoxedDiagnosticIssues<'static, SpanId>, + status: Status, +) -> Result> +where + C: DiagnosticCategory + 'static, +{ + match status { + Ok(Success { value, advisories }) => { + diagnostics.extend( + advisories + .into_iter() + .map(|advisory| advisory.generalize().boxed()), + ); + + Ok(value) + } + Err(Failure { primary, secondary }) => { + diagnostics.extend(secondary.into_iter().map(Diagnostic::boxed)); + + Err(primary.generalize().boxed()) + } + } +} + +macro_rules! bind_tri { + ($diagnostics:expr) => { + macro_rules! tri { + ($$status:expr) => { + process_status($diagnostics, $$status)? + }; + } + }; +} + +/// Staged compilation driver from J-Expr source to prepared SQL queries. 
+/// +/// Owns the shared compilation state (heap reference, type environment, span +/// table, scratch allocator) and accumulates non-fatal diagnostics across +/// stages. Call the methods in order: +/// +/// 1. [`parse`](Self::parse): J-Expr bytes to AST +/// 2. [`lower`](Self::lower): AST through HIR to MIR bodies +/// 3. [`transform`](Self::transform): MIR optimization passes (inlining) +/// 4. [`prepare`](Self::prepare): execution analysis +/// +/// After each stage, check [`diagnostics`](Self::diagnostics) for warnings. +/// Fatal errors short-circuit via the `Result` return. +pub struct Pipeline<'heap> { + pub heap: &'heap Heap, + pub scratch: Scratch, + pub env: Environment<'heap>, + pub spans: SpanTable, + pub diagnostics: BoxedDiagnosticIssues<'static, SpanId>, +} + +impl<'heap> Pipeline<'heap> { + /// Creates a new pipeline bound to `heap`. + /// + /// Initializes the type environment, span table, scratch allocator, and + /// an empty diagnostic accumulator. + pub fn new(heap: &'heap Heap) -> Self { + Self { + heap, + env: Environment::new(heap), + spans: SpanTable::new(SourceId::new_unchecked(0x00)), + diagnostics: BoxedDiagnosticIssues::default(), + scratch: Scratch::new(), + } + } + + /// Parses J-Expr source bytes into an AST expression. + /// + /// # Errors + /// + /// Returns a diagnostic if the input is not valid J-Expr syntax. + pub fn parse( + &mut self, + content: impl AsRef<[u8]>, + ) -> Result, BoxedDiagnostic<'static, SpanId>> { + let mut parser = hashql_syntax_jexpr::Parser::new(self.heap, &mut self.spans); + + parser + .parse_expr(content.as_ref()) + .map_err(Diagnostic::boxed) + } + + /// Lowers an AST expression through HIR into MIR. + /// + /// Performs AST type lowering, HIR node construction, HIR specialization + /// and lowering, then reifies the result into MIR bodies. Returns the + /// MIR interner, the entry definition, and the complete set of bodies. 
+ /// + /// # Errors + /// + /// Returns a diagnostic if any lowering stage fails (type resolution, + /// HIR construction, specialization, or MIR reification). + pub fn lower( + &mut self, + mut expr: hashql_ast::node::expr::Expr<'heap>, + ) -> Result< + ( + hashql_mir::intern::Interner<'heap>, + DefId, + DefIdVec>, + ), + BoxedDiagnostic<'static, SpanId>, + > { + bind_tri!(&mut self.diagnostics); + let registry = ModuleRegistry::new(&self.env); + + let types = tri!(hashql_ast::lowering::lower( + self.heap.intern_symbol("::main"), + &mut expr, + &self.env, + ®istry, + )); + + let hir_interner = hashql_hir::intern::Interner::new(self.heap); + let mut hir_context = HirContext::new(&hir_interner, ®istry); + + let node = tri!(hashql_hir::node::NodeData::from_ast( + expr, + &mut hir_context, + &types + )); + + let node = tri!(hashql_hir::lower::lower( + node, + &types, + &mut self.env, + &mut hir_context, + )); + + let mut bodies = DefIdVec::new(); + + let mir_interner = hashql_mir::intern::Interner::new(self.heap); + let mut mir_context = MirContext::new(&self.env, &mir_interner); + let mut reify_context = ReifyContext { + bodies: &mut bodies, + mir: &mut mir_context, + hir: &hir_context, + }; + + let entry = tri!(hashql_mir::reify::from_hir(node, &mut reify_context)); + + // drain the context, because we're going to re-create it + self.diagnostics.extend( + mir_context + .diagnostics + .into_iter() + .map(hashql_diagnostics::Diagnostic::boxed), + ); + + Ok((mir_interner, entry, bodies)) + } + + /// Runs MIR optimization passes on the body set. + /// + /// Applies pre-inline cleanup, function inlining, and post-inline + /// simplification in sequence. Bodies are modified in place. + /// + /// # Errors + /// + /// Returns a diagnostic if any transform pass emits a fatal error. 
+ pub fn transform( + &mut self, + interner: &hashql_mir::intern::Interner<'heap>, + bodies: &mut DefIdSlice>, + ) -> Result<(), BoxedDiagnostic<'static, SpanId>> { + let mut context = MirContext::new(&self.env, interner); + let mut state = GlobalTransformState::new_in(&*bodies, self.heap); + + self.scratch.reset(); + + let mut pass = PreInline::new_in(&mut self.scratch); + let _: Changed = pass.run(&mut context, &mut state, bodies); + self.scratch.reset(); + + let mut pass = Inline::new_in(InlineConfig::default(), &mut self.scratch); + let _: Changed = pass.run(&mut context, &mut state, bodies); + self.scratch.reset(); + + let mut pass = PostInline::new_in(&mut self.scratch); + let _: Changed = pass.run(&mut context, &mut state, bodies); + self.scratch.reset(); + + let status = context.diagnostics.generalize().boxed().into_status(()); + process_status(&mut self.diagnostics, status)?; + + Ok(()) + } + + /// Runs execution analysis and compiles MIR bodies to prepared SQL queries. + /// + /// Performs size estimation, execution island analysis (determining which + /// parts of each body run on PostgreSQL vs the interpreter), then compiles + /// the PostgreSQL islands into [`PreparedQueries`] containing the SQL + /// statements, parameter bindings, and column descriptors. + /// + /// # Errors + /// + /// Returns a diagnostic if execution analysis or SQL compilation fails. 
+ pub fn prepare<'bodies>( + &mut self, + interner: &hashql_mir::intern::Interner<'heap>, + bodies: &'bodies mut DefIdSlice>, + ) -> Result< + DefIdVec>, &'heap Heap>, + BoxedDiagnostic<'static, SpanId>, + > { + let mut context = MirContext::new(&self.env, interner); + + let mut pass = SizeEstimationAnalysis::new_in(&self.scratch); + pass.run(&mut context, bodies); + let footprints = pass.finish(); + self.scratch.reset(); + + let pass = ExecutionAnalysis { + footprints: &footprints, + scratch: &mut self.scratch, + }; + let analysis = pass.run_all_in(&mut context, bodies, self.heap); + self.scratch.reset(); + + let status = context.diagnostics.generalize().boxed().into_status(()); + process_status(&mut self.diagnostics, status)?; + + Ok(analysis) + } +} diff --git a/libs/@local/hashql/compiletest/src/suite/eval_postgres.rs b/libs/@local/hashql/compiletest/src/suite/eval_postgres.rs index 496d237f8a0..721796be1e9 100644 --- a/libs/@local/hashql/compiletest/src/suite/eval_postgres.rs +++ b/libs/@local/hashql/compiletest/src/suite/eval_postgres.rs @@ -117,8 +117,14 @@ impl Suite for EvalPostgres { let mir_buf = format_mir_with_placement(heap, &environment, &bodies, &analysis); secondary_outputs.insert("mir", mir_buf); - let mut context = - EvalContext::new_in(&environment, &bodies, &analysis, context.heap, &mut scratch); + let mut context = EvalContext::new_in( + &environment, + &interner, + &bodies, + &analysis, + context.heap, + &mut scratch, + ); scratch.reset(); // Inside of **all** the bodies, find the `GraphRead` terminators to compile. 
@@ -128,7 +134,7 @@ impl Suite for EvalPostgres { for body in &bodies { for block in &*body.basic_blocks { if let TerminatorKind::GraphRead(read) = &block.terminator.kind { - let prepared_query = compiler.compile(read); + let prepared_query = compiler.compile_graph_read(read); prepared_queries.push(prepared_query); } } diff --git a/libs/@local/hashql/core/src/heap/allocator.rs b/libs/@local/hashql/core/src/heap/allocator.rs index c453ef7a189..6d750495a26 100644 --- a/libs/@local/hashql/core/src/heap/allocator.rs +++ b/libs/@local/hashql/core/src/heap/allocator.rs @@ -6,7 +6,11 @@ use bump_scope::{Bump, BumpBox, BumpScope}; use super::{BumpAllocator, bump::ResetAllocator}; -pub struct Checkpoint(bump_scope::Checkpoint); +#[expect( + clippy::field_scoped_visibility_modifiers, + reason = "constructed and destructured by sibling allocator types" +)] +pub struct Checkpoint(pub(super) bump_scope::Checkpoint); /// Internal arena allocator. #[derive(Debug)] @@ -155,7 +159,11 @@ unsafe impl alloc::Allocator for Allocator { } } -pub struct AllocatorScope<'scope>(BumpScope<'scope>); +#[expect( + clippy::field_scoped_visibility_modifiers, + reason = "constructed by sibling allocator types in scoped callbacks" +)] +pub struct AllocatorScope<'scope>(pub(super) BumpScope<'scope>); impl BumpAllocator for AllocatorScope<'_> { type Checkpoint = Checkpoint; diff --git a/libs/@local/hashql/core/src/heap/convert.rs b/libs/@local/hashql/core/src/heap/convert.rs index 398d95fc09e..9c930292a97 100644 --- a/libs/@local/hashql/core/src/heap/convert.rs +++ b/libs/@local/hashql/core/src/heap/convert.rs @@ -1,5 +1,6 @@ //! Allocator-aware conversion traits. 
+use alloc::rc::Rc; use core::alloc::Allocator; use super::CollectIn as _; @@ -58,6 +59,26 @@ impl FromIn for Box { } } +impl FromIn<&str, A> for Rc { + #[inline] + fn from_in(value: &str, allocator: A) -> Self { + // This is very much the same as Rc::from(), but without the specialization + let mut slice = Rc::new_uninit_slice_in(value.len(), allocator); + + // SAFETY: We have just created the slice, so we're guaranteed to have exclusive access. + let slice_ref = unsafe { Rc::get_mut_unchecked(&mut slice) }; + slice_ref.write_copy_of_slice(value.as_bytes()); + + // SAFETY: We have just written to the slice, so we're guaranteed to have initialized it. + let slice = unsafe { slice.assume_init() }; + + let (ptr, alloc) = Rc::into_raw_with_allocator(slice); + + // SAFETY: str has the same layout as `[u8]`, so this is safe. + unsafe { Self::from_raw_in(ptr as *const str, alloc) } + } +} + #[cfg(test)] mod tests { use super::*; @@ -109,4 +130,45 @@ mod tests { let boxed: Box = String::from("foo").into_in(&heap); assert_eq!(&*boxed, "foo"); } + + #[test] + fn str_into_rc_str() { + let heap = Heap::new(); + + let rc: Rc = "hello".into_in(&heap); + assert_eq!(&*rc, "hello"); + + let rc: Rc = Rc::from_in("world", &heap); + assert_eq!(&*rc, "world"); + } + + #[expect(clippy::non_ascii_literal)] + #[test] + fn str_into_rc_str_unicode() { + let heap = Heap::new(); + + let rc: Rc = "日本語 🎉".into_in(&heap); + assert_eq!(&*rc, "日本語 🎉"); + } + + #[test] + fn str_into_rc_str_empty() { + let heap = Heap::new(); + + let rc: Rc = "".into_in(&heap); + assert_eq!(&*rc, ""); + assert_eq!(rc.len(), 0); + } + + #[test] + fn rc_str_clone_shares_data() { + let heap = Heap::new(); + + let rc1: Rc = "shared".into_in(&heap); + let rc2 = Rc::clone(&rc1); + + assert_eq!(&*rc1, "shared"); + assert_eq!(&*rc2, "shared"); + assert!(Rc::ptr_eq(&rc1, &rc2)); + } } diff --git a/libs/@local/hashql/core/src/heap/mod.rs b/libs/@local/hashql/core/src/heap/mod.rs index 6049dfa8c6a..68360ad9803 100644 --- 
a/libs/@local/hashql/core/src/heap/mod.rs +++ b/libs/@local/hashql/core/src/heap/mod.rs @@ -97,6 +97,7 @@ mod bump; mod clone; mod convert; mod iter; +mod pool; mod scratch; mod transfer; @@ -110,6 +111,7 @@ pub use self::{ clone::{CloneIn, TryCloneIn}, convert::{FromIn, IntoIn}, iter::{CollectIn, FromIteratorIn}, + pool::{ScratchPool, ScratchPoolGuard}, scratch::Scratch, transfer::TransferInto, }; diff --git a/libs/@local/hashql/core/src/heap/pool.rs b/libs/@local/hashql/core/src/heap/pool.rs new file mode 100644 index 00000000000..7a7b7c64a6f --- /dev/null +++ b/libs/@local/hashql/core/src/heap/pool.rs @@ -0,0 +1,201 @@ +//! Pool of scratch allocators for parallel bump allocation. +//! +//! [`ScratchPool`] enables bump allocation across multiple threads. Each thread +//! borrows its own [`ScratchPoolGuard`] via [`get`](ScratchPool::get), which provides +//! an independent bump allocator. +//! +//! # Usage +//! +//! ``` +//! # #![feature(allocator_api)] +//! use hashql_core::heap::ScratchPool; +//! +//! let pool = ScratchPool::new(); +//! +//! let guard = pool.get(); +//! let mut vec: Vec = Vec::new_in(&guard); +//! vec.push(42); +//! ``` + +use core::{alloc, mem, ptr}; + +use bump_scope::{BumpBox, BumpPool, BumpPoolGuard}; + +use super::{AllocatorScope, BumpAllocator, allocator::Checkpoint}; + +/// A pool of scratch allocators for parallel bump allocation. +/// +/// Unlike [`Scratch`](super::Scratch) which is `!Sync`, `ScratchPool` can be shared +/// across threads. Each thread obtains its own [`ScratchPoolGuard`] via [`get`](Self::get), +/// which provides an independent bump allocator. +/// +/// # Example +/// +/// ``` +/// # #![feature(allocator_api)] +/// use hashql_core::heap::ScratchPool; +/// +/// let pool = ScratchPool::new(); +/// let guard = pool.get(); +/// +/// let mut vec: Vec = Vec::new_in(&guard); +/// vec.push(1); +/// vec.push(2); +/// ``` +pub struct ScratchPool(BumpPool); + +impl ScratchPool { + /// Creates a new empty scratch pool. 
+ #[must_use] + #[inline] + pub fn new() -> Self { + Self(BumpPool::new()) + } + + /// Borrows an allocator from the pool. + /// + /// Each call may reuse a previously returned allocator or create a new one. + #[inline] + pub fn get(&self) -> ScratchPoolGuard<'_> { + ScratchPoolGuard(self.0.get()) + } + + /// Resets all allocators in the pool, freeing all allocations at once. + /// + /// The pool retains its current capacity. + /// + /// # Panics + /// + /// All [`ScratchPoolGuard`]s must have been dropped before calling this method. + #[inline] + pub fn reset(&mut self) { + self.0.reset(); + } +} + +impl Default for ScratchPool { + fn default() -> Self { + Self::new() + } +} + +/// A borrowed allocator from a [`ScratchPool`]. +/// +/// Implements [`BumpAllocator`] and [`Allocator`](alloc::Allocator), so it can be +/// used anywhere a bump allocator is expected. +pub struct ScratchPoolGuard<'pool>(BumpPoolGuard<'pool>); + +impl BumpAllocator for ScratchPoolGuard<'_> { + type Checkpoint = Checkpoint; + type Scoped<'scope> = AllocatorScope<'scope>; + + #[inline] + fn scoped(&mut self, func: impl FnOnce(Self::Scoped<'_>) -> T) -> T { + self.0.scoped(|scope| func(AllocatorScope(scope))) + } + + #[inline] + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(self.0.checkpoint()) + } + + #[inline] + unsafe fn rollback(&self, checkpoint: Self::Checkpoint) { + // SAFETY: The same safety preconditions apply. 
+ unsafe { self.0.reset_to(checkpoint.0) } + } + + #[inline] + fn try_allocate_slice_copy(&self, slice: &[T]) -> Result<&mut [T], alloc::AllocError> { + self.0 + .try_alloc_slice_copy(slice) + .map(BumpBox::leak) + .map_err(|_err| alloc::AllocError) + } + + #[inline] + fn try_allocate_slice_uninit( + &self, + len: usize, + ) -> Result<&mut [mem::MaybeUninit], alloc::AllocError> { + const { + assert!( + !core::mem::needs_drop::(), + "Cannot allocate a type that needs drop" + ); + }; + + self.0 + .try_alloc_uninit_slice(len) + .map(BumpBox::leak) + .map_err(|_err| alloc::AllocError) + } +} + +// SAFETY: Delegates to bump_scope via the internal BumpPoolGuard. +#[expect(unsafe_code, reason = "proxy to bump")] +unsafe impl alloc::Allocator for ScratchPoolGuard<'_> { + #[inline] + fn allocate(&self, layout: alloc::Layout) -> Result, alloc::AllocError> { + bump_scope::alloc::Allocator::allocate(&*self.0, layout).map_err(|_err| alloc::AllocError) + } + + #[inline] + fn allocate_zeroed( + &self, + layout: alloc::Layout, + ) -> Result, alloc::AllocError> { + bump_scope::alloc::Allocator::allocate_zeroed(&*self.0, layout) + .map_err(|_err| alloc::AllocError) + } + + #[inline] + unsafe fn deallocate(&self, ptr: ptr::NonNull, layout: alloc::Layout) { + // SAFETY: Caller upholds Allocator contract. + unsafe { + bump_scope::alloc::Allocator::deallocate(&*self.0, ptr, layout); + } + } + + #[inline] + unsafe fn grow( + &self, + ptr: ptr::NonNull, + old_layout: alloc::Layout, + new_layout: alloc::Layout, + ) -> Result, alloc::AllocError> { + // SAFETY: Caller upholds Allocator contract. + unsafe { + bump_scope::alloc::Allocator::grow(&*self.0, ptr, old_layout, new_layout) + .map_err(|_err| alloc::AllocError) + } + } + + #[inline] + unsafe fn grow_zeroed( + &self, + ptr: ptr::NonNull, + old_layout: alloc::Layout, + new_layout: alloc::Layout, + ) -> Result, alloc::AllocError> { + // SAFETY: Caller upholds Allocator contract. 
+ unsafe { + bump_scope::alloc::Allocator::grow_zeroed(&*self.0, ptr, old_layout, new_layout) + .map_err(|_err| alloc::AllocError) + } + } + + #[inline] + unsafe fn shrink( + &self, + ptr: ptr::NonNull, + old_layout: alloc::Layout, + new_layout: alloc::Layout, + ) -> Result, alloc::AllocError> { + // SAFETY: Caller upholds Allocator contract. + unsafe { + bump_scope::alloc::Allocator::shrink(&*self.0, ptr, old_layout, new_layout) + .map_err(|_err| alloc::AllocError) + } + } +} diff --git a/libs/@local/hashql/core/src/lib.rs b/libs/@local/hashql/core/src/lib.rs index 98a62567f1b..30bcbe06294 100644 --- a/libs/@local/hashql/core/src/lib.rs +++ b/libs/@local/hashql/core/src/lib.rs @@ -33,9 +33,10 @@ slice_partition_dedup, slice_swap_unchecked, step_trait, - str_from_raw_parts, try_trait_v2, variant_count, + get_mut_unchecked, + extern_types )] extern crate alloc; diff --git a/libs/@local/hashql/core/src/module/mod.rs b/libs/@local/hashql/core/src/module/mod.rs index f97b1617874..d0b7e253255 100644 --- a/libs/@local/hashql/core/src/module/mod.rs +++ b/libs/@local/hashql/core/src/module/mod.rs @@ -9,7 +9,7 @@ pub mod item; pub mod locals; pub mod namespace; mod resolver; -mod std_lib; +pub mod std_lib; pub mod universe; use core::slice; diff --git a/libs/@local/hashql/core/src/module/std_lib/core/mod.rs b/libs/@local/hashql/core/src/module/std_lib/core/mod.rs index bdf0ce5fc2e..fae43081c6b 100644 --- a/libs/@local/hashql/core/src/module/std_lib/core/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/core/mod.rs @@ -12,12 +12,12 @@ pub(in crate::module::std_lib) mod bool; pub(in crate::module::std_lib) mod cmp; pub(in crate::module::std_lib) mod json; pub(in crate::module::std_lib) mod math; -pub(in crate::module::std_lib) mod option; +pub mod option; pub(in crate::module::std_lib) mod result; -pub(in crate::module::std_lib) mod url; -pub(in crate::module::std_lib) mod uuid; +pub mod url; +pub mod uuid; -pub(crate) fn func<'heap>( +pub(in crate::module::std_lib) fn 
func<'heap>( lib: &StandardLibrary<'_, 'heap>, def: &mut ModuleDef<'heap>, diff --git a/libs/@local/hashql/core/src/module/std_lib/core/option.rs b/libs/@local/hashql/core/src/module/std_lib/core/option.rs index 60a8ca036f1..7bf46204fa5 100644 --- a/libs/@local/hashql/core/src/module/std_lib/core/option.rs +++ b/libs/@local/hashql/core/src/module/std_lib/core/option.rs @@ -1,19 +1,23 @@ use crate::{ heap::Heap, module::std_lib::{ItemDef, ModuleDef, StandardLibrary, StandardLibraryModule}, - symbol::Symbol, - r#type::TypeId, + symbol::{Symbol, sym}, }; -const NONE_ABSOLUTE_PATH: &str = "::core::option::None"; -const SOME_ABSOLUTE_PATH: &str = "::core::option::Some"; +pub mod types { + use crate::{ + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; -// create a concrete monomorphized instance of `Option` -pub(in crate::module::std_lib) fn option(lib: &StandardLibrary<'_, '_>, value: TypeId) -> TypeId { - let none = lib.ty.opaque(NONE_ABSOLUTE_PATH, lib.ty.null()); - let some = lib.ty.opaque(SOME_ABSOLUTE_PATH, value); + // create a concrete monomorphized instance of `Option` + #[must_use] + pub fn option(ty: &TypeBuilder<'_, '_>, value: TypeId) -> TypeId { + let none = ty.opaque(sym::path::None, ty.null()); + let some = ty.opaque(sym::path::Some, value); - lib.ty.union([none, some]) + ty.union([none, some]) + } } pub(in crate::module::std_lib) struct Option { @@ -32,30 +36,24 @@ impl<'heap> StandardLibraryModule<'heap> for Option { // Option is simply a union between two opaque types, when the constructor only takes a // `Null` the constructor automatically allows for no-value. 
- let t_arg = lib.ty.fresh_argument("T"); + let t_arg = lib.ty.fresh_argument(sym::T); let t_ref = lib.ty.hydrate_argument(t_arg); let t_param = lib.ty.param(t_arg); // newtype None = Null; - let none_ty = lib.ty.opaque(NONE_ABSOLUTE_PATH, lib.ty.null()); - def.push( - lib.heap.intern_symbol("None"), - ItemDef::newtype(lib.ty.env, none_ty, &[]), - ); + let none_ty = lib.ty.opaque(sym::path::None, lib.ty.null()); + def.push(sym::None, ItemDef::newtype(lib.ty.env, none_ty, &[])); // newtype Some = T; let some_ty = lib .ty - .generic([(t_arg, None)], lib.ty.opaque(SOME_ABSOLUTE_PATH, t_param)); - def.push( - lib.heap.intern_symbol("Some"), - ItemDef::newtype(lib.ty.env, some_ty, &[t_ref]), - ); + .generic([(t_arg, None)], lib.ty.opaque(sym::path::Some, t_param)); + def.push(sym::Some, ItemDef::newtype(lib.ty.env, some_ty, &[t_ref])); // type Option = Some | None; let option_ty = lib.ty.union([some_ty, none_ty]); def.push( - lib.heap.intern_symbol("Option"), + sym::Option, ItemDef::r#type(lib.ty.env, option_ty, &[t_ref]), ); diff --git a/libs/@local/hashql/core/src/module/std_lib/core/url.rs b/libs/@local/hashql/core/src/module/std_lib/core/url.rs index 2dbbef43520..0335472f623 100644 --- a/libs/@local/hashql/core/src/module/std_lib/core/url.rs +++ b/libs/@local/hashql/core/src/module/std_lib/core/url.rs @@ -1,9 +1,20 @@ use crate::{ heap::Heap, module::std_lib::{ItemDef, ModuleDef, StandardLibrary, StandardLibraryModule}, - symbol::Symbol, + symbol::{Symbol, sym}, }; +pub(crate) mod types { + use crate::{ + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + pub(crate) fn url(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::Url, ty.string()) + } +} + pub(in crate::module::std_lib) struct Url { _dependencies: (), } @@ -17,13 +28,11 @@ impl<'heap> StandardLibraryModule<'heap> for Url { fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; // TODO: consider making this constructor 
private via intrinsic (requires VM) // newtype Url = String; - let url_ty = lib.ty.opaque("::core::url::Url", lib.ty.string()); - let url = ItemDef::newtype(lib.ty.env, url_ty, &[]); - def.push(heap.intern_symbol("Url"), url); + let url_ty = types::url(&lib.ty); + def.push(sym::Url, ItemDef::newtype(lib.ty.env, url_ty, &[])); def } diff --git a/libs/@local/hashql/core/src/module/std_lib/core/uuid.rs b/libs/@local/hashql/core/src/module/std_lib/core/uuid.rs index edc2950e56a..a679193e885 100644 --- a/libs/@local/hashql/core/src/module/std_lib/core/uuid.rs +++ b/libs/@local/hashql/core/src/module/std_lib/core/uuid.rs @@ -1,9 +1,20 @@ use crate::{ heap::Heap, module::std_lib::{ItemDef, ModuleDef, StandardLibrary, StandardLibraryModule}, - symbol::Symbol, + symbol::{Symbol, sym}, }; +pub(crate) mod types { + use crate::{ + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + pub(crate) fn uuid(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::Uuid, ty.string()) + } +} + pub(in crate::module::std_lib) struct Uuid { _dependencies: (), } @@ -11,19 +22,17 @@ pub(in crate::module::std_lib) struct Uuid { impl<'heap> StandardLibraryModule<'heap> for Uuid { type Children = (); - fn name(heap: &'heap Heap) -> Symbol<'heap> { - heap.intern_symbol("uuid") + fn name(_: &'heap Heap) -> Symbol<'heap> { + sym::uuid } fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; // TODO: consider making this constructor private via intrinsic (requires VM) // newtype Uuid = String; - let uuid_ty = lib.ty.opaque("::core::uuid::Uuid", lib.ty.string()); - let uuid = ItemDef::newtype(lib.ty.env, uuid_ty, &[]); - def.push(heap.intern_symbol("Uuid"), uuid); + let uuid = ItemDef::newtype(lib.ty.env, types::uuid(&lib.ty), &[]); + def.push(sym::Uuid, uuid); def } diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/entity.rs b/libs/@local/hashql/core/src/module/std_lib/graph/entity.rs index 
a238122ad05..8106254199a 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/entity.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/entity.rs @@ -5,7 +5,7 @@ use crate::{ locals::TypeDef, std_lib::{ self, ModuleDef, StandardLibraryModule, - core::{func, option::option}, + core::{func, option::types::option}, decl, }, }, @@ -59,7 +59,7 @@ impl<'heap> StandardLibraryModule<'heap> for Entity { let decl = decl!(lib; (entity: lib.ty.apply([(entity_ty.arguments[0].id, T)], entity_ty.id), path: json_path_ty.id - ) -> option(lib, lib.ty.unknown()) + ) -> option(&lib.ty, lib.ty.unknown()) ); func(lib, &mut def, "::graph::entity::property", &[], decl); diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/head.rs b/libs/@local/hashql/core/src/module/std_lib/graph/head.rs index 87dc1c3a851..a157436cf54 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/head.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/head.rs @@ -5,12 +5,13 @@ use crate::{ locals::TypeDef, std_lib::{self, ModuleDef, StandardLibraryModule, core::func, decl}, }, - symbol::Symbol, + symbol::{Symbol, sym}, }; pub(in crate::module::std_lib) struct Head { _dependencies: ( std_lib::graph::Graph, + std_lib::graph::temporal::Temporal, std_lib::graph::types::knowledge::entity::Entity, ), } @@ -26,10 +27,13 @@ impl<'heap> StandardLibraryModule<'heap> for Head { let mut def = ModuleDef::new(); let heap = lib.heap; - let graph = lib.manifest::(); + let query_temporal_axes_ty = lib + .manifest::() + .expect_type(sym::QueryTemporalAxes); - let query_temporal_axes_ty = graph.expect_type(heap.intern_symbol("QueryTemporalAxes")); - let mut graph_ty = graph.expect_type(heap.intern_symbol("Graph")); + let mut graph_ty = lib + .manifest::() + .expect_type(heap.intern_symbol("Graph")); graph_ty.instantiate(&mut lib.instantiate); let mut entity = lib diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/mod.rs index 
ecb68b6cb2b..7d6a3380645 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/mod.rs @@ -2,8 +2,9 @@ pub(in crate::module::std_lib) mod body; pub(in crate::module::std_lib) mod entity; pub(in crate::module::std_lib) mod head; pub(in crate::module::std_lib) mod tail; +pub mod temporal; pub(in crate::module::std_lib) mod tmp; -pub(in crate::module::std_lib) mod types; +pub mod types; use crate::{ heap::Heap, @@ -12,7 +13,6 @@ use crate::{ std_lib::{ItemDef, ModuleDef, StandardLibraryModule}, }, symbol::Symbol, - r#type::{TypeId, kind::generic::GenericArgumentId}, }; pub(in crate::module::std_lib) struct Graph { @@ -21,6 +21,7 @@ pub(in crate::module::std_lib) struct Graph { impl<'heap> StandardLibraryModule<'heap> for Graph { type Children = ( + self::temporal::Temporal, self::head::Head, self::body::Body, self::tail::Tail, @@ -58,25 +59,6 @@ impl<'heap> StandardLibraryModule<'heap> for Graph { ItemDef::r#type(lib.ty.env, graph_ty, &[t_ref]), ); - // newtype TimeAxis = (:) - // - // Currently implemented as an empty opaque type. This will be enhanced to support - // user construction in the future. 
- // see: https://linear.app/hash/issue/H-4736/hashql-make-time-axis-constructible - let time_axis_ty = lib.ty.generic( - [] as [GenericArgumentId; 0], - lib.ty.opaque( - "::graph::TimeAxis", - lib.ty.r#struct([] as [(&str, TypeId); 0]), - ), - ); - def.push( - lib.heap.intern_symbol("QueryTemporalAxes"), - // Export as `type` rather than `newtype` since TimeAxis is currently not - // user-constructible - ItemDef::r#type(lib.ty.env, time_axis_ty, &[]), - ); - def } } diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/temporal.rs b/libs/@local/hashql/core/src/module/std_lib/graph/temporal.rs new file mode 100644 index 00000000000..26729210137 --- /dev/null +++ b/libs/@local/hashql/core/src/module/std_lib/graph/temporal.rs @@ -0,0 +1,285 @@ +use crate::{ + heap::Heap, + module::std_lib::{ItemDef, ModuleDef, StandardLibrary, StandardLibraryModule}, + symbol::{Symbol, sym}, +}; + +pub mod types { + use crate::{ + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + #[must_use] + pub fn timestamp(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::Timestamp, ty.integer()) + } + + #[must_use] + pub fn unbounded_temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::UnboundedTemporalBound, ty.null()) + } + + #[must_use] + pub fn inclusive_temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::InclusiveTemporalBound, self::timestamp(ty)) + } + + #[must_use] + pub fn exclusive_temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::ExclusiveTemporalBound, self::timestamp(ty)) + } + + #[must_use] + pub fn temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.union([ + self::unbounded_temporal_bound(ty), + self::inclusive_temporal_bound(ty), + self::exclusive_temporal_bound(ty), + ]) + } + + #[must_use] + pub fn finite_temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.union([ + self::inclusive_temporal_bound(ty), + self::exclusive_temporal_bound(ty), + ]) + } + + // newtype 
DecisionTime = T + #[must_use] + pub fn decision_time(ty: &TypeBuilder<'_, '_>, inner: TypeId) -> TypeId { + ty.opaque(sym::path::DecisionTime, inner) + } + + // newtype TransactionTime = T + #[must_use] + pub fn transaction_time(ty: &TypeBuilder<'_, '_>, inner: TypeId) -> TypeId { + ty.opaque(sym::path::TransactionTime, inner) + } + + /// `newtype Interval = (start: S, end: E)`. + /// + /// Generic over the start and end bound types. Callers pass concrete types + /// to monomorphize (e.g. `InclusiveTemporalBound` for start, + /// `OpenTemporalBound` for end). + #[must_use] + pub fn interval(ty: &TypeBuilder<'_, '_>, start: TypeId, end: TypeId) -> TypeId { + ty.opaque( + sym::path::Interval, + ty.r#struct([(sym::start, start), (sym::end, end)]), + ) + } + + /// `type OpenTemporalBound = ExclusiveTemporalBound | UnboundedTemporalBound`. + #[must_use] + pub fn open_temporal_bound(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.union([ + self::exclusive_temporal_bound(ty), + self::unbounded_temporal_bound(ty), + ]) + } + + /// `type LeftClosedTemporalInterval = Interval`. + #[must_use] + pub fn left_closed_temporal_interval(ty: &TypeBuilder<'_, '_>) -> TypeId { + self::interval( + ty, + self::inclusive_temporal_bound(ty), + self::open_temporal_bound(ty), + ) + } + + /// `type RightBoundedTemporalInterval = Interval`. 
+ #[must_use] + pub fn right_bounded_temporal_interval(ty: &TypeBuilder<'_, '_>) -> TypeId { + self::interval( + ty, + self::temporal_bound(ty), + self::finite_temporal_bound(ty), + ) + } +} + +pub(in crate::module::std_lib) struct Temporal { + _dependencies: (), +} + +impl<'heap> StandardLibraryModule<'heap> for Temporal { + type Children = (); + + fn name(_heap: &'heap Heap) -> Symbol<'heap> { + sym::temporal + } + + #[expect(clippy::too_many_lines, clippy::similar_names)] + fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { + let mut def = ModuleDef::new(); + + // newtype Timestamp = Integer + // + // TODO: replace with a dedicated primitive type in the future. + let timestamp_ty = types::timestamp(&lib.ty); + def.push( + sym::Timestamp, + ItemDef::newtype(lib.ty.env, timestamp_ty, &[]), + ); + + // newtype DecisionTime = T + let dt_t_arg = lib.ty.fresh_argument(sym::T); + let dt_t_ref = lib.ty.hydrate_argument(dt_t_arg); + let dt_t_param = lib.ty.param(dt_t_arg); + + let decision_time_ty = lib.ty.generic( + [(dt_t_arg, None)], + types::decision_time(&lib.ty, dt_t_param), + ); + def.push( + sym::DecisionTime, + ItemDef::newtype(lib.ty.env, decision_time_ty, &[dt_t_ref]), + ); + + // newtype TransactionTime = T + let tt_t_arg = lib.ty.fresh_argument(sym::T); + let tt_t_ref = lib.ty.hydrate_argument(tt_t_arg); + let tt_t_param = lib.ty.param(tt_t_arg); + + let transaction_time_ty = lib.ty.generic( + [(tt_t_arg, None)], + types::transaction_time(&lib.ty, tt_t_param), + ); + def.push( + sym::TransactionTime, + ItemDef::newtype(lib.ty.env, transaction_time_ty, &[tt_t_ref]), + ); + + // newtype UnboundedTemporalBound = Null + let unbounded_bound_ty = types::unbounded_temporal_bound(&lib.ty); + def.push( + sym::UnboundedTemporalBound, + ItemDef::newtype(lib.ty.env, unbounded_bound_ty, &[]), + ); + + // newtype InclusiveTemporalBound = Timestamp + let inclusive_bound_ty = types::inclusive_temporal_bound(&lib.ty); + def.push( + 
sym::InclusiveTemporalBound, + ItemDef::newtype(lib.ty.env, inclusive_bound_ty, &[]), + ); + + // newtype ExclusiveTemporalBound = Timestamp + let exclusive_bound_ty = types::exclusive_temporal_bound(&lib.ty); + def.push( + sym::ExclusiveTemporalBound, + ItemDef::newtype(lib.ty.env, exclusive_bound_ty, &[]), + ); + + // type TemporalBound = UnboundedTemporalBound | InclusiveTemporalBound + // | ExclusiveTemporalBound + let temporal_bound_ty = + lib.ty + .union([unbounded_bound_ty, inclusive_bound_ty, exclusive_bound_ty]); + def.push( + sym::TemporalBound, + ItemDef::r#type(lib.ty.env, temporal_bound_ty, &[]), + ); + + // type FiniteTemporalBound = InclusiveTemporalBound | ExclusiveTemporalBound + let finite_bound_ty = lib.ty.union([inclusive_bound_ty, exclusive_bound_ty]); + def.push( + sym::FiniteTemporalBound, + ItemDef::r#type(lib.ty.env, finite_bound_ty, &[]), + ); + + // type OpenTemporalBound = ExclusiveTemporalBound | UnboundedTemporalBound + let open_bound_ty = lib.ty.union([exclusive_bound_ty, unbounded_bound_ty]); + def.push( + sym::OpenTemporalBound, + ItemDef::r#type(lib.ty.env, open_bound_ty, &[]), + ); + + // newtype Interval = (start: S, end: E) + let interval_s_arg = lib.ty.fresh_argument(sym::S); + let interval_s_ref = lib.ty.hydrate_argument(interval_s_arg); + let interval_s_param = lib.ty.param(interval_s_arg); + + let interval_e_arg = lib.ty.fresh_argument(sym::E); + let interval_e_ref = lib.ty.hydrate_argument(interval_e_arg); + let interval_e_param = lib.ty.param(interval_e_arg); + + let interval_ty = lib.ty.generic( + [(interval_s_arg, None), (interval_e_arg, None)], + types::interval(&lib.ty, interval_s_param, interval_e_param), + ); + def.push( + sym::Interval, + ItemDef::newtype(lib.ty.env, interval_ty, &[interval_s_ref, interval_e_ref]), + ); + + // type LeftClosedTemporalInterval = + // Interval + let left_closed_interval_ty = types::interval(&lib.ty, inclusive_bound_ty, open_bound_ty); + def.push( + sym::LeftClosedTemporalInterval, + 
ItemDef::r#type(lib.ty.env, left_closed_interval_ty, &[]), + ); + + // type RightBoundedTemporalInterval = + // Interval + let right_bounded_interval_ty = + types::interval(&lib.ty, temporal_bound_ty, finite_bound_ty); + def.push( + sym::RightBoundedTemporalInterval, + ItemDef::r#type(lib.ty.env, right_bounded_interval_ty, &[]), + ); + + // newtype PinnedTransactionTimeTemporalAxes = ( + // pinned: TransactionTime, + // variable: DecisionTime, + // ) + let pinned_tx_ty = lib.ty.opaque( + sym::path::PinnedTransactionTimeTemporalAxes, + lib.ty.r#struct([ + (sym::pinned, types::transaction_time(&lib.ty, timestamp_ty)), + ( + sym::variable, + types::decision_time(&lib.ty, right_bounded_interval_ty), + ), + ]), + ); + def.push( + sym::PinnedTransactionTimeTemporalAxes, + ItemDef::newtype(lib.ty.env, pinned_tx_ty, &[]), + ); + + // newtype PinnedDecisionTimeTemporalAxes = ( + // pinned: DecisionTime, + // variable: TransactionTime, + // ) + let pinned_dt_ty = lib.ty.opaque( + sym::path::PinnedDecisionTimeTemporalAxes, + lib.ty.r#struct([ + (sym::pinned, types::decision_time(&lib.ty, timestamp_ty)), + ( + sym::variable, + types::transaction_time(&lib.ty, right_bounded_interval_ty), + ), + ]), + ); + def.push( + sym::PinnedDecisionTimeTemporalAxes, + ItemDef::newtype(lib.ty.env, pinned_dt_ty, &[]), + ); + + // type QueryTemporalAxes = PinnedTransactionTimeTemporalAxes + // | PinnedDecisionTimeTemporalAxes + let query_temporal_axes_ty = lib.ty.union([pinned_tx_ty, pinned_dt_ty]); + def.push( + sym::QueryTemporalAxes, + ItemDef::r#type(lib.ty.env, query_temporal_axes_ty, &[]), + ); + + def + } +} diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/tmp.rs b/libs/@local/hashql/core/src/module/std_lib/graph/tmp.rs index ef113a6836f..e9bfed600b5 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/tmp.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/tmp.rs @@ -5,12 +5,12 @@ use crate::{ locals::TypeDef, std_lib::{self, ModuleDef, 
StandardLibraryModule, core::func}, }, - symbol::Symbol, + symbol::{Symbol, sym}, r#type::TypeId, }; pub(in crate::module::std_lib) struct Tmp { - _dependencies: (std_lib::graph::Graph,), + _dependencies: (std_lib::graph::temporal::Temporal,), } impl<'heap> StandardLibraryModule<'heap> for Tmp { @@ -22,11 +22,10 @@ impl<'heap> StandardLibraryModule<'heap> for Tmp { fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; let query_temporal_axes_ty = lib - .manifest::() - .expect_type(heap.intern_symbol("QueryTemporalAxes")); + .manifest::() + .expect_type(sym::QueryTemporalAxes); // ::graph::tmp::decision_time_now() -> TimeAxis func( diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/entity.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/entity.rs index 375c4591b76..dd5b0f19e26 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/entity.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/entity.rs @@ -2,292 +2,545 @@ use crate::{ heap::Heap, module::{ StandardLibrary, - std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule, core::option::option}, + std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule}, }, symbol::{Symbol, sym}, }; +pub mod types { + use crate::{ + module::std_lib::{self, core::option::types::option}, + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + // newtype EntityUuid = Uuid; + pub struct EntityUuidDependencies { + pub uuid: TypeId, + } + + #[must_use] + pub fn entity_uuid(ty: &TypeBuilder<'_, '_>, deps: Option) -> TypeId { + let EntityUuidDependencies { uuid } = deps.unwrap_or_else(|| EntityUuidDependencies { + uuid: std_lib::core::uuid::types::uuid(ty), + }); + + ty.opaque(sym::path::EntityUuid, uuid) + } + + // newtype DraftId = Uuid; + pub struct DraftIdDependencies { + pub uuid: TypeId, + } + + #[must_use] + pub fn draft_id(ty: &TypeBuilder<'_, '_>, deps: Option) 
-> TypeId { + let DraftIdDependencies { uuid } = deps.unwrap_or_else(|| DraftIdDependencies { + uuid: std_lib::core::uuid::types::uuid(ty), + }); + + ty.opaque(sym::path::DraftId, uuid) + } + + // newtype EntityEditionId = Uuid; + pub struct EntityEditionIdDependencies { + pub uuid: TypeId, + } + + #[must_use] + pub fn entity_edition_id( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let EntityEditionIdDependencies { uuid } = + deps.unwrap_or_else(|| EntityEditionIdDependencies { + uuid: std_lib::core::uuid::types::uuid(ty), + }); + + ty.opaque(sym::path::EntityEditionId, uuid) + } + + // newtype EntityId = (web_id: WebId, entity_uuid: EntityUuid, draft_id: Option) + pub struct EntityIdDependencies { + pub web_id: TypeId, + pub entity_uuid: TypeId, + pub draft_id: TypeId, + } + + #[must_use] + pub fn entity_id(ty: &TypeBuilder<'_, '_>, deps: Option) -> TypeId { + let EntityIdDependencies { + web_id, + entity_uuid, + draft_id, + } = deps.unwrap_or_else(|| EntityIdDependencies { + web_id: std_lib::graph::types::principal::actor_group::web::types::web_id(ty, None), + entity_uuid: self::entity_uuid(ty, None), + draft_id: self::draft_id(ty, None), + }); + + ty.opaque( + sym::path::EntityId, + ty.r#struct([ + (sym::web_id, web_id), + (sym::entity_uuid, entity_uuid), + (sym::draft_id, option(ty, draft_id)), + ]), + ) + } + + // newtype RecordId = (entity_id: EntityId, edition_id: EntityEditionId) + pub struct RecordIdDependencies { + pub entity_id: TypeId, + pub edition_id: TypeId, + } + + #[must_use] + pub fn record_id(ty: &TypeBuilder<'_, '_>, deps: Option) -> TypeId { + let RecordIdDependencies { + entity_id, + edition_id, + } = deps.unwrap_or_else(|| RecordIdDependencies { + entity_id: self::entity_id(ty, None), + edition_id: self::entity_edition_id(ty, None), + }); + + ty.opaque( + sym::path::RecordId, + ty.r#struct([(sym::entity_id, entity_id), (sym::edition_id, edition_id)]), + ) + } + + // newtype TemporalMetadata = ( + // decision_time: 
DecisionTime, + // transaction_time: TransactionTime, + // ) + pub struct TemporalMetadataDependencies { + pub interval: TypeId, + } + + #[must_use] + pub fn temporal_metadata( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let TemporalMetadataDependencies { interval } = + deps.unwrap_or_else(|| TemporalMetadataDependencies { + interval: std_lib::graph::temporal::types::left_closed_temporal_interval(ty), + }); + + ty.opaque( + sym::path::TemporalMetadata, + ty.r#struct([ + ( + sym::decision_time, + std_lib::graph::temporal::types::decision_time(ty, interval), + ), + ( + sym::transaction_time, + std_lib::graph::temporal::types::transaction_time(ty, interval), + ), + ]), + ) + } + + // newtype Confidence = Number + #[must_use] + pub fn confidence(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::Confidence, ty.number()) + } + + // newtype InferredEntityProvenance = Unknown + // + // JSONB blob in `entity_ids.provenance`. Contains `created_by_id`, + // `created_at_transaction_time`, `created_at_decision_time`, and optional + // `first_non_draft_created_at_*` timestamps. + #[must_use] + pub fn inferred_entity_provenance(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::InferredEntityProvenance, ty.unknown()) + } + + // newtype EntityEditionProvenance = Unknown + // + // JSONB blob in `entity_editions.provenance`. Contains `created_by_id`, + // optional `archived_by_id`, `actor_type`, `OriginProvenance`, and + // `Vec`. 
+ #[must_use] + pub fn entity_edition_provenance(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::EntityEditionProvenance, ty.unknown()) + } + + // newtype EntityProvenance = ( + // inferred: InferredEntityProvenance, + // edition: EntityEditionProvenance, + // ) + pub struct EntityProvenanceDependencies { + pub inferred: TypeId, + pub edition: TypeId, + } + + #[must_use] + pub fn entity_provenance( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let EntityProvenanceDependencies { inferred, edition } = + deps.unwrap_or_else(|| EntityProvenanceDependencies { + inferred: self::inferred_entity_provenance(ty), + edition: self::entity_edition_provenance(ty), + }); + + ty.opaque( + sym::path::EntityProvenance, + ty.r#struct([(sym::inferred, inferred), (sym::edition, edition)]), + ) + } + + // newtype PropertyProvenance = Unknown + // + // JSONB blob on entity edges (`entity_edge.provenance`). Just + // `Vec`. + #[must_use] + pub fn property_provenance(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::PropertyProvenance, ty.unknown()) + } + + // newtype PropertyObjectMetadata = Unknown + // + // JSONB blob in `entity_editions.property_metadata`. Contains per-property-key + // metadata (confidence, provenance) rather than property values. 
+ #[must_use] + pub fn property_object_metadata(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::PropertyObjectMetadata, ty.unknown()) + } + + // newtype EntityMetadata = ( + // record_id: EntityRecordId, + // temporal_versioning: EntityTemporalMetadata, + // entity_type_ids: List, + // archived: Boolean, + // provenance: EntityProvenance, + // confidence: Option, + // properties: PropertyObjectMetadata, + // ) + pub struct EntityMetadataDependencies { + pub record_id: TypeId, + pub temporal_versioning: TypeId, + pub entity_type_ids: TypeId, + pub provenance: TypeId, + pub confidence: TypeId, + pub properties: TypeId, + } + + #[must_use] + pub fn entity_metadata( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let EntityMetadataDependencies { + record_id, + temporal_versioning, + entity_type_ids, + provenance, + confidence, + properties, + } = deps.unwrap_or_else(|| EntityMetadataDependencies { + record_id: self::record_id(ty, None), + temporal_versioning: self::temporal_metadata(ty, None), + entity_type_ids: ty.list(std_lib::graph::types::ontology::types::versioned_url( + ty, None, + )), + provenance: self::entity_provenance(ty, None), + confidence: self::confidence(ty), + properties: self::property_object_metadata(ty), + }); + + ty.opaque( + sym::path::EntityMetadata, + ty.r#struct([ + (sym::record_id, record_id), + (sym::temporal_versioning, temporal_versioning), + (sym::entity_type_ids, entity_type_ids), + (sym::archived, ty.boolean()), + (sym::provenance, provenance), + (sym::confidence, option(ty, confidence)), + (sym::properties, properties), + ]), + ) + } + + // newtype LinkData = ( + // left_entity_id: EntityId, + // right_entity_id: EntityId, + // left_entity_confidence: Option, + // left_entity_provenance: PropertyProvenance, + // right_entity_confidence: Option, + // right_entity_provenance: PropertyProvenance, + // ) + pub struct LinkDataDependencies { + pub entity_id: TypeId, + pub confidence: TypeId, + pub 
property_provenance: TypeId, + } + + #[must_use] + pub fn link_data(ty: &TypeBuilder<'_, '_>, deps: Option) -> TypeId { + let LinkDataDependencies { + entity_id, + confidence, + property_provenance, + } = deps.unwrap_or_else(|| LinkDataDependencies { + entity_id: self::entity_id(ty, None), + confidence: self::confidence(ty), + property_provenance: self::property_provenance(ty), + }); + + ty.opaque( + sym::path::LinkData, + ty.r#struct([ + (sym::left_entity_id, entity_id), + (sym::right_entity_id, entity_id), + (sym::left_entity_confidence, option(ty, confidence)), + (sym::left_entity_provenance, property_provenance), + (sym::right_entity_confidence, option(ty, confidence)), + (sym::right_entity_provenance, property_provenance), + ]), + ) + } + + // newtype EntityEncodings = (vectors: Unknown) + // + // The graph API doesn't expose encodings yet, but the storage layer already has + // them. The `?` inner type is correct; the encoding format is opaque to the + // type system. + #[must_use] + pub fn entity_encodings(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque( + sym::path::EntityEncodings, + ty.r#struct([(sym::vectors, ty.unknown())]), + ) + } + + // newtype Entity = ( + // properties: T, + // link_data: Option, + // metadata: EntityMetadata, + // encodings: EntityEncodings, + // ) + pub struct EntityDependencies { + pub link_data: TypeId, + pub metadata: TypeId, + pub encodings: TypeId, + } + + #[must_use] + pub fn entity( + ty: &TypeBuilder<'_, '_>, + properties: TypeId, + deps: Option, + ) -> TypeId { + let EntityDependencies { + link_data, + metadata, + encodings, + } = deps.unwrap_or_else(|| EntityDependencies { + link_data: self::link_data(ty, None), + metadata: self::entity_metadata(ty, None), + encodings: self::entity_encodings(ty), + }); + + ty.opaque( + sym::path::Entity, + ty.r#struct([ + (sym::properties, properties), + (sym::link_data, option(ty, link_data)), + (sym::metadata, metadata), + (sym::encodings, encodings), + ]), + ) + } +} + pub(in 
crate::module::std_lib) struct Entity { _dependencies: ( std_lib::core::uuid::Uuid, std_lib::graph::types::principal::actor_group::web::Web, std_lib::graph::types::ontology::Ontology, + std_lib::graph::temporal::Temporal, ), } impl<'heap> StandardLibraryModule<'heap> for Entity { type Children = (); - fn name(heap: &'heap Heap) -> Symbol<'heap> { - heap.intern_symbol("entity") + fn name(_: &'heap Heap) -> Symbol<'heap> { + sym::entity } #[expect(clippy::too_many_lines)] fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; - // newtype EntityUuid = Uuid; let uuid_ty = lib .manifest::() - .expect_newtype(heap.intern_symbol("Uuid")); - let entity_uuid_ty = lib - .ty - .opaque("::graph::types::knowledge::entity::EntityUuid", uuid_ty.id); - def.push( - heap.intern_symbol("EntityUuid"), - ItemDef::newtype(lib.ty.env, entity_uuid_ty, &[]), - ); + .expect_newtype(sym::Uuid) + .id; + let web_id_ty = lib + .manifest::() + .expect_newtype(sym::WebId) + .id; + let versioned_url_ty = lib + .manifest::() + .expect_newtype(sym::VersionedUrl) + .id; + let left_closed_interval_ty = lib + .manifest::() + .expect_type(sym::LeftClosedTemporalInterval) + .id; - // newtype DraftId = Uuid; - let draft_id_ty = lib - .ty - .opaque("::graph::types::knowledge::entity::DraftId", uuid_ty.id); - def.push( - heap.intern_symbol("DraftId"), - ItemDef::newtype(lib.ty.env, draft_id_ty, &[]), - ); + let ty = &lib.ty; - // newtype EntityEditionId = Uuid; - let entity_edition_id_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityEditionId", - uuid_ty.id, - ); + let entity_uuid_ty = + types::entity_uuid(ty, Some(types::EntityUuidDependencies { uuid: uuid_ty })); def.push( - heap.intern_symbol("EntityEditionId"), - ItemDef::newtype(lib.ty.env, entity_edition_id_ty, &[]), + sym::EntityUuid, + ItemDef::newtype(ty.env, entity_uuid_ty, &[]), ); - // newtype EntityId = (web_id: WebId, entity_uuid: EntityUuid, draft_id: 
Option) - let web_id = lib - .manifest::() - .expect_newtype(heap.intern_symbol("WebId")); - let entity_id_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityId", - lib.ty.r#struct([ - ("web_id", web_id.id), - ("entity_uuid", entity_uuid_ty), - ("draft_id", option(lib, draft_id_ty)), - ]), - ); - def.push( - heap.intern_symbol("EntityId"), - ItemDef::newtype(lib.ty.env, entity_id_ty, &[]), - ); + let draft_id_ty = types::draft_id(ty, Some(types::DraftIdDependencies { uuid: uuid_ty })); + def.push(sym::DraftId, ItemDef::newtype(ty.env, draft_id_ty, &[])); - // newtype EntityRecordId = (entity_id: EntityId, edition_id: EntityEditionId) - let entity_record_id_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityRecordId", - lib.ty.r#struct([ - ("entity_id", entity_id_ty), - ("edition_id", entity_edition_id_ty), - ]), + let entity_edition_id_ty = types::entity_edition_id( + ty, + Some(types::EntityEditionIdDependencies { uuid: uuid_ty }), ); def.push( - heap.intern_symbol("EntityRecordId"), - ItemDef::newtype(lib.ty.env, entity_record_id_ty, &[]), + sym::EntityEditionId, + ItemDef::newtype(ty.env, entity_edition_id_ty, &[]), ); - // newtype TemporalInterval = Unknown - // - // Opaque wrapper for `LeftClosedTemporalInterval`. The internal structure (start bound, - // end bound) is not exposed to the HashQL type system; the placement resolver only needs - // the field name prefix (`temporal_versioning.decision_time`). 
- let temporal_interval_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::TemporalInterval", - lib.ty.unknown(), - ); - def.push( - heap.intern_symbol("TemporalInterval"), - ItemDef::newtype(lib.ty.env, temporal_interval_ty, &[]), - ); + let entity_id_ty = types::entity_id( + ty, + Some(types::EntityIdDependencies { + web_id: web_id_ty, + entity_uuid: entity_uuid_ty, + draft_id: draft_id_ty, + }), + ); + def.push(sym::EntityId, ItemDef::newtype(ty.env, entity_id_ty, &[])); - // newtype EntityTemporalMetadata = ( - // decision_time: TemporalInterval, - // transaction_time: TemporalInterval, - // ) - let temporal_metadata_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityTemporalMetadata", - lib.ty.r#struct([ - ("decision_time", temporal_interval_ty), - ("transaction_time", temporal_interval_ty), - ]), - ); - def.push( - heap.intern_symbol("EntityTemporalMetadata"), - ItemDef::newtype(lib.ty.env, temporal_metadata_ty, &[]), - ); + let record_id_ty = types::record_id( + ty, + Some(types::RecordIdDependencies { + entity_id: entity_id_ty, + edition_id: entity_edition_id_ty, + }), + ); + def.push(sym::RecordId, ItemDef::newtype(ty.env, record_id_ty, &[])); - // newtype Confidence = Number - let confidence_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::Confidence", - lib.ty.number(), + let temporal_metadata_ty = types::temporal_metadata( + ty, + Some(types::TemporalMetadataDependencies { + interval: left_closed_interval_ty, + }), ); def.push( - heap.intern_symbol("Confidence"), - ItemDef::newtype(lib.ty.env, confidence_ty, &[]), + sym::TemporalMetadata, + ItemDef::newtype(ty.env, temporal_metadata_ty, &[]), ); - // newtype InferredEntityProvenance = Unknown - // - // JSONB blob in `entity_ids.provenance`. Contains `created_by_id`, - // `created_at_transaction_time`, `created_at_decision_time`, and optional - // `first_non_draft_created_at_*` timestamps. 
- let inferred_provenance_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::InferredEntityProvenance", - lib.ty.unknown(), - ); + let confidence_ty = types::confidence(ty); def.push( - heap.intern_symbol("InferredEntityProvenance"), - ItemDef::newtype(lib.ty.env, inferred_provenance_ty, &[]), + sym::Confidence, + ItemDef::newtype(ty.env, confidence_ty, &[]), ); - // newtype EntityEditionProvenance = Unknown - // - // JSONB blob in `entity_editions.provenance`. Contains `created_by_id`, - // optional `archived_by_id`, `actor_type`, `OriginProvenance`, and - // `Vec`. - let edition_provenance_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityEditionProvenance", - lib.ty.unknown(), - ); + let inferred_provenance_ty = types::inferred_entity_provenance(ty); def.push( - heap.intern_symbol("EntityEditionProvenance"), - ItemDef::newtype(lib.ty.env, edition_provenance_ty, &[]), + sym::InferredEntityProvenance, + ItemDef::newtype(ty.env, inferred_provenance_ty, &[]), ); - // newtype EntityProvenance = ( - // inferred: InferredEntityProvenance, - // edition: EntityEditionProvenance, - // ) - let entity_provenance_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityProvenance", - lib.ty.r#struct([ - ("inferred", inferred_provenance_ty), - ("edition", edition_provenance_ty), - ]), - ); + let edition_provenance_ty = types::entity_edition_provenance(ty); def.push( - heap.intern_symbol("EntityProvenance"), - ItemDef::newtype(lib.ty.env, entity_provenance_ty, &[]), + sym::EntityEditionProvenance, + ItemDef::newtype(ty.env, edition_provenance_ty, &[]), ); - // newtype PropertyProvenance = Unknown - // - // JSONB blob on entity edges (`entity_edge.provenance`). Just - // `Vec`. 
- let property_provenance_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::PropertyProvenance", - lib.ty.unknown(), + let entity_provenance_ty = types::entity_provenance( + ty, + Some(types::EntityProvenanceDependencies { + inferred: inferred_provenance_ty, + edition: edition_provenance_ty, + }), ); def.push( - heap.intern_symbol("PropertyProvenance"), - ItemDef::newtype(lib.ty.env, property_provenance_ty, &[]), + sym::EntityProvenance, + ItemDef::newtype(ty.env, entity_provenance_ty, &[]), ); - // newtype PropertyObjectMetadata = Unknown - // - // JSONB blob in `entity_editions.property_metadata`. Contains per-property-key - // metadata (confidence, provenance) rather than property values. - let property_object_metadata_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::PropertyObjectMetadata", - lib.ty.unknown(), - ); + let property_provenance_ty = types::property_provenance(ty); def.push( - heap.intern_symbol("PropertyObjectMetadata"), - ItemDef::newtype(lib.ty.env, property_object_metadata_ty, &[]), + sym::PropertyProvenance, + ItemDef::newtype(ty.env, property_provenance_ty, &[]), ); - // newtype EntityMetadata = ( - // record_id: EntityRecordId, - // temporal_versioning: EntityTemporalMetadata, - // entity_type_ids: List, - // archived: Boolean, - // provenance: EntityProvenance, - // confidence: Option, - // properties: PropertyObjectMetadata, - // ) - let versioned_url = lib - .manifest::() - .expect_newtype(heap.intern_symbol("VersionedUrl")); - let entity_metadata_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityMetadata", - lib.ty.r#struct([ - ("record_id", entity_record_id_ty), - ("temporal_versioning", temporal_metadata_ty), - ("entity_type_ids", lib.ty.list(versioned_url.id)), - ("archived", lib.ty.boolean()), - ("provenance", entity_provenance_ty), - ("confidence", option(lib, confidence_ty)), - ("properties", property_object_metadata_ty), - ]), - ); + let property_object_metadata_ty = 
types::property_object_metadata(ty); def.push( - heap.intern_symbol("EntityMetadata"), - ItemDef::newtype(lib.ty.env, entity_metadata_ty, &[]), + sym::PropertyObjectMetadata, + ItemDef::newtype(ty.env, property_object_metadata_ty, &[]), ); - // newtype LinkData = ( - // left_entity_id: EntityId, - // right_entity_id: EntityId, - // left_entity_confidence: Option, - // left_entity_provenance: PropertyProvenance, - // right_entity_confidence: Option, - // right_entity_provenance: PropertyProvenance, - // ) - let link_data_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::LinkData", - lib.ty.r#struct([ - ("left_entity_id", entity_id_ty), - ("right_entity_id", entity_id_ty), - ("left_entity_confidence", option(lib, confidence_ty)), - ("left_entity_provenance", property_provenance_ty), - ("right_entity_confidence", option(lib, confidence_ty)), - ("right_entity_provenance", property_provenance_ty), - ]), + let entity_metadata_ty = types::entity_metadata( + ty, + Some(types::EntityMetadataDependencies { + record_id: record_id_ty, + temporal_versioning: temporal_metadata_ty, + entity_type_ids: ty.list(versioned_url_ty), + provenance: entity_provenance_ty, + confidence: confidence_ty, + properties: property_object_metadata_ty, + }), ); def.push( - heap.intern_symbol("LinkData"), - ItemDef::newtype(lib.ty.env, link_data_ty, &[]), + sym::EntityMetadata, + ItemDef::newtype(ty.env, entity_metadata_ty, &[]), ); - // newtype EntityEncodings = (vectors: Unknown) - // - // The graph API doesn't expose encodings yet, but the storage layer already has - // them. The `?` inner type is correct; the encoding format is opaque to the - // type system. 
- let encodings_ty = lib.ty.opaque( - "::graph::types::knowledge::entity::EntityEncodings", - lib.ty.r#struct([("vectors", lib.ty.unknown())]), - ); + let link_data_ty = types::link_data( + ty, + Some(types::LinkDataDependencies { + entity_id: entity_id_ty, + confidence: confidence_ty, + property_provenance: property_provenance_ty, + }), + ); + def.push(sym::LinkData, ItemDef::newtype(ty.env, link_data_ty, &[])); + + let encodings_ty = types::entity_encodings(ty); def.push( - heap.intern_symbol("EntityEncodings"), - ItemDef::newtype(lib.ty.env, encodings_ty, &[]), + sym::EntityEncodings, + ItemDef::newtype(ty.env, encodings_ty, &[]), ); - // newtype Entity = ( - // properties: T, - // link_data: Option, - // metadata: EntityMetadata, - // encodings: EntityEncodings, - // ) - let t_arg = lib.ty.fresh_argument("T"); + // Entity + let t_arg = lib.ty.fresh_argument(sym::T); let t_ref = lib.ty.hydrate_argument(t_arg); let t_param = lib.ty.param(t_arg); let entity_ty = lib.ty.generic( [t_arg], - lib.ty.opaque( - sym::path::Entity, - lib.ty.r#struct([ - ("properties", t_param), - ("link_data", option(lib, link_data_ty)), - ("metadata", entity_metadata_ty), - ("encodings", encodings_ty), - ]), + types::entity( + &lib.ty, + t_param, + Some(types::EntityDependencies { + link_data: link_data_ty, + metadata: entity_metadata_ty, + encodings: encodings_ty, + }), ), ); def.push( - heap.intern_symbol("Entity"), + sym::Entity, ItemDef::newtype(lib.ty.env, entity_ty, &[t_ref]), ); diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/mod.rs index b68d310a05f..7fd11736062 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/knowledge/mod.rs @@ -1,4 +1,4 @@ -pub(in crate::module::std_lib) mod entity; +pub mod entity; use crate::{ heap::Heap, diff --git 
a/libs/@local/hashql/core/src/module/std_lib/graph/types/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/mod.rs index 80686d4a3a0..9e743089972 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/mod.rs @@ -8,9 +8,9 @@ use crate::{ symbol::Symbol, }; -pub(in crate::module::std_lib) mod knowledge; -pub(in crate::module::std_lib) mod ontology; -pub(in crate::module::std_lib) mod principal; +pub mod knowledge; +pub mod ontology; +pub mod principal; pub(in crate::module::std_lib) struct Types { _dependencies: (), diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/entity_type.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/entity_type.rs index b79b0f639d2..b8fb9b034ab 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/entity_type.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/entity_type.rs @@ -2,7 +2,7 @@ use crate::{ heap::Heap, module::{ StandardLibrary, - std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule, core::option::option}, + std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule, core::option::types::option}, }, symbol::Symbol, }; @@ -31,7 +31,7 @@ impl<'heap> StandardLibraryModule<'heap> for EntityType { .expect_newtype(heap.intern_symbol("WebId")); let entity_type_metadata_ty = lib.ty.opaque( "::graph::types::ontology::entity_type::EntityTypeMetadata", - lib.ty.r#struct([("web_id", option(lib, web_id.id))]), + lib.ty.r#struct([("web_id", option(&lib.ty, web_id.id))]), ); def.push( heap.intern_symbol("EntityTypeMetadata"), diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/mod.rs index da2ec631994..8edd217d567 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/mod.rs +++ 
b/libs/@local/hashql/core/src/module/std_lib/graph/types/ontology/mod.rs @@ -1,12 +1,62 @@ pub(in crate::module::std_lib) mod entity_type; +pub mod types { + use crate::{ + module::std_lib, + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + #[must_use] + pub fn ontology_type_version(ty: &TypeBuilder<'_, '_>) -> TypeId { + ty.opaque(sym::path::OntologyTypeVersion, ty.string()) + } + + pub struct BaseUrlDependencies { + pub url: TypeId, + } + + #[must_use] + pub fn base_url(ty: &TypeBuilder<'_, '_>, deps: Option) -> TypeId { + let BaseUrlDependencies { url } = deps.unwrap_or_else(|| BaseUrlDependencies { + url: std_lib::core::url::types::url(ty), + }); + + ty.opaque(sym::path::BaseUrl, url) + } + + pub struct VersionedUrlDependencies { + pub base_url: TypeId, + pub ontology_type_version: TypeId, + } + + #[must_use] + pub fn versioned_url( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let VersionedUrlDependencies { + base_url: base_url_ty, + ontology_type_version: version_ty, + } = deps.unwrap_or_else(|| VersionedUrlDependencies { + base_url: self::base_url(ty, None), + ontology_type_version: self::ontology_type_version(ty), + }); + + ty.opaque( + sym::path::VersionedUrl, + ty.r#struct([(sym::base_url, base_url_ty), (sym::version, version_ty)]), + ) + } +} + use crate::{ heap::Heap, module::{ StandardLibrary, std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule}, }, - symbol::Symbol, + symbol::{Symbol, sym}, }; pub(in crate::module::std_lib) struct Ontology { @@ -22,38 +72,37 @@ impl<'heap> StandardLibraryModule<'heap> for Ontology { fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; // newtype OntologyTypeVersion = String; - let ontology_type_version_ty = lib - .ty - .opaque("::graph::ontology::OntologyTypeVersion", lib.ty.string()); + let ontology_type_version_ty = types::ontology_type_version(&lib.ty); def.push( - heap.intern_symbol("OntologyTypeVersion"), + 
sym::OntologyTypeVersion, ItemDef::newtype(lib.ty.env, ontology_type_version_ty, &[]), ); let url_ty = lib .manifest::() - .expect_newtype(heap.intern_symbol("Url")) + .expect_newtype(sym::Url) .id; // TODO: consider making this constructor private via intrinsic (requires VM) // newtype BaseUrl = Url; - let base_url_ty = lib.ty.opaque("::graph::types::ontology::BaseUrl", url_ty); - let base_url = ItemDef::newtype(lib.ty.env, base_url_ty, &[]); - def.push(heap.intern_symbol("BaseUrl"), base_url); + let base_url_ty = + types::base_url(&lib.ty, Some(types::BaseUrlDependencies { url: url_ty })); + def.push(sym::BaseUrl, ItemDef::newtype(lib.ty.env, base_url_ty, &[])); // newtype VersionedUrl = (base_url: BaseUrl, version: OntologyTypeVersion); - let versioned_url_ty = lib.ty.opaque( - "::graph::types::ontology::VersionedUrl", - lib.ty.r#struct([ - ("base_url", base_url_ty), - ("version", ontology_type_version_ty), - ]), + let versioned_url_ty = types::versioned_url( + &lib.ty, + Some(types::VersionedUrlDependencies { + base_url: base_url_ty, + ontology_type_version: ontology_type_version_ty, + }), + ); + def.push( + sym::VersionedUrl, + ItemDef::newtype(lib.ty.env, versioned_url_ty, &[]), ); - let versioned_url = ItemDef::newtype(lib.ty.env, versioned_url_ty, &[]); - def.push(lib.heap.intern_symbol("VersionedUrl"), versioned_url); def } diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/mod.rs index 078c532a1f4..2134f79dc61 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/mod.rs @@ -1,4 +1,29 @@ -pub(in crate::module::std_lib) mod web; +pub mod web; + +pub mod types { + use crate::{ + module::std_lib, + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + pub struct ActorGroupEntityUuidDependencies { + pub uuid: 
TypeId, + } + + #[must_use] + pub fn actor_group_entity_uuid( + ty: &TypeBuilder<'_, '_>, + deps: Option, + ) -> TypeId { + let ActorGroupEntityUuidDependencies { uuid } = + deps.unwrap_or_else(|| ActorGroupEntityUuidDependencies { + uuid: std_lib::core::uuid::types::uuid(ty), + }); + + ty.opaque(sym::path::ActorGroupEntityUuid, uuid) + } +} use crate::{ heap::Heap, @@ -6,7 +31,7 @@ use crate::{ StandardLibrary, std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule}, }, - symbol::Symbol, + symbol::{Symbol, sym}, }; pub(in crate::module::std_lib) struct ActorGroup { @@ -22,7 +47,6 @@ impl<'heap> StandardLibraryModule<'heap> for ActorGroup { fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; // newtype ActorGroupEntityUuid = EntityUuid; // (we just set it to Uuid to avoid any cycles) @@ -30,14 +54,15 @@ impl<'heap> StandardLibraryModule<'heap> for ActorGroup { // see: https://linear.app/hash/issue/H-4735/hashql-convert-rust-types-into-hashql-types let uuid_ty = lib .manifest::() - .expect_newtype(heap.intern_symbol("Uuid")); - let entity_uuid_ty = lib.ty.opaque( - "::graph::types::principal::actor_group::ActorGroupEntityUuid", - uuid_ty.id, + .expect_newtype(sym::Uuid) + .id; + let actor_group_entity_uuid_ty = types::actor_group_entity_uuid( + &lib.ty, + Some(types::ActorGroupEntityUuidDependencies { uuid: uuid_ty }), ); def.push( - heap.intern_symbol("ActorGroupEntityUuid"), - ItemDef::newtype(lib.ty.env, entity_uuid_ty, &[]), + sym::ActorGroupEntityUuid, + ItemDef::newtype(lib.ty.env, actor_group_entity_uuid_ty, &[]), ); def diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/web.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/web.rs index 0834fe10e1c..fa3319c5865 100644 --- a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/web.rs +++ 
b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/actor_group/web.rs @@ -4,9 +4,35 @@ use crate::{ StandardLibrary, std_lib::{self, ItemDef, ModuleDef, StandardLibraryModule}, }, - symbol::Symbol, + symbol::{Symbol, sym}, }; +pub mod types { + use crate::{ + module::std_lib, + symbol::sym, + r#type::{TypeBuilder, TypeId}, + }; + + pub struct WebIdDependencies { + pub actor_group_entity_uuid: TypeId, + } + + #[must_use] + pub fn web_id(ty: &TypeBuilder<'_, '_>, deps: Option<WebIdDependencies>) -> TypeId { + let WebIdDependencies { + actor_group_entity_uuid, + } = deps.unwrap_or_else(|| WebIdDependencies { + actor_group_entity_uuid: + std_lib::graph::types::principal::actor_group::types::actor_group_entity_uuid( + ty, None, + ), + }); + + ty.opaque(sym::path::WebId, actor_group_entity_uuid) + } +} + pub(in crate::module::std_lib) struct Web { _dependencies: (std_lib::graph::types::principal::actor_group::ActorGroup,), } @@ -20,20 +46,19 @@ impl<'heap> StandardLibraryModule<'heap> for Web { fn define(lib: &mut StandardLibrary<'_, 'heap>) -> ModuleDef<'heap> { let mut def = ModuleDef::new(); - let heap = lib.heap; // newtype WebId = ActorGroupEntityUuid; let actor_group_entity_uuid_ty = lib .manifest::<std_lib::graph::types::principal::actor_group::ActorGroup>() - .expect_newtype(heap.intern_symbol("ActorGroupEntityUuid")); - let entity_uuid_ty = lib.ty.opaque( - "::graph::types::principal::actor_group::web::WebId", - actor_group_entity_uuid_ty.id, - ); - def.push( - heap.intern_symbol("WebId"), - ItemDef::newtype(lib.ty.env, entity_uuid_ty, &[]), + .expect_newtype(sym::ActorGroupEntityUuid) + .id; + let web_id_ty = types::web_id( + &lib.ty, + Some(types::WebIdDependencies { + actor_group_entity_uuid: actor_group_entity_uuid_ty, + }), ); + def.push(sym::WebId, ItemDef::newtype(lib.ty.env, web_id_ty, &[])); def } diff --git a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/mod.rs b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/mod.rs index 674022d3350..ae6f9075180 100644 --- 
a/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/graph/types/principal/mod.rs @@ -1,4 +1,4 @@ -pub(in crate::module::std_lib) mod actor_group; +pub mod actor_group; use crate::{ heap::Heap, diff --git a/libs/@local/hashql/core/src/module/std_lib/mod.rs b/libs/@local/hashql/core/src/module/std_lib/mod.rs index 39c72b32c42..447d3065775 100644 --- a/libs/@local/hashql/core/src/module/std_lib/mod.rs +++ b/libs/@local/hashql/core/src/module/std_lib/mod.rs @@ -1,5 +1,5 @@ -mod core; -mod graph; +pub mod core; +pub mod graph; mod kernel; use ::core::iter; diff --git a/libs/@local/hashql/core/src/symbol/repr.rs b/libs/@local/hashql/core/src/symbol/repr.rs index 891017c53a8..42bd96d0e54 100644 --- a/libs/@local/hashql/core/src/symbol/repr.rs +++ b/libs/@local/hashql/core/src/symbol/repr.rs @@ -29,7 +29,6 @@ use alloc::alloc::handle_alloc_error; use core::{ alloc::{AllocError, Layout}, - mem, num::NonZero, ptr::{self, NonNull}, }; @@ -37,6 +36,18 @@ use core::{ use super::sym::SYMBOLS; use crate::heap::BumpAllocator; +unsafe extern "C" { + /// A dummy type used to force `RuntimeRepr` to be unsized while not requiring + /// references to it be wide pointers. + type Unsize; +} + +#[repr(C, align(2))] +struct RuntimeReprSkeleton { + len: usize, + data: [u8; 0], +} + /// Header for a runtime-allocated symbol with inline string data. /// /// # Memory Layout @@ -54,23 +65,23 @@ use crate::heap::BumpAllocator; /// # Provenance /// /// References to this type (`&RuntimeSymbol`) only have provenance for the header, -/// not the trailing bytes. All access must go through [`NonNull`] -/// to preserve full allocation provenance. +/// not the trailing bytes. All access must go through [`*mut RuntimeSymbol`] to +/// preserve full allocation provenance. 
#[repr(C, align(2))] pub(crate) struct RuntimeRepr { - len: usize, - data: [u8; 0], + skel: RuntimeReprSkeleton, + unsize: Unsize, } impl RuntimeRepr { /// Computes the allocation layout for a runtime symbol with `len` bytes of data. #[inline] fn layout(len: usize) -> Layout { - Layout::from_size_align( - size_of::().checked_add(len).expect("overflow"), - mem::align_of::(), - ) - .expect("invalid RuntimeSymbol layout") + let (layout, _offset) = Layout::new::() + .extend(Layout::array::(len).expect("should not overflow")) + .expect("valid RuntimeSymbol layout"); + + layout } /// Allocates a runtime symbol containing `value` on the given allocator. @@ -94,49 +105,27 @@ impl RuntimeRepr { /// # Errors /// /// Returns [`AllocError`] if the allocator cannot satisfy the request. + #[expect(clippy::cast_ptr_alignment)] fn try_alloc(alloc: &A, value: &str) -> Result, AllocError> { let len = value.len(); let layout = Self::layout(value.len()); - let ptr = alloc.allocate(layout)?.cast::(); + let ptr = alloc.allocate(layout)?.as_ptr() as *mut Self; // SAFETY: `ptr` points to a freshly allocated block of `layout` size. // We write `len` to the header and copy `len` bytes of string data // immediately after the header, which fits within the allocation. unsafe { - ptr.cast::().write(len); + (&raw mut (*ptr).skel.len).write(len); - let buf = ptr.add(1).cast::(); - ptr::copy_nonoverlapping(value.as_ptr(), buf.as_ptr(), len); + (&raw mut (*ptr).skel.data) + .cast::() + .copy_from_nonoverlapping(value.as_ptr(), value.len()); } - Ok(ptr) - } - - /// Returns a pointer to the inline string data. - /// - /// This performs pointer arithmetic without dereferencing, so it is safe. - /// The returned pointer has provenance for the trailing bytes if `this` - /// has provenance for the full allocation. - #[inline] - const fn data_ptr(this: NonNull) -> NonNull { - // SAFETY: `this` points to a valid `RuntimeSymbol` allocation, which - // always has at least `size_of::()` bytes. 
Adding 1 moves past - // the header to the inline data region. - unsafe { this.add(1) }.cast() - } - - /// Reads the length of the inline string data. - /// - /// # Safety - /// - /// - `this` must point to a valid, initialized [`RuntimeRepr`] allocation. - /// - The allocation must remain live for the duration of this call. - #[inline] - const unsafe fn len(this: NonNull) -> usize { - // SAFETY: Caller guarantees `this` points to a valid, initialized allocation. - unsafe { this.cast::().read() } + // SAFETY: the pointer returned from `alloc.allocate` is non-null + Ok(unsafe { NonNull::new_unchecked(ptr) }) } /// Returns the inline data as a byte slice. @@ -147,10 +136,15 @@ impl RuntimeRepr { /// - The allocation must remain live for the lifetime `'a`. /// - The returned slice must not be mutated for the lifetime `'a`. #[inline] - const unsafe fn as_bytes<'a>(this: NonNull) -> &'a [u8] { + const unsafe fn as_bytes<'a>(this: *mut Self) -> &'a [u8] { // SAFETY: Caller guarantees `this` is valid and the allocation outlives `'a`. - // `data_ptr` returns a pointer to the inline bytes, and `len` returns the count. - unsafe { core::slice::from_raw_parts(Self::data_ptr(this).as_ptr(), Self::len(this)) } + // `data_ptr` has provenance over the full allocation (no reborrow narrowing). + unsafe { + let data_ptr = (&raw const (*this).skel.data).cast::(); + let len = (&raw const (*this).skel.len).read(); + + core::slice::from_raw_parts(data_ptr, len) + } } /// Returns the inline data as a string slice. @@ -161,10 +155,9 @@ impl RuntimeRepr { /// - The allocation must remain live for the lifetime `'a`. /// - The returned string must not be mutated for the lifetime `'a`. #[inline] - const unsafe fn as_str<'a>(this: NonNull) -> &'a str { - // SAFETY: Caller guarantees `this` is valid and the allocation outlives `'a`. - // The bytes are valid UTF-8 because they were copied from a `&str` in `try_alloc`. 
- unsafe { core::str::from_raw_parts(Self::data_ptr(this).as_ptr(), Self::len(this)) } + const unsafe fn as_str<'a>(this: *mut Self) -> &'a str { + // SAFETY: The bytes are valid UTF-8 because they were copied from a `&str` in `try_alloc`. + unsafe { core::str::from_utf8_unchecked(Self::as_bytes(this)) } } } @@ -251,7 +244,8 @@ impl Repr { /// - `self` must have been created via [`Repr::runtime`]. /// - The underlying allocation must still be live. #[inline] - unsafe fn as_runtime(self) -> NonNull { + #[expect(clippy::cast_ptr_alignment)] + unsafe fn as_runtime(self) -> *mut RuntimeRepr { debug_assert!(self.tag() == Self::TAG_RUNTIME); self.ptr @@ -260,7 +254,7 @@ impl Repr { // lowest bit is always 0. Masking it off preserves a valid, non-zero address. unsafe { NonZero::new_unchecked(addr.get() & !Self::TAG_MASK) } }) - .cast::() + .as_ptr() as *mut RuntimeRepr } /// Extracts the constant symbol index. @@ -354,7 +348,7 @@ impl Repr { #[inline] pub(crate) fn runtime(symbol: NonNull) -> Self { const { - assert!(align_of::() >= Self::MIN_ALIGN); + assert!(align_of::() >= Self::MIN_ALIGN); } let ptr = symbol.map_addr(|addr| addr | Self::TAG_RUNTIME).cast(); @@ -366,7 +360,7 @@ impl Repr { const _: () = { assert!(size_of::() == size_of::<*const ()>()); assert!(size_of::>() == size_of::<*const ()>()); - assert!(align_of::() >= Repr::MIN_ALIGN); + assert!(align_of::() >= Repr::MIN_ALIGN); }; #[cfg(test)] @@ -481,8 +475,7 @@ mod tests { // SAFETY: `symbol` points to a valid allocation and `heap` is live. unsafe { - assert_eq!(RuntimeRepr::len(symbol), 5); - assert_eq!(RuntimeRepr::as_str(symbol).len(), 5); + assert_eq!(RuntimeRepr::as_str(symbol.as_ptr()).len(), 5); } } } diff --git a/libs/@local/hashql/core/src/symbol/sym.rs b/libs/@local/hashql/core/src/symbol/sym.rs index 4bc9c4fe5b5..d4809e9ed2b 100644 --- a/libs/@local/hashql/core/src/symbol/sym.rs +++ b/libs/@local/hashql/core/src/symbol/sym.rs @@ -4,6 +4,7 @@ use super::Symbol; hashql_macros::define_symbols! 
{ // [tidy] sort alphabetically start access, + ActorGroupEntityUuid, add, and, archived, @@ -19,38 +20,55 @@ hashql_macros::define_symbols! { bit_xor, Boolean, collect, + Confidence, confidence, core, created_at_decision_time, created_at_transaction_time, created_by_id, decision_time, + DecisionTime, Dict, div, draft_id, + DraftId, dummy: "", E, edition, edition_id, encodings, + end, entity, + Entity, entity_edition_id, entity_id, entity_type_ids, entity_uuid, + EntityEditionId, + EntityEditionProvenance, + EntityEncodings, + EntityId, + EntityMetadata, + EntityProvenance, + EntityUuid, eq, Err, + ExclusiveTemporalBound, filter, + FiniteTemporalBound, foo, gt, gte, id, + InclusiveTemporalBound, index, inferred, + InferredEntityProvenance, input, input_exists: "$exists", Integer, Intersection, + Interval, kernel, left_entity_confidence, left_entity_id, @@ -58,6 +76,8 @@ hashql_macros::define_symbols! { left_entity_uuid, left_entity_web_id, link_data, + LeftClosedTemporalInterval, + LinkData, List, lt, lte, @@ -72,15 +92,24 @@ hashql_macros::define_symbols! { null, Number, Ok, + OntologyTypeVersion, + OpenTemporalBound, option, + Option, or, + pinned, + PinnedDecisionTimeTemporalAxes, + PinnedTransactionTimeTemporalAxes, pow, properties, property_metadata, + PropertyObjectMetadata, + PropertyProvenance, provenance, provenance_edition, provenance_inferred, provided, + QueryTemporalAxes, r#as: "as", r#as_force: "as!", r#else: "else", @@ -96,7 +125,10 @@ hashql_macros::define_symbols! { r#type: "type", r#use: "use", R, + RightBoundedTemporalInterval, + S, record_id, + RecordId, Result, right_entity_confidence, right_entity_id, @@ -105,21 +137,34 @@ hashql_macros::define_symbols! 
{ right_entity_web_id, Some, special_form, + start, String, sub, T, + temporal, temporal_versioning, - then: "then", - thunk: "thunk", + TemporalBound, + TemporalInterval, + TemporalMetadata, + then, + thunk, + Timestamp, transaction_time, + TransactionTime, U, + UnboundedTemporalBound, Union, Unknown, unknown, Url, + uuid, + Uuid, + variable, vectors, version, + VersionedUrl, web_id, + WebId, // [tidy] sort alphabetically end internal: { @@ -176,13 +221,48 @@ hashql_macros::define_symbols! { path: { // [tidy] sort alphabetically start + ActorGroupEntityUuid: "::graph::types::principal::actor_group::ActorGroupEntityUuid", + BaseUrl: "::graph::types::ontology::BaseUrl", + Confidence: "::graph::types::knowledge::entity::Confidence", + DecisionTime: "::graph::temporal::DecisionTime", + DraftId: "::graph::types::knowledge::entity::DraftId", Entity: "::graph::types::knowledge::entity::Entity", + EntityEditionId: "::graph::types::knowledge::entity::EntityEditionId", + EntityEditionProvenance: "::graph::types::knowledge::entity::EntityEditionProvenance", + EntityEncodings: "::graph::types::knowledge::entity::EntityEncodings", + EntityId: "::graph::types::knowledge::entity::EntityId", + EntityMetadata: "::graph::types::knowledge::entity::EntityMetadata", + EntityProvenance: "::graph::types::knowledge::entity::EntityProvenance", + EntityUuid: "::graph::types::knowledge::entity::EntityUuid", + ExclusiveTemporalBound: "::graph::temporal::ExclusiveTemporalBound", graph_body_filter: "::graph::body::filter", graph_head_entities: "::graph::head::entities", graph_tail_collect: "::graph::tail::collect", - none: "::core::option::None", + InclusiveTemporalBound: "::graph::temporal::InclusiveTemporalBound", + InferredEntityProvenance: "::graph::types::knowledge::entity::InferredEntityProvenance", + Interval: "::graph::temporal::Interval", + LeftClosedTemporalInterval: "::graph::temporal::LeftClosedTemporalInterval", + LinkData: "::graph::types::knowledge::entity::LinkData", + None: 
"::core::option::None", + OntologyTypeVersion: "::graph::ontology::OntologyTypeVersion", + OpenTemporalBound: "::graph::temporal::OpenTemporalBound", option: "::core::option::Option", - some: "::core::option::Some", + PinnedDecisionTimeTemporalAxes: "::graph::temporal::PinnedDecisionTimeTemporalAxes", + PinnedTransactionTimeTemporalAxes: "::graph::temporal::PinnedTransactionTimeTemporalAxes", + PropertyObjectMetadata: "::graph::types::knowledge::entity::PropertyObjectMetadata", + PropertyProvenance: "::graph::types::knowledge::entity::PropertyProvenance", + RecordId: "::graph::types::knowledge::entity::RecordId", + RightBoundedTemporalInterval: "::graph::temporal::RightBoundedTemporalInterval", + Some: "::core::option::Some", + TemporalInterval: "::graph::types::knowledge::entity::TemporalInterval", + TemporalMetadata: "::graph::types::knowledge::entity::TemporalMetadata", + Timestamp: "::graph::temporal::Timestamp", + TransactionTime: "::graph::temporal::TransactionTime", + UnboundedTemporalBound: "::graph::temporal::UnboundedTemporalBound", + Url: "::core::url::Url", + Uuid: "::core::uuid::Uuid", + VersionedUrl: "::graph::types::ontology::VersionedUrl", + WebId: "::graph::types::principal::actor_group::web::WebId", // [tidy] sort alphabetically end } } diff --git a/libs/@local/hashql/diagnostics/src/diagnostic/label.rs b/libs/@local/hashql/diagnostics/src/diagnostic/label.rs index 5785c5d6bfd..a447d815168 100644 --- a/libs/@local/hashql/diagnostics/src/diagnostic/label.rs +++ b/libs/@local/hashql/diagnostics/src/diagnostic/label.rs @@ -255,6 +255,39 @@ impl Labels { } } + /// Returns the number of labels in the collection. 
+ /// + /// # Examples + /// + /// ``` + /// use hashql_diagnostics::{Label, diagnostic::Labels}; + /// + /// let mut labels = Labels::new(Label::new(0..5, "primary")); + /// labels.push(Label::new(10..15, "secondary")); + /// + /// assert_eq!(labels.len(), 2); + /// ``` + #[must_use] + pub const fn len(&self) -> usize { + self.labels.len() + } + + /// Returns `true` if the collection contains no labels. + /// + /// # Examples + /// + /// ``` + /// use hashql_diagnostics::{Label, diagnostic::Labels}; + /// + /// let labels = Labels::new(Label::new(0..5, "primary")); + /// + /// assert!(!labels.is_empty()); + /// ``` + #[must_use] + pub const fn is_empty(&self) -> bool { + self.labels.is_empty() + } + /// Adds a secondary label to the collection. /// /// All labels added via this method become secondary labels, which have the purpose of diff --git a/libs/@local/hashql/eval/Cargo.toml b/libs/@local/hashql/eval/Cargo.toml index bea6be986f8..96ae18e8dda 100644 --- a/libs/@local/hashql/eval/Cargo.toml +++ b/libs/@local/hashql/eval/Cargo.toml @@ -21,12 +21,33 @@ hashql-core = { workspace = true } type-system = { workspace = true, optional = true } # Private third-party dependencies -derive_more = { workspace = true, features = ["display"] } -simple-mermaid = { workspace = true } +bytes.workspace = true +derive_more = { workspace = true, features = ["display"] } +futures-lite = "2.6.1" +postgres-protocol.workspace = true +postgres-types = { workspace = true, features = ["uuid-1"] } +serde = { workspace = true } +serde_json = { workspace = true, features = ["raw_value"] } +simple-mermaid = { workspace = true } +tokio.workspace = true +tokio-postgres.workspace = true +tokio-util = { workspace = true, features = ["rt"] } +url.workspace = true +uuid.workspace = true [dev-dependencies] -hashql-compiletest = { workspace = true } -insta = { workspace = true } +error-stack.workspace = true +hash-graph-authorization = { workspace = true } +hash-graph-store.workspace = true 
+hash-graph-test-data.workspace = true +hashql-compiletest = { workspace = true } +hashql-diagnostics = { workspace = true, features = ["render"] } +insta = { workspace = true } +libtest-mimic = { workspace = true } +regex = { workspace = true } +similar-asserts = { workspace = true } +testcontainers = { workspace = true, features = ["reusable-containers"] } +testcontainers-modules = { workspace = true, features = ["postgres"] } [features] graph = ["dep:hash-graph-store", "dep:type-system"] @@ -38,6 +59,10 @@ workspace = true name = "compiletest" harness = false +[[test]] +name = "orchestrator" +harness = false + [package.metadata.sync.turborepo] ignore-dev-dependencies = [ "hashql-compiletest", diff --git a/libs/@local/hashql/eval/src/context.rs b/libs/@local/hashql/eval/src/context.rs index 6098a7d448d..dd7c7bd15d4 100644 --- a/libs/@local/hashql/eval/src/context.rs +++ b/libs/@local/hashql/eval/src/context.rs @@ -11,6 +11,7 @@ use hashql_mir::{ local::Local, }, def::{DefId, DefIdSlice, DefIdVec}, + intern::Interner, pass::{ analysis::dataflow::{ TraversalLivenessAnalysis, @@ -50,6 +51,7 @@ impl Index<(DefId, BasicBlockId)> for LiveOut { pub struct EvalContext<'ctx, 'heap, A: Allocator> { pub env: &'ctx Environment<'heap>, + pub interner: &'ctx Interner<'heap>, pub bodies: &'ctx DefIdSlice>, pub execution: &'ctx DefIdSlice>>, @@ -62,6 +64,7 @@ pub struct EvalContext<'ctx, 'heap, A: Allocator> { impl<'ctx, 'heap, A: Allocator> EvalContext<'ctx, 'heap, A> { pub fn new_in( env: &'ctx Environment<'heap>, + interner: &'ctx Interner<'heap>, bodies: &'ctx DefIdSlice>, execution: &'ctx DefIdSlice>>, alloc: A, @@ -109,6 +112,7 @@ impl<'ctx, 'heap, A: Allocator> EvalContext<'ctx, 'heap, A> { Self { env, + interner, bodies, execution, live_out: LiveOut(live_out), diff --git a/libs/@local/hashql/eval/src/lib.rs b/libs/@local/hashql/eval/src/lib.rs index 430a3aa0cc8..abb9b449033 100644 --- a/libs/@local/hashql/eval/src/lib.rs +++ b/libs/@local/hashql/eval/src/lib.rs @@ -14,7 
+14,9 @@ assert_matches, allocator_api, iter_array_chunks, - maybe_uninit_fill + maybe_uninit_fill, + impl_trait_in_assoc_type, + try_blocks )] extern crate alloc; @@ -22,6 +24,7 @@ pub mod context; pub mod error; #[cfg(feature = "graph")] pub mod graph; +pub mod orchestrator; pub mod postgres; #[cfg(test)] diff --git a/libs/@local/hashql/eval/src/orchestrator/codec/decode/mod.rs b/libs/@local/hashql/eval/src/orchestrator/codec/decode/mod.rs new file mode 100644 index 00000000000..87257702b47 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/codec/decode/mod.rs @@ -0,0 +1,406 @@ +use alloc::{rc::Rc, vec}; +use core::alloc::Allocator; + +use hashql_core::{ + algorithms::co_sort, + heap::{CollectIn as _, FromIn as _}, + r#type::{ + TypeId, + environment::Environment, + kind::{Apply, Generic, OpaqueType, PrimitiveType, StructType, TupleType, TypeKind}, + }, +}; +use hashql_mir::interpret::value::{self, Value}; + +use super::{JsonValueKind, JsonValueRef}; +use crate::{ + orchestrator::{ + Indexed, + error::{BridgeError, DecodeError}, + }, + postgres::ColumnDescriptor, +}; + +#[cfg(test)] +mod tests; + +/// Type-directed JSON deserializer that converts column values into interpreter +/// [`Value`]s. +/// +/// Walks the HashQL type tree to determine how each JSON node should be +/// interpreted: primitives map directly, structs expect JSON objects with +/// matching keys, tuples expect arrays of the correct length, unions try each +/// variant in order, and opaque types wrap their inner representation. +/// +/// When the type is unknown ([`Param`], [`Infer`], [`Unknown`]), falls back to +/// [`decode_unknown`](Self::decode_unknown), which uses JSON structure alone +/// (objects become structs or dicts, arrays become lists, etc.). 
+/// +/// [`Value`]: hashql_mir::interpret::value::Value +/// [`Param`]: hashql_core::type_::kind::TypeKind::Param +/// [`Infer`]: hashql_core::type_::kind::TypeKind::Infer +/// [`Unknown`]: hashql_core::type_::kind::TypeKind::Unknown +pub struct Decoder<'env, 'heap, A> { + env: &'env Environment<'heap>, + interner: &'env hashql_mir::intern::Interner<'heap>, + + alloc: A, +} + +impl<'env, 'heap, A: Allocator> Decoder<'env, 'heap, A> { + pub const fn new( + env: &'env Environment<'heap>, + interner: &'env hashql_mir::intern::Interner<'heap>, + alloc: A, + ) -> Self { + Self { + env, + interner, + alloc, + } + } + + fn decode_unknown(&self, value: JsonValueRef<'_>) -> Result, DecodeError<'heap>> + where + A: Clone, + { + match value { + JsonValueRef::Null => Ok(Value::Unit), + JsonValueRef::Bool(value) => Ok(Value::Integer(value::Int::from(value))), + JsonValueRef::Number(number) => { + if let Some(value) = number.as_i128() { + Ok(Value::Integer(value::Int::from(value))) + } else { + let value = number + .as_f64() + .ok_or(DecodeError::NumberOutOfRange { expected: None })?; + + Ok(Value::Number(value::Num::from(value))) + } + } + JsonValueRef::String(string) => { + let value = value::Str::from(Rc::from_in(string, self.alloc.clone())); + Ok(Value::String(value)) + } + JsonValueRef::Array(values) => { + // We default in the output to **lists** not tuples. Very important distinction + let mut output = value::List::new(); + + for element in values { + output.push_back(self.decode_unknown(element.into())?); + } + + Ok(Value::List(output)) + } + JsonValueRef::Object(map) => { + if !map.keys().all(|key| { + // Mirrors the implementation of `BaseUrl` parse validation. 
+ if key.len() < 2048 + && let Ok(url) = url::Url::parse(key) + && matches!(url.scheme(), "http" | "https") + && !url.cannot_be_a_base() + && key.ends_with('/') + { + true + } else { + false + } + }) { + let mut dict = value::Dict::new(); + + for (key, value) in map { + let key = self.decode_unknown(JsonValueRef::String(key))?; + let value = self.decode_unknown(value.into())?; + + dict.insert(key, value); + } + + return Ok(Value::Dict(dict)); + } + + let mut fields = Vec::with_capacity_in(map.len(), self.alloc.clone()); + let mut values = Vec::with_capacity_in(map.len(), self.alloc.clone()); + + for (key, value) in map { + let key = self.env.heap.intern_symbol(key); + let value = self.decode_unknown(value.into())?; + + fields.push(key); + values.push(value); + } + + co_sort(&mut fields, &mut values); + let fields = self.interner.symbols.intern_slice(&fields); + + value::Struct::new(fields, values) + .map(Value::Struct) + .ok_or(DecodeError::MalformedConstruction { expected: None }) + } + } + } + + /// Deserializes a JSON value into a typed [`Value`] guided by `type_id`. + /// + /// Recursively walks the type tree: opaque types wrap their inner + /// representation, structs expect JSON objects with matching keys, tuples + /// expect arrays of the correct length, unions try each variant in + /// declaration order, and primitives require exact JSON kind matches. + /// + /// # Errors + /// + /// Returns a [`DecodeError`] when the JSON shape does not match the + /// expected type (wrong kind, missing fields, length mismatches, etc.) + /// or when an unrepresentable type (intersection, closure, never) + /// reaches the decoder. 
+ /// + /// [`Value`]: hashql_mir::interpret::value::Value + #[expect(clippy::too_many_lines)] + pub fn decode( + &self, + type_id: TypeId, + value: JsonValueRef<'_>, + ) -> Result, DecodeError<'heap>> + where + A: Clone, + { + let r#type = self.env.r#type(type_id); + + match r#type.kind { + &TypeKind::Opaque(OpaqueType { name, repr }) => { + let value = self.decode(repr, value)?; + + Ok(Value::Opaque(value::Opaque::new( + name, + Rc::new_in(value, self.alloc.clone()), + ))) + } + TypeKind::Primitive(primitive_type) => match (primitive_type, value) { + (PrimitiveType::Number, JsonValueRef::Number(number)) => { + number.as_f64().map(From::from).map(Value::Number).ok_or( + DecodeError::NumberOutOfRange { + expected: Some(type_id), + }, + ) + } + (PrimitiveType::Integer, JsonValueRef::Number(number)) + if let Some(value) = number.as_i128() => + { + Ok(Value::Integer(value::Int::from(value))) + } + (PrimitiveType::String, JsonValueRef::String(string)) => { + let value = value::Str::from(Rc::from_in(string, self.alloc.clone())); + Ok(Value::String(value)) + } + (PrimitiveType::Null, JsonValueRef::Null) => Ok(Value::Unit), + (PrimitiveType::Boolean, JsonValueRef::Bool(value)) => { + Ok(Value::Integer(value::Int::from(value))) + } + ( + PrimitiveType::Number, + JsonValueRef::Null + | JsonValueRef::Bool(_) + | JsonValueRef::String(_) + | JsonValueRef::Array(_) + | JsonValueRef::Object(_), + ) + | ( + PrimitiveType::Integer, + JsonValueRef::Null + | JsonValueRef::Bool(_) + | JsonValueRef::Number(_) + | JsonValueRef::String(_) + | JsonValueRef::Array(_) + | JsonValueRef::Object(_), + ) + | ( + PrimitiveType::String, + JsonValueRef::Null + | JsonValueRef::Bool(_) + | JsonValueRef::Number(_) + | JsonValueRef::Array(_) + | JsonValueRef::Object(_), + ) + | ( + PrimitiveType::Null, + JsonValueRef::Bool(_) + | JsonValueRef::Number(_) + | JsonValueRef::String(_) + | JsonValueRef::Array(_) + | JsonValueRef::Object(_), + ) + | ( + PrimitiveType::Boolean, + JsonValueRef::Null + | 
JsonValueRef::Number(_) + | JsonValueRef::String(_) + | JsonValueRef::Array(_) + | JsonValueRef::Object(_), + ) => Err(DecodeError::TypeMismatch { + expected: type_id, + received: JsonValueKind::from(value), + }), + }, + TypeKind::Struct(StructType { fields }) => { + let JsonValueRef::Object(object) = value else { + return Err(DecodeError::TypeMismatch { + expected: type_id, + received: JsonValueKind::from(value), + }); + }; + + if object.len() != fields.len() { + return Err(DecodeError::StructLengthMismatch { + expected: type_id, + expected_length: fields.len(), + received_length: object.len(), + }); + } + + for field in fields.iter() { + if !object.contains_key(field.name.as_str()) { + return Err(DecodeError::MissingField { + expected: type_id, + field: field.name, + }); + } + } + + let names: Vec<_, A> = fields + .iter() + .map(|field| field.name) + .collect_in(self.alloc.clone()); + let names = self.interner.symbols.intern_slice(&names); + let mut values = vec::from_elem_in(Value::Unit, object.len(), self.alloc.clone()); + + // We assume the struct is closed. The length check and per-field + // check above guarantee a bijection between JSON keys and type + // fields, so the position lookup cannot fail. 
+ for (name, value) in object { + let field = fields + .iter() + .position(|field| field.name.as_str() == name) + .unwrap_or_else(|| unreachable!()); + + values[field] = self.decode(fields[field].value, value.into())?; + } + + value::Struct::new(names, values).map(Value::Struct).ok_or( + DecodeError::MalformedConstruction { + expected: Some(type_id), + }, + ) + } + TypeKind::Tuple(TupleType { fields }) => { + let JsonValueRef::Array(array) = value else { + return Err(DecodeError::TypeMismatch { + expected: type_id, + received: JsonValueKind::from(value), + }); + }; + + if array.len() != fields.len() { + return Err(DecodeError::TupleLengthMismatch { + expected: type_id, + expected_length: fields.len(), + received_length: array.len(), + }); + } + + let mut values: Vec<_, A> = Vec::with_capacity_in(array.len(), self.alloc.clone()); + for (element, &field) in array.iter().zip(fields) { + values.push(self.decode(field, element.into())?); + } + + value::Tuple::new(values).map(Value::Tuple).ok_or( + DecodeError::MalformedConstruction { + expected: Some(type_id), + }, + ) + } + + TypeKind::Union(union_type) => { + // Go through *each variant* and try to find the first one that matches + for &variant in &union_type.variants { + if let Ok(value) = self.decode(variant, value) { + return Ok(value); + } + } + + Err(DecodeError::NoMatchingVariant { + expected: type_id, + received: JsonValueKind::from(value), + }) + } + + TypeKind::Intrinsic(hashql_core::r#type::kind::IntrinsicType::List(list)) => { + let JsonValueRef::Array(array) = value else { + return Err(DecodeError::TypeMismatch { + expected: type_id, + received: JsonValueKind::from(value), + }); + }; + + let mut output = value::List::new(); + + for element in array { + output.push_back(self.decode(list.element, element.into())?); + } + + Ok(Value::List(output)) + } + TypeKind::Intrinsic(hashql_core::r#type::kind::IntrinsicType::Dict(dict)) => { + let JsonValueRef::Object(object) = value else { + return 
Err(DecodeError::TypeMismatch { + expected: type_id, + received: JsonValueKind::from(value), + }); + }; + + let mut output = value::Dict::new(); + + for (key, value) in object { + output.insert( + self.decode(dict.key, JsonValueRef::String(key))?, + self.decode(dict.value, value.into())?, + ); + } + + Ok(Value::Dict(output)) + } + + TypeKind::Intersection(_) => Err(DecodeError::IntersectionType { type_id }), + + &TypeKind::Apply(Apply { + base, + substitutions: _, + }) + | &TypeKind::Generic(Generic { base, arguments: _ }) => self.decode(base, value), + TypeKind::Closure(_) => Err(DecodeError::ClosureType { type_id }), + TypeKind::Never => Err(DecodeError::NeverType { type_id }), + + // We're flying free here, issue a warning, and just try to deserialize using the + // old tactics + // TODO: issue a warning + TypeKind::Param(_) | TypeKind::Infer(_) | TypeKind::Unknown => { + self.decode_unknown(value) + } + } + } + + /// Deserializes a column value into the expected type, or returns an error. + /// + /// The `column` parameter is only used for error reporting; + /// it identifies which result column failed to deserialize. 
+ pub(crate) fn try_decode( + &self, + r#type: TypeId, + value: JsonValueRef<'_>, + column: Indexed, + ) -> Result, BridgeError<'heap>> + where + A: Clone, + { + self.decode(r#type, value) + .map_err(|source| BridgeError::ValueDeserialization { column, source }) + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/codec/decode/tests.rs b/libs/@local/hashql/eval/src/orchestrator/codec/decode/tests.rs new file mode 100644 index 00000000000..d632d0eec36 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/codec/decode/tests.rs @@ -0,0 +1,490 @@ +use alloc::{alloc::Global, rc::Rc}; +use core::assert_matches; + +use hashql_core::{ + heap::Heap, + symbol::sym, + r#type::{TypeId, builder::TypeBuilder, environment::Environment}, +}; +use hashql_mir::{ + intern::Interner, + interpret::value::{self, Value}, +}; + +use super::{DecodeError, Decoder, JsonValueRef}; + +fn str_value(content: &str) -> Value<'_, Global> { + Value::String(value::Str::from(Rc::::from(content))) +} + +fn decoder<'env, 'heap>( + env: &'env Environment<'heap>, + interner: &'env Interner<'heap>, +) -> Decoder<'env, 'heap, Global> { + Decoder::new(env, interner, Global) +} + +#[test] +fn primitive_string() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let result = decoder + .decode(types.string(), JsonValueRef::String("hello")) + .expect("should succeed"); + assert_eq!(result, str_value("hello")); +} + +#[test] +fn primitive_integer() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let number = serde_json::Number::from(42); + let result = decoder + .decode(types.integer(), JsonValueRef::Number(&number)) + .expect("should succeed"); + assert_eq!(result, Value::Integer(value::Int::from(42_i128))); +} + 
+#[test] +fn primitive_number() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let number = serde_json::Number::from_f64(2.72).expect("valid f64"); + let result = decoder + .decode(types.number(), JsonValueRef::Number(&number)) + .expect("should decode number"); + assert_eq!(result, Value::Number(value::Num::from(2.72))); +} + +#[test] +fn primitive_boolean_true() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let result = decoder + .decode(types.boolean(), JsonValueRef::Bool(true)) + .expect("should succeed"); + let Value::Integer(int) = result else { + panic!("expected Value::Integer, got {result:?}"); + }; + assert_eq!(int.as_bool(), Some(true)); +} + +#[test] +fn primitive_boolean_false() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let result = decoder + .decode(types.boolean(), JsonValueRef::Bool(false)) + .expect("should succeed"); + let Value::Integer(int) = result else { + panic!("expected Value::Integer, got {result:?}"); + }; + assert_eq!(int.as_bool(), Some(false)); +} + +#[test] +fn primitive_null() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let result = decoder + .decode(types.null(), JsonValueRef::Null) + .expect("should succeed"); + assert_eq!(result, Value::Unit); +} + +#[test] +fn primitive_type_mismatch() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder 
= decoder(&env, &interner); + + let result = decoder.decode(types.integer(), JsonValueRef::String("hello")); + assert_matches!(result, Err(DecodeError::TypeMismatch { .. })); +} + +#[test] +fn struct_matching_fields() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let struct_type = types.r#struct([("a", types.integer()), ("b", types.string())]); + + let mut object = serde_json::Map::new(); + object.insert("a".to_owned(), serde_json::Value::Number(1.into())); + object.insert("b".to_owned(), serde_json::Value::String("two".to_owned())); + + let result = decoder + .decode(struct_type, JsonValueRef::Object(&object)) + .expect("should succeed"); + let Value::Struct(fields) = &result else { + panic!("expected Value::Struct, got {result:?}"); + }; + assert_eq!(fields.len(), 2); + assert_eq!(fields.values()[0], Value::Integer(value::Int::from(1_i128))); + assert_eq!(fields.values()[1], str_value("two")); +} + +#[test] +fn struct_missing_field() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let struct_type = types.r#struct([("a", types.integer()), ("b", types.string())]); + + let mut object = serde_json::Map::new(); + object.insert("a".to_owned(), serde_json::Value::Number(1.into())); + + let result = decoder.decode(struct_type, JsonValueRef::Object(&object)); + assert_matches!(result, Err(DecodeError::StructLengthMismatch { .. 
})); +} + +#[test] +fn struct_extra_field() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let struct_type = types.r#struct([("a", types.integer())]); + + let mut object = serde_json::Map::new(); + object.insert("a".to_owned(), serde_json::Value::Number(1.into())); + object.insert("b".to_owned(), serde_json::Value::Number(2.into())); + + let result = decoder.decode(struct_type, JsonValueRef::Object(&object)); + assert_matches!(result, Err(DecodeError::StructLengthMismatch { .. })); +} + +#[test] +fn tuple_correct_length() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let tuple_type = types.tuple([types.integer(), types.string()]); + + let array = [ + serde_json::Value::Number(1.into()), + serde_json::Value::String("two".to_owned()), + ]; + + let result = decoder + .decode(tuple_type, JsonValueRef::Array(&array)) + .expect("should succeed"); + let Value::Tuple(elements) = &result else { + panic!("expected Value::Tuple, got {result:?}"); + }; + assert_eq!(elements.len().get(), 2); + assert_eq!( + elements.values()[0], + Value::Integer(value::Int::from(1_i128)) + ); + assert_eq!(elements.values()[1], str_value("two")); +} + +#[test] +fn tuple_length_mismatch() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let tuple_type = types.tuple([types.integer(), types.string()]); + let array = [serde_json::Value::Number(1.into())]; + + let result = decoder.decode(tuple_type, JsonValueRef::Array(&array)); + assert_matches!(result, Err(DecodeError::TupleLengthMismatch { .. 
})); +} + +#[test] +fn union_first_variant_matches() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let union_type = types.union([types.integer(), types.string()]); + let number = serde_json::Number::from(42); + + let result = decoder + .decode(union_type, JsonValueRef::Number(&number)) + .expect("should succeed"); + assert_eq!(result, Value::Integer(value::Int::from(42_i128))); +} + +#[test] +fn union_second_variant_matches() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let union_type = types.union([types.integer(), types.string()]); + + let result = decoder + .decode(union_type, JsonValueRef::String("hello")) + .expect("should succeed"); + assert_eq!(result, str_value("hello")); +} + +#[test] +fn union_no_variant_matches() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let union_type = types.union([types.integer(), types.string()]); + + let result = decoder.decode(union_type, JsonValueRef::Bool(true)); + assert_matches!(result, Err(DecodeError::NoMatchingVariant { .. 
})); +} + +#[test] +fn opaque_wraps_inner() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let opaque_type = types.opaque(sym::path::Entity, types.string()); + + let result = decoder + .decode(opaque_type, JsonValueRef::String("inner")) + .expect("should succeed"); + let Value::Opaque(opaque) = &result else { + panic!("expected Value::Opaque, got {result:?}"); + }; + assert_eq!(opaque.name(), sym::path::Entity); + assert_eq!(*opaque.value(), str_value("inner")); +} + +#[test] +fn list_intrinsic() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let list_type = types.list(types.integer()); + let array = [ + serde_json::Value::Number(1.into()), + serde_json::Value::Number(2.into()), + ]; + + let result = decoder + .decode(list_type, JsonValueRef::Array(&array)) + .expect("should succeed"); + let Value::List(list) = &result else { + panic!("expected Value::List, got {result:?}"); + }; + assert_eq!(list.len(), 2); + let items: Vec<_> = list.iter().collect(); + assert_eq!(items[0], &Value::Integer(value::Int::from(1_i128))); + assert_eq!(items[1], &Value::Integer(value::Int::from(2_i128))); +} + +#[test] +fn dict_intrinsic() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let dict_type = types.dict(types.string(), types.integer()); + + let mut object = serde_json::Map::new(); + object.insert("x".to_owned(), serde_json::Value::Number(1.into())); + object.insert("y".to_owned(), serde_json::Value::Number(2.into())); + + let result = decoder + .decode(dict_type, JsonValueRef::Object(&object)) + .expect("should succeed"); + let Value::Dict(dict) 
= &result else { + panic!("expected Value::Dict, got {result:?}"); + }; + assert_eq!(dict.len(), 2); + assert_eq!( + dict.get(&str_value("x")), + Some(&Value::Integer(value::Int::from(1_i128))) + ); + assert_eq!( + dict.get(&str_value("y")), + Some(&Value::Integer(value::Int::from(2_i128))) + ); +} + +#[test] +fn intersection_type_error() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let intersection_type = types.intersection([types.integer(), types.string()]); + + let result = decoder.decode(intersection_type, JsonValueRef::Null); + assert_matches!(result, Err(DecodeError::IntersectionType { .. })); +} + +#[test] +fn closure_type_error() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let closure_type = types.closure([] as [TypeId; 0], types.integer()); + + let result = decoder.decode(closure_type, JsonValueRef::Null); + assert_matches!(result, Err(DecodeError::ClosureType { .. })); +} + +#[test] +fn never_type_error() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let result = decoder.decode(types.never(), JsonValueRef::Null); + assert_matches!(result, Err(DecodeError::NeverType { .. 
})); +} + +#[test] +fn unknown_type_integer_fallback() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let number = serde_json::Number::from(42); + let result = decoder + .decode(types.unknown(), JsonValueRef::Number(&number)) + .expect("should succeed"); + assert_eq!(result, Value::Integer(value::Int::from(42_i128))); +} + +#[test] +fn unknown_type_float_fallback() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let number = serde_json::Number::from_f64(2.72).expect("should succeed"); + let result = decoder + .decode(types.unknown(), JsonValueRef::Number(&number)) + .expect("should succeed"); + assert_eq!(result, Value::Number(value::Num::from(2.72))); +} + +#[test] +fn unknown_type_array_becomes_list() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let array = [serde_json::Value::Number(1.into())]; + let result = decoder + .decode(types.unknown(), JsonValueRef::Array(&array)) + .expect("should succeed"); + let Value::List(list) = &result else { + panic!("expected Value::List, got {result:?}"); + }; + assert_eq!(list.len(), 1); + let items: Vec<_> = list.iter().collect(); + assert_eq!(items[0], &Value::Integer(value::Int::from(1_i128))); +} + +#[test] +fn unknown_type_non_url_object_becomes_dict() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let mut object = serde_json::Map::new(); + object.insert("key".to_owned(), serde_json::Value::Number(1.into())); + + let result = decoder + 
.decode(types.unknown(), JsonValueRef::Object(&object)) + .expect("should succeed"); + let Value::Dict(_) = &result else { + panic!("expected Value::Dict, got {result:?}"); + }; +} + +#[test] +fn unknown_type_url_object_becomes_struct() { + let heap = Heap::new(); + let env = Environment::new(&heap); + let interner = Interner::new(&heap); + let types = TypeBuilder::synthetic(&env); + let decoder = decoder(&env, &interner); + + let mut object = serde_json::Map::new(); + object.insert( + "https://example.com/types/property-type/name/".to_owned(), + serde_json::Value::String("Alice".to_owned()), + ); + + let result = decoder + .decode(types.unknown(), JsonValueRef::Object(&object)) + .expect("should succeed"); + let Value::Struct(fields) = &result else { + panic!("expected Value::Struct, got {result:?}"); + }; + assert_eq!(fields.len(), 1); + assert_eq!(fields.values()[0], str_value("Alice")); +} diff --git a/libs/@local/hashql/eval/src/orchestrator/codec/encode/mod.rs b/libs/@local/hashql/eval/src/orchestrator/codec/encode/mod.rs new file mode 100644 index 00000000000..a6d04f58cd1 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/codec/encode/mod.rs @@ -0,0 +1,269 @@ +use core::{alloc::Allocator, error, ops::Bound}; + +use bytes::BytesMut; +use hashql_core::{symbol::Symbol, value::Primitive}; +use hashql_mir::{ + body::{local::Local, place::FieldIndex}, + interpret::{ + Inputs, RuntimeError, + suspension::{TemporalAxesInterval, TemporalInterval, Timestamp}, + value::{Int, Value}, + }, +}; +use postgres_protocol::types::RangeBound; +use postgres_types::{Json, ToSql, accepts, to_sql_checked}; +use serde::{ + Serialize, + ser::{SerializeMap as _, SerializeSeq as _}, +}; +use serde_json::value::RawValue; + +use super::{Postgres, Serde}; +use crate::{ + orchestrator::error::BridgeError, + postgres::{ParameterValue, TemporalAxis}, +}; + +#[cfg(test)] +mod tests; + +// timestamp is in ms +impl ToSql for Postgres { + accepts!(TIMESTAMPTZ); + + to_sql_checked!(); + 
+ #[expect(clippy::cast_possible_truncation)] + fn to_sql( + &self, + _: &postgres_types::Type, + out: &mut BytesMut, + ) -> Result> + where + Self: Sized, + { + // The value has been determined via `Date.UTC(2000, 0, 1)` in JS, and is the same as the one that jdbc uses: https://jdbc.postgresql.org/documentation/publicapi/constant-values.html + const BASE: i128 = 946_684_800_000; + + // Our timestamp is milliseconds since Unix epoch (1970-01-01). + // Postgres stores microseconds since 2000-01-01. + let value = ((Int::from(self.0).as_int() - BASE) * 1000) as i64; + + postgres_protocol::types::timestamp_to_sql(value, out); + Ok(postgres_types::IsNull::No) + } +} + +impl ToSql for Postgres { + accepts!(TSTZ_RANGE); + + to_sql_checked!(); + + fn to_sql( + &self, + _: &postgres_types::Type, + out: &mut BytesMut, + ) -> Result> + where + Self: Sized, + { + fn bound_to_sql( + bound: Bound, + buf: &mut BytesMut, + ) -> Result, Box> + { + Ok(match bound { + Bound::Unbounded => RangeBound::Unbounded, + Bound::Included(timestamp) => { + Postgres(timestamp).to_sql(&postgres_types::Type::TIMESTAMPTZ, buf)?; + RangeBound::Inclusive(postgres_protocol::IsNull::No) + } + Bound::Excluded(timestamp) => { + Postgres(timestamp).to_sql(&postgres_types::Type::TIMESTAMPTZ, buf)?; + RangeBound::Exclusive(postgres_protocol::IsNull::No) + } + }) + } + + postgres_protocol::types::range_to_sql( + |buf| bound_to_sql(self.0.start, buf), + |buf| bound_to_sql(self.0.end, buf), + out, + )?; + + Ok(postgres_types::IsNull::No) + } +} + +impl ToSql for Postgres> { + to_sql_checked!(); + + fn to_sql( + &self, + ty: &postgres_types::Type, + out: &mut BytesMut, + ) -> Result> + where + Self: Sized, + { + self.0.as_str().to_sql(ty, out) + } + + fn accepts(ty: &postgres_types::Type) -> bool + where + Self: Sized, + { + <&str>::accepts(ty) + } +} + +impl Serialize for Serde<&Value<'_, A>> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match &self.0 { + Value::Unit => 
serializer.serialize_unit(), + Value::Integer(int) => { + if let Some(bool) = int.as_bool() { + serializer.serialize_bool(bool) + } else { + serializer.serialize_i128(int.as_int()) + } + } + Value::Number(num) => serializer.serialize_f64(num.as_f64()), + Value::String(str) => serializer.serialize_str(str.as_str()), + Value::Pointer(_) => Err(serde::ser::Error::custom("pointer value not supported")), + Value::Opaque(opaque) => Self(opaque.value()).serialize(serializer), + Value::Struct(r#struct) => { + let mut inner = serializer.serialize_map(Some(r#struct.len()))?; + + for (field, value) in r#struct.fields().iter().zip(r#struct.values()) { + inner.serialize_entry(&field.as_str(), &Self(value))?; + } + + inner.end() + } + Value::Tuple(tuple) => { + let mut inner = serializer.serialize_seq(Some(tuple.len().get()))?; + + for value in tuple.values() { + inner.serialize_element(&Self(value))?; + } + + inner.end() + } + Value::List(list) => { + let mut inner = serializer.serialize_seq(Some(list.len()))?; + + for value in list.iter() { + inner.serialize_element(&Self(value))?; + } + + inner.end() + } + Value::Dict(dict) => { + let mut inner = serializer.serialize_map(Some(dict.len()))?; + + for (key, value) in dict.iter() { + inner.serialize_entry(&Self(key), &Self(value))?; + } + + inner.end() + } + } + } +} + +/// Serializes a runtime [`Value`] to a JSON [`RawValue`] suitable for use as +/// a PostgreSQL `JSONB` parameter. +/// +/// # Errors +/// +/// Returns [`BridgeError::ValueSerialization`] if the value contains +/// unsupported shapes (e.g. pointer values). 
+/// +/// [`Value`]: hashql_mir::interpret::value::Value +pub(crate) fn serialize_value<'heap, V: Allocator>( + value: &Value<'heap, V>, +) -> Result>, BridgeError<'heap>> { + let string = serde_json::to_string(&Serde(value)) + .map_err(|source| BridgeError::ValueSerialization { source })?; + + RawValue::from_string(string) + .map_err(|source| BridgeError::ValueSerialization { source }) + .map(Json) +} + +/// Encodes a single query [`Parameter`] into a boxed [`ToSql`] value ready +/// for the PostgreSQL wire protocol. +/// +/// Handles all parameter variants: user inputs (serialized to JSON), literal +/// integers and primitives, interned symbols, captured environment values, +/// and temporal axis intervals. +/// +/// # Errors +/// +/// Returns a [`RuntimeError`] if environment lookup fails or value +/// serialization fails. +/// +/// [`ToSql`]: postgres_types::ToSql +pub(crate) fn encode_parameter_in<'ctx, 'heap, V: Allocator + 'ctx, A: Allocator>( + parameter: &ParameterValue<'heap>, + inputs: &'ctx Inputs<'heap, impl Allocator>, + temporal_axes: &TemporalAxesInterval, + env: impl FnOnce( + Local, + FieldIndex, + ) -> Result<&'ctx Value<'heap, V>, RuntimeError<'heap, BridgeError<'heap>, V>>, + alloc: A, +) -> Result, RuntimeError<'heap, BridgeError<'heap>, V>> { + match parameter { + &ParameterValue::Input(symbol) => { + let value = inputs + .get(symbol) + .map(|value| serialize_value(value).map_err(RuntimeError::Suspension)) + .transpose()?; + Ok(Box::new_in(value, alloc)) + } + ParameterValue::Int(int) => { + let int = int.as_int(); + if let Ok(int) = i64::try_from(int) { + Ok(Box::new_in(int, alloc)) + } else { + // Too large to be represented as an i64, instead use JSONB + Ok(Box::new_in(Json(int), alloc)) + } + } + ParameterValue::Primitive(primitive) => match primitive { + Primitive::Null => Ok(Box::new_in(None::>, alloc)), + &Primitive::Boolean(value) => Ok(Box::new_in(value, alloc)), + Primitive::Float(float) => Ok(Box::new_in(float.as_f64(), alloc)), + 
Primitive::Integer(integer) => { + if let Some(int) = integer.as_i64() { + Ok(Box::new_in(int, alloc)) + } else { + // Too large to be represented as an i64, because that means we also + // **cannot** serialize it via serde, we fallback to + // using floats. + Ok(Box::new_in(integer.as_f64(), alloc)) + } + } + Primitive::String(value) => Ok(Box::new_in(Box::::from(value.as_str()), alloc)), + }, + &ParameterValue::Symbol(symbol) => Ok(Box::new_in(Postgres(symbol), alloc)), + &ParameterValue::Env(local, field_index) => { + let value = env(local, field_index)?; + let serialized = serialize_value(value).map_err(RuntimeError::Suspension)?; + Ok(Box::new_in(serialized, alloc) as Box) + } + ParameterValue::TemporalAxis(TemporalAxis::Decision) => Ok(Box::new_in( + Postgres(temporal_axes.decision_time.clone()), + alloc, + )), + ParameterValue::TemporalAxis(TemporalAxis::Transaction) => Ok(Box::new_in( + Postgres(temporal_axes.transaction_time.clone()), + alloc, + )), + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/codec/encode/tests.rs b/libs/@local/hashql/eval/src/orchestrator/codec/encode/tests.rs new file mode 100644 index 00000000000..3c8f326b922 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/codec/encode/tests.rs @@ -0,0 +1,223 @@ +use alloc::{alloc::Global, rc::Rc}; +use core::ops::Bound; + +use bytes::BytesMut; +use hashql_core::heap::Heap; +use hashql_mir::{ + intern::Interner, + interpret::{ + suspension::{TemporalInterval, Timestamp}, + value::{self, Value}, + }, +}; +use postgres_types::ToSql as _; + +use super::{Postgres, Serde, serialize_value}; + +fn to_json_string(value: &Value<'_, Global>) -> String { + serde_json::to_string(&Serde(value)).expect("should succeed") +} + +#[test] +fn serialize_boolean_true() { + let value = Value::::Integer(value::Int::from(true)); + assert_eq!(to_json_string(&value), "true"); +} + +#[test] +fn serialize_boolean_false() { + let value = Value::::Integer(value::Int::from(false)); + 
assert_eq!(to_json_string(&value), "false"); +} + +#[test] +fn serialize_integer() { + let value = Value::::Integer(value::Int::from(42_i128)); + assert_eq!(to_json_string(&value), "42"); +} + +#[test] +fn serialize_integer_one_not_as_bool() { + // Int::from(1_i32) has size=128, not size=1. + // Must serialize as numeric 1, not as boolean true. + let value = Value::::Integer(value::Int::from(1_i32)); + assert_eq!(to_json_string(&value), "1"); +} + +#[test] +fn serialize_integer_zero_not_as_bool() { + // Int::from(0_i32) has size=128, not size=1. + // Must serialize as numeric 0, not as boolean false. + let value = Value::::Integer(value::Int::from(0_i32)); + assert_eq!(to_json_string(&value), "0"); +} + +#[test] +fn serialize_number() { + let value = Value::::Number(value::Num::from(2.72)); + assert_eq!(to_json_string(&value), "2.72"); +} + +#[test] +fn serialize_string() { + let value = Value::::String(value::Str::from(Rc::::from("hello"))); + assert_eq!(to_json_string(&value), "\"hello\""); +} + +#[test] +fn serialize_unit() { + let value = Value::::Unit; + assert_eq!(to_json_string(&value), "null"); +} + +#[test] +fn serialize_opaque_unwraps() { + let inner = Value::::Integer(value::Int::from(42_i128)); + let value = Value::Opaque(value::Opaque::new( + hashql_core::symbol::sym::path::Entity, + Rc::new(inner), + )); + assert_eq!(to_json_string(&value), "42"); +} + +#[test] +fn serialize_tuple_as_array() { + let tuple = value::Tuple::new(alloc::vec![ + Value::::Integer(value::Int::from(1_i128)), + Value::Integer(value::Int::from(2_i128)), + ]) + .expect("should succeed"); + + let value = Value::Tuple(tuple); + assert_eq!(to_json_string(&value), "[1,2]"); +} + +#[test] +fn serialize_list() { + let mut list = value::List::::new(); + list.push_back(Value::Integer(value::Int::from(1_i128))); + list.push_back(Value::Integer(value::Int::from(2_i128))); + + let value = Value::List(list); + assert_eq!(to_json_string(&value), "[1,2]"); +} + +#[test] +fn 
serialize_struct_as_map() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + let fields = interner + .symbols + .intern_slice(&[heap.intern_symbol("name"), heap.intern_symbol("value")]); + + let values = alloc::vec![ + Value::::String(value::Str::from(Rc::::from("Alice"))), + Value::Integer(value::Int::from(42_i128)), + ]; + + let struct_value = value::Struct::new(fields, values).expect("should succeed"); + let value = Value::Struct(struct_value); + let json = to_json_string(&value); + + let parsed: serde_json::Value = serde_json::from_str(&json).expect("should succeed"); + let object = parsed.as_object().expect("should be an object"); + assert_eq!(object.len(), 2); + assert_eq!(parsed["name"], "Alice"); + assert_eq!(parsed["value"], 42); +} + +#[test] +fn serialize_pointer_fails() { + let value = Value::::Pointer(value::Ptr::new(hashql_mir::def::DefId::new(0))); + let result = serde_json::to_string(&Serde(&value)); + assert!(result.is_err(), "pointer values should not be serializable"); +} + +#[test] +fn serialize_value_produces_raw_json() { + let value = Value::::Integer(value::Int::from(42_i128)); + let result = serialize_value(&value).expect("should succeed"); + assert_eq!(result.0.get(), "42"); +} + +#[test] +fn timestamp_to_sql_known_epoch() { + let mut buffer = BytesMut::new(); + + // 2000-01-01T00:00:00Z in milliseconds since Unix epoch = 946684800000 + let timestamp = Timestamp::from(value::Int::from(946_684_800_000_i128)); + Postgres(timestamp) + .to_sql(&postgres_types::Type::TIMESTAMPTZ, &mut buffer) + .expect("should succeed"); + + // Should encode as 0 microseconds since the postgres epoch (2000-01-01) + assert_eq!(buffer.len(), 8); + #[expect( + clippy::big_endian_bytes, + reason = "postgres wire format is big-endian" + )] + let encoded = i64::from_be_bytes(buffer[..8].try_into().expect("should succeed")); + assert_eq!(encoded, 0); +} + +#[test] +fn timestamp_to_sql_one_second_after_epoch() { + let mut buffer = BytesMut::new(); + 
+ + // 2000-01-01T00:00:01Z = 946684801000 ms since Unix epoch + let timestamp = Timestamp::from(value::Int::from(946_684_801_000_i128)); + Postgres(timestamp) + .to_sql(&postgres_types::Type::TIMESTAMPTZ, &mut buffer) + .expect("should succeed"); + + #[expect( + clippy::big_endian_bytes, + reason = "postgres wire format is big-endian" + )] + let encoded = i64::from_be_bytes(buffer[..8].try_into().expect("should succeed")); + // 1 second = 1_000_000 microseconds + assert_eq!(encoded, 1_000_000); +} + +#[test] +fn temporal_interval_point_encodes() { + let mut buffer = BytesMut::new(); + + let timestamp = Timestamp::from(value::Int::from(946_684_800_000_i128)); + let interval = TemporalInterval { + start: Bound::Included(timestamp), + end: Bound::Included(timestamp), + }; + + Postgres(interval) + .to_sql(&postgres_types::Type::TSTZ_RANGE, &mut buffer) + .expect("should succeed"); + + // Range wire format: 1 byte flags + lower bound (4 byte len + 8 byte data) + + // upper bound (4 byte len + 8 byte data) = 25 bytes for two inclusive timestamps + assert_eq!(buffer.len(), 25); + + // Flags byte: RANGE_LB_INC (0x02, lower bound inclusive) | RANGE_UB_INC (0x04, + // upper bound inclusive) = 0x06 for [inclusive, inclusive] + // (postgres_protocol range encoding details) + let flags = buffer[0]; + assert_ne!(flags, 0, "flags should indicate both bounds are present"); +} + +#[test] +fn temporal_interval_unbounded_encodes() { + let mut buffer = BytesMut::new(); + + let interval = TemporalInterval { + start: Bound::Unbounded, + end: Bound::Unbounded, + }; + + Postgres(interval) + .to_sql(&postgres_types::Type::TSTZ_RANGE, &mut buffer) + .expect("should succeed"); + + // Fully unbounded range: only 1 byte for flags + assert_eq!(buffer.len(), 1); +} diff --git a/libs/@local/hashql/eval/src/orchestrator/codec/mod.rs b/libs/@local/hashql/eval/src/orchestrator/codec/mod.rs new file mode 100644 index 00000000000..f3d0422b5d4 --- /dev/null +++ 
b/libs/@local/hashql/eval/src/orchestrator/codec/mod.rs @@ -0,0 +1,103 @@ +//! JSON codec for converting between interpreter [`Value`]s and the PostgreSQL +//! wire format. +//! +//! - [`decode`]: deserializes JSON column values (from `tokio_postgres` rows) into typed +//! [`Value`]s, guided by the HashQL type system. +//! - [`encode`]: serializes runtime [`Value`]s and query parameters into forms that +//! `tokio_postgres` can send to the database (via [`ToSql`]). +//! +//! The [`JsonValueRef`] type provides a borrowed view over `serde_json::Value` +//! that avoids cloning during decode, while [`JsonValueKind`] is a data-free +//! tag used in error reporting. +//! +//! [`Value`]: hashql_mir::interpret::value::Value +//! [`ToSql`]: postgres_types::ToSql + +pub(crate) mod decode; +pub(crate) mod encode; + +pub use self::decode::Decoder; +pub use crate::orchestrator::error::DecodeError; + +/// Newtype wrapper that provides [`ToSql`](postgres_types::ToSql) +/// implementations for types that need custom PostgreSQL wire encoding. +#[derive(Debug)] +pub(crate) struct Postgres(pub T); + +/// Newtype wrapper that provides [`Serialize`](serde::Serialize) +/// implementations for types that need custom JSON serialization. +/// +/// Wrap a `&Value` in `Serde` to serialize it to JSON using the interpreter's +/// value representation rules: booleans serialize as JSON booleans (not +/// integers), opaques unwrap to their inner value, structs serialize as +/// objects with field names as keys. +#[derive(Debug)] +pub struct Serde(pub T); + +/// Borrowed view over a JSON value, avoiding clones during decode. +/// +/// Mirrors the variants of [`serde_json::Value`] but holds references +/// instead of owned data. Constructed from `&serde_json::Value` via the +/// [`From`] impl, or directly for single-typed columns (e.g. +/// `JsonValueRef::String(&str)` for a `TEXT` column). 
+#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum JsonValueRef<'value> { + Null, + Bool(bool), + Number(&'value serde_json::Number), + String(&'value str), + Array(&'value [serde_json::Value]), + Object(&'value serde_json::Map), +} + +/// The kind of a JSON value, without carrying the actual data. +/// +/// Used in error reporting to describe what was received when a decode fails. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum JsonValueKind { + Null, + Bool, + Number, + String, + Array, + Object, +} + +impl JsonValueKind { + pub(crate) const fn as_str(self) -> &'static str { + match self { + Self::Null => "null", + Self::Bool => "boolean", + Self::Number => "number", + Self::String => "string", + Self::Array => "array", + Self::Object => "object", + } + } +} + +impl From> for JsonValueKind { + fn from(value: JsonValueRef<'_>) -> Self { + match value { + JsonValueRef::Null => Self::Null, + JsonValueRef::Bool(_) => Self::Bool, + JsonValueRef::Number(_) => Self::Number, + JsonValueRef::String(_) => Self::String, + JsonValueRef::Array(_) => Self::Array, + JsonValueRef::Object(_) => Self::Object, + } + } +} + +impl<'value> From<&'value serde_json::Value> for JsonValueRef<'value> { + fn from(value: &'value serde_json::Value) -> Self { + match value { + serde_json::Value::Null => JsonValueRef::Null, + &serde_json::Value::Bool(value) => JsonValueRef::Bool(value), + serde_json::Value::Number(number) => JsonValueRef::Number(number), + serde_json::Value::String(string) => JsonValueRef::String(string.as_str()), + serde_json::Value::Array(array) => JsonValueRef::Array(array), + serde_json::Value::Object(object) => JsonValueRef::Object(object), + } + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/error.rs b/libs/@local/hashql/eval/src/orchestrator/error.rs new file mode 100644 index 00000000000..b19b096192e --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/error.rs @@ -0,0 +1,718 @@ +//! 
Errors that occur while fulfilling [`GraphRead`] suspensions. +//! +//! These are internal runtime errors: failures in compiled query execution, +//! row decoding, or parameter encoding. The user wrote HashQL, not SQL; if +//! the bridge fails, it indicates a bug in the compiler or runtime. +//! +//! [`GraphRead`]: hashql_mir::body::terminator::GraphRead + +use alloc::string::String; + +use hashql_core::{ + pretty::{Formatter, RenderOptions}, + span::SpanId, + symbol::Symbol, + r#type::{TypeFormatter, TypeFormatterOptions, TypeId, environment::Environment}, +}; +use hashql_diagnostics::{ + Diagnostic, Label, category::TerminalDiagnosticCategory, diagnostic::Message, + severity::Severity, +}; +use hashql_mir::{ + body::{basic_block::BasicBlockId, local::Local}, + def::DefId, + interpret::error::{ + InterpretDiagnostic, InterpretDiagnosticCategory, SuspensionDiagnosticCategory, + }, +}; + +use super::{Indexed, codec::JsonValueKind}; +use crate::postgres::ColumnDescriptor; + +const QUERY_EXECUTION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "query-execution", + name: "Query Execution", +}; + +const ROW_HYDRATION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "row-hydration", + name: "Row Hydration", +}; + +const PARAMETER_ENCODING: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "parameter-encoding", + name: "Parameter Encoding", +}; + +const VALUE_DESERIALIZATION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "value-deserialization", + name: "Value Deserialization", +}; + +const CONTINUATION_DESERIALIZATION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "continuation-deserialization", + name: "Continuation Deserialization", +}; + +const INVALID_CONTINUATION_BLOCK_ID: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "invalid-continuation-block-id", + name: "Invalid Continuation Block ID", +}; + +const INVALID_CONTINUATION_LOCAL: TerminalDiagnosticCategory = 
TerminalDiagnosticCategory { + id: "invalid-continuation-local", + name: "Invalid Continuation Local", +}; + +const QUERY_LOOKUP: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "query-lookup", + name: "Query Lookup", +}; + +const INCOMPLETE_CONTINUATION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "incomplete-continuation", + name: "Incomplete Continuation", +}; + +const MISSING_EXECUTION_RESIDUAL: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "missing-execution-residual", + name: "Missing Execution Residual", +}; + +const INVALID_FILTER_RETURN: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "invalid-filter-return", + name: "Invalid Filter Return", +}; + +const VALUE_SERIALIZATION: TerminalDiagnosticCategory = TerminalDiagnosticCategory { + id: "value-serialization", + name: "Value Serialization", +}; + +const fn category(terminal: &'static TerminalDiagnosticCategory) -> InterpretDiagnosticCategory { + InterpretDiagnosticCategory::Suspension(SuspensionDiagnosticCategory(terminal)) +} + +/// Errors that occur while decoding a JSON value into a typed [`Value`]. +/// +/// Each variant carries the leaf [`TypeId`] at the point of failure: the +/// specific type in the tree where decoding broke. The caller provides the +/// top-level type context (e.g. via a column descriptor), so the diagnostic +/// can show both *what* the column was supposed to produce and *where* in the +/// type tree it went wrong. +/// +/// [`Value`]: hashql_mir::interpret::value::Value +#[derive(Debug, Copy, Clone)] +pub enum DecodeError<'heap> { + /// The JSON value kind does not match the expected type. + /// + /// For example, the decoder expected a JSON object (for a struct) but + /// received a number. + TypeMismatch { + /// The leaf type that was being decoded when the mismatch occurred. + expected: TypeId, + /// The JSON value kind that was actually received. 
+ received: JsonValueKind, + }, + + /// A required field is missing from a JSON object when decoding a struct. + MissingField { + /// The struct type being decoded. + expected: TypeId, + /// The name of the missing field. + field: Symbol<'heap>, + }, + + /// The JSON object has a different number of keys than the struct expects. + StructLengthMismatch { + /// The struct type being decoded. + expected: TypeId, + /// The number of fields the struct type requires. + expected_length: usize, + /// The number of keys in the JSON object. + received_length: usize, + }, + + /// The JSON array length does not match the expected tuple arity. + TupleLengthMismatch { + /// The tuple type being decoded. + expected: TypeId, + /// The number of elements the tuple type requires. + expected_length: usize, + /// The number of elements in the JSON array. + received_length: usize, + }, + + /// None of a union type's variants could decode the value. + NoMatchingVariant { + /// The union type being decoded. + expected: TypeId, + /// The JSON value kind that no variant accepted. + received: JsonValueKind, + }, + + /// A JSON number could not be represented as `f64`. + /// + /// This only occurs when `serde_json`'s `arbitrary_precision` feature is + /// active and the number overflows to infinity or NaN. The variant + /// optionally carries the type being decoded, absent when the failure + /// occurs inside the untyped fallback path. + NumberOutOfRange { + /// The numeric type that was being decoded, if known. + expected: Option, + }, + + /// An internal invariant was violated during value construction. + /// + /// This indicates a bug in the decoder itself, for example constructing + /// a struct with mismatched field/value counts, or an empty tuple. The + /// variant optionally carries the type, absent when the failure occurs + /// inside the untyped fallback path. + MalformedConstruction { + /// The type being constructed when the invariant was violated, if known. 
+ expected: Option, + }, + + /// An intersection type reached the decoder. + /// + /// Intersection types cannot be safely represented as JSON, so the + /// placement pass should have rejected any query that would require + /// deserializing one from a postgres result. + IntersectionType { + /// The intersection type that was encountered. + type_id: TypeId, + }, + + /// A closure type reached the decoder. + /// + /// Closures are opaque runtime values that cannot be serialized or + /// transported through postgres. The placement pass should have + /// rejected any query that would require deserializing a closure. + ClosureType { + /// The closure type that was encountered. + type_id: TypeId, + }, + + /// A never type (`!`) reached the decoder. + /// + /// The never type is uninhabited: no value of type `!` can exist, so + /// attempting to deserialize one is always a bug. + NeverType { + /// The never type that was encountered. + type_id: TypeId, + }, +} + +/// Errors from the bridge while fulfilling a [`GraphRead`] suspension. +/// +/// All variants represent internal failures. The user wrote HashQL, not SQL; +/// if the bridge fails, the compiler or runtime produced something invalid. +/// +/// [`GraphRead`]: hashql_mir::body::terminator::GraphRead +#[derive(Debug)] +pub enum BridgeError<'heap> { + /// The compiled SQL query was rejected by PostgreSQL. + /// + /// Carries the generated SQL so the diagnostic can show exactly what + /// the compiler produced. + QueryExecution { + /// The SQL statement that was sent to the database. + sql: String, + /// The rejection error from the database. + source: tokio_postgres::Error, + }, + + /// A row returned by PostgreSQL could not be decoded into a value. + /// + /// The query executed successfully, but a column in the result set has a + /// type the runtime does not expect, indicating a mismatch between what + /// the SQL lowering pass promised and what the database actually returned. 
+ RowHydration { + /// The column descriptor identifying what this column represents. + column: Indexed, + /// The database error describing the type mismatch. + source: tokio_postgres::Error, + }, + + /// A decoded column value does not match the expected type for its entity path. + /// + /// The column decoded successfully at the PostgreSQL wire level, but the + /// resulting value could not be deserialized into the HashQL type the + /// runtime expects for this storage location. This indicates the SQL + /// lowering pass produced a query whose result shape does not match the + /// entity schema. + ValueDeserialization { + /// The column descriptor identifying what this column represents. + column: Indexed, + /// The specific decode failure. + source: DecodeError<'heap>, + }, + + /// A continuation local could not be deserialized back into its expected type. + /// + /// Continuation locals are values that were serialized into JSON by the SQL + /// lowering pass and returned alongside query results so the interpreter can + /// resume execution. If one of these cannot be decoded, the lowering pass + /// produced a continuation whose shape the runtime cannot reconstruct. + ContinuationDeserialization { + /// The definition containing the continuation. + body: DefId, + /// The local variable that failed to deserialize. + local: Local, + /// The specific decode failure. + source: DecodeError<'heap>, + }, + + /// A query parameter could not be serialized for PostgreSQL. + /// + /// The SQL lowering pass emitted a parameter that the encoder does not + /// know how to serialize into the wire format the database expects. + ParameterEncoding { + /// The zero-based index of the parameter that failed (`$1` = index 0). + parameter: usize, + /// The encoding error. + source: Box, + }, + + /// A continuation block ID returned by PostgreSQL is out of range. + /// + /// The SQL lowering pass encodes the target basic block as an integer in the + /// query result. 
A negative value cannot represent a valid [`BasicBlockId`] + /// and indicates a bug in the lowering pass. + InvalidContinuationBlockId { + /// The definition containing the continuation. + body: DefId, + /// The invalid block ID value returned by PostgreSQL. + block_id: i32, + }, + + /// A continuation local index returned by PostgreSQL is out of range. + /// + /// The SQL lowering pass encodes local variable indices as integers in the + /// query result. A negative value cannot represent a valid [`Local`] and + /// indicates a bug in the lowering pass. + /// + /// [`Local`]: hashql_mir::body::local::Local + InvalidContinuationLocal { + /// The definition containing the continuation. + body: DefId, + /// The invalid local value returned by PostgreSQL. + local: i32, + }, + + /// No prepared query exists for this graph read location. + /// + /// Every [`GraphRead`] terminator in the MIR should have a corresponding + /// compiled query produced by the SQL lowering pass. + /// + /// [`GraphRead`]: hashql_mir::body::terminator::GraphRead + QueryLookup { + /// The definition containing the graph read. + body: DefId, + /// The basic block containing the graph read terminator. + block: BasicBlockId, + }, + + /// A continuation state was not fully populated before finishing. + /// + /// When a row contains a non-null continuation target, the locals and values + /// columns must also be present. A missing or null field indicates the SQL + /// lowering pass produced a continuation with an incomplete column set. + IncompleteContinuation { + /// The definition containing the continuation. + body: DefId, + /// The name of the field that was missing or null. + field: &'static str, + }, + + /// No execution residual was found for a definition that requires one. + /// + /// The execution analysis pass should produce island mappings for every + /// definition that appears in a filter chain. A missing residual indicates + /// the execution pipeline did not analyze this definition. 
+ MissingExecutionResidual { + /// The definition that has no execution residual. + body: DefId, + }, + + /// A filter body returned a non-boolean value. + /// + /// Filter bodies must evaluate to a boolean. If the interpreter produces + /// a value that is not representable as a boolean, the HIR type checking + /// or lowering pass has a bug. + InvalidFilterReturn { + /// The filter definition that returned a non-boolean. + body: DefId, + }, + + /// A runtime value could not be serialized to JSON. + /// + /// Serialization failures indicate a bug in the encoder or an unsupported + /// value shape (e.g. pointer values). + ValueSerialization { + /// The serialization error from `serde_json`. + source: serde_json::Error, + }, +} + +impl<'heap> BridgeError<'heap> { + pub fn into_diagnostic(self, span: SpanId, env: &Environment<'heap>) -> InterpretDiagnostic { + match self { + Self::QueryExecution { sql, source } => query_execution(span, &sql, &source), + Self::RowHydration { column, source } => row_hydration(span, column, &source), + Self::ValueDeserialization { column, source } => { + value_deserialization(span, column, &source, env) + } + Self::ContinuationDeserialization { + body, + local, + source, + } => continuation_deserialization(span, body, local, &source, env), + Self::InvalidContinuationBlockId { body, block_id } => { + invalid_continuation_block_id(span, body, block_id) + } + Self::InvalidContinuationLocal { body, local } => { + invalid_continuation_local(span, body, local) + } + Self::ParameterEncoding { parameter, source } => { + parameter_encoding(span, parameter, &*source) + } + Self::QueryLookup { body, block } => query_lookup(span, body, block), + Self::IncompleteContinuation { body, field } => { + incomplete_continuation(span, body, field) + } + Self::MissingExecutionResidual { body } => missing_execution_residual(span, body), + Self::InvalidFilterReturn { body } => invalid_filter_return(span, body), + Self::ValueSerialization { source } => 
value_serialization(span, &source), + } + } +} + +fn query_execution(span: SpanId, sql: &str, error: &tokio_postgres::Error) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&QUERY_EXECUTION), Severity::Bug).primary( + Label::new(span, "compiled query was rejected by the database"), + ); + + diagnostic.add_message(Message::note(format!("generated SQL: {sql}"))); + + diagnostic.add_message(Message::note(format!("the database reported: {error}"))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce queries that the database accepts", + )); + + diagnostic +} + +fn row_hydration( + span: SpanId, + Indexed { + index, + value: column, + }: Indexed, + source: &tokio_postgres::Error, +) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(category(&ROW_HYDRATION), Severity::Bug).primary(Label::new( + span, + format!("cannot decode result column {index} ({column})"), + )); + + diagnostic.add_message(Message::note(format!("the database reported: {source}"))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce queries whose result types the runtime can decode", + )); + + diagnostic +} + +/// Adds notes describing a [`DecodeError`] to a diagnostic. 
+fn add_decode_error_notes( + diagnostic: &mut InterpretDiagnostic, + source: &DecodeError<'_>, + env: &Environment<'_>, +) { + let fmt = Formatter::new(env.heap); + let mut type_fmt = TypeFormatter::new(&fmt, env, TypeFormatterOptions::default()); + let render = RenderOptions::default(); + + match source { + DecodeError::TypeMismatch { expected, received } => { + diagnostic.add_message(Message::note(format!( + "expected `{}` but received JSON {}", + type_fmt.render(*expected, render), + received.as_str(), + ))); + } + DecodeError::MissingField { expected, field } => { + diagnostic.add_message(Message::note(format!( + "field `{field}` is missing from the JSON object when decoding `{}`", + type_fmt.render(*expected, render), + ))); + } + DecodeError::StructLengthMismatch { + expected, + expected_length, + received_length, + } => { + diagnostic.add_message(Message::note(format!( + "expected {expected_length} fields for `{}` but received {received_length}", + type_fmt.render(*expected, render), + ))); + } + DecodeError::TupleLengthMismatch { + expected, + expected_length, + received_length, + } => { + diagnostic.add_message(Message::note(format!( + "expected {expected_length} elements for `{}` but received {received_length}", + type_fmt.render(*expected, render), + ))); + } + DecodeError::NoMatchingVariant { expected, received } => { + diagnostic.add_message(Message::note(format!( + "no variant of `{}` could decode JSON {}", + type_fmt.render(*expected, render), + received.as_str(), + ))); + } + DecodeError::NumberOutOfRange { expected } => { + if let Some(expected) = expected { + diagnostic.add_message(Message::note(format!( + "JSON number is out of range for `{}`", + type_fmt.render(*expected, render), + ))); + } else { + diagnostic.add_message(Message::note( + "JSON number is out of range and cannot be represented as a floating-point \ + value", + )); + } + } + DecodeError::MalformedConstruction { expected } => { + if let Some(expected) = expected { + 
diagnostic.add_message(Message::note(format!( + "internal invariant violated while constructing `{}`", + type_fmt.render(*expected, render), + ))); + } else { + diagnostic.add_message(Message::note( + "internal invariant violated during value construction", + )); + } + } + DecodeError::IntersectionType { type_id } => { + diagnostic.add_message(Message::note(format!( + "intersection type `{}` cannot be safely represented as JSON", + type_fmt.render(*type_id, render), + ))); + diagnostic.add_message(Message::help( + "the placement pass should reject queries that require deserializing intersection \ + types from postgres", + )); + } + DecodeError::ClosureType { type_id } => { + diagnostic.add_message(Message::note(format!( + "closure type `{}` cannot be transported through postgres", + type_fmt.render(*type_id, render), + ))); + diagnostic.add_message(Message::help( + "the placement pass should reject queries that require deserializing closures \ + from postgres", + )); + } + DecodeError::NeverType { type_id } => { + diagnostic.add_message(Message::note(format!( + "the never type `{}` is uninhabited and cannot have a value", + type_fmt.render(*type_id, render), + ))); + diagnostic.add_message(Message::help( + "the MIR pipeline should prevent never types from reaching evaluation", + )); + } + } +} + +fn value_deserialization( + span: SpanId, + Indexed { + index, + value: column, + }: Indexed, + source: &DecodeError<'_>, + env: &Environment<'_>, +) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(category(&VALUE_DESERIALIZATION), Severity::Bug).primary(Label::new( + span, + format!("cannot deserialize result column {index} ({column})"), + )); + + add_decode_error_notes(&mut diagnostic, source, env); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce queries whose result types match the entity schema", + )); + + diagnostic +} + +fn continuation_deserialization( + span: SpanId, + body: DefId, + local: Local, + source: 
&DecodeError<'_>, + env: &Environment<'_>, +) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&CONTINUATION_DESERIALIZATION), Severity::Bug) + .primary(Label::new( + span, + format!("cannot deserialize continuation local {local} in definition {body}"), + )); + + add_decode_error_notes(&mut diagnostic, source, env); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce continuations whose types the runtime can \ + reconstruct", + )); + + diagnostic +} + +fn invalid_continuation_block_id(span: SpanId, body: DefId, block_id: i32) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(category(&INVALID_CONTINUATION_BLOCK_ID), Severity::Bug).primary( + Label::new(span, "continuation returned an invalid block ID"), + ); + + diagnostic.add_message(Message::note(format!( + "definition {body} returned block ID {block_id}, which cannot represent a valid block" + ))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce non-negative block IDs for continuations", + )); + + diagnostic +} + +fn invalid_continuation_local(span: SpanId, body: DefId, local: i32) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&INVALID_CONTINUATION_LOCAL), Severity::Bug) + .primary(Label::new(span, "continuation returned an invalid local")); + + diagnostic.add_message(Message::note(format!( + "definition {body} returned local {local}, which cannot represent a valid local" + ))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce non-negative local indices for continuations", + )); + + diagnostic +} + +fn parameter_encoding( + span: SpanId, + parameter: usize, + error: &(dyn core::error::Error + Send + Sync), +) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(category(&PARAMETER_ENCODING), Severity::Bug).primary(Label::new( + span, + format!( + "cannot encode parameter ${} for the database", + parameter + 1 + ), + )); + + 
diagnostic.add_message(Message::note(format!("the encoder reported: {error}"))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should only emit parameter types the encoder supports", + )); + + diagnostic +} + +fn query_lookup(span: SpanId, body: DefId, block: BasicBlockId) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&QUERY_LOOKUP), Severity::Bug).primary( + Label::new(span, "no compiled query found for this data access"), + ); + + diagnostic.add_message(Message::note(format!( + "missing query for definition {body} at block {block}" + ))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce a compiled query for every data access", + )); + + diagnostic +} + +fn incomplete_continuation(span: SpanId, body: DefId, field: &str) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&INCOMPLETE_CONTINUATION), Severity::Bug) + .primary(Label::new( + span, + "continuation state is missing required columns", + )); + + diagnostic.add_message(Message::note(format!( + "continuation for definition {body} has a non-null target but `{field}` was not populated" + ))); + + diagnostic.add_message(Message::help( + "the SQL lowering pass should produce all continuation columns together", + )); + + diagnostic +} + +fn missing_execution_residual(span: SpanId, body: DefId) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&MISSING_EXECUTION_RESIDUAL), Severity::Bug) + .primary(Label::new( + span, + "no execution residual found for this definition", + )); + + diagnostic.add_message(Message::note(format!( + "definition {body} appears in a filter chain but has no island mapping" + ))); + + diagnostic.add_message(Message::help( + "the execution analysis pass should produce island mappings for all filter definitions", + )); + + diagnostic +} + +fn invalid_filter_return(span: SpanId, body: DefId) -> InterpretDiagnostic { + let mut diagnostic = 
Diagnostic::new(category(&INVALID_FILTER_RETURN), Severity::Bug) + .primary(Label::new(span, "filter body returned a non-boolean value")); + + diagnostic.add_message(Message::note(format!( + "filter definition {body} must evaluate to a boolean" + ))); + + diagnostic.add_message(Message::help( + "the HIR type checking pass should ensure filter bodies return a boolean", + )); + + diagnostic +} + +fn value_serialization(span: SpanId, error: &serde_json::Error) -> InterpretDiagnostic { + let mut diagnostic = Diagnostic::new(category(&VALUE_SERIALIZATION), Severity::Bug) + .primary(Label::new(span, "cannot serialize runtime value to JSON")); + + diagnostic.add_message(Message::note(format!("serialization failed: {error}"))); + + diagnostic.add_message(Message::help( + "all values passed to the database should be serializable", + )); + + diagnostic +} diff --git a/libs/@local/hashql/eval/src/orchestrator/events.rs b/libs/@local/hashql/eval/src/orchestrator/events.rs new file mode 100644 index 00000000000..e2c6b4cd3b6 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/events.rs @@ -0,0 +1,158 @@ +//! Opt-in event tracing for the orchestrator execution pipeline. +//! +//! The orchestrator emits [`Event`]s at key decision points: query dispatch, +//! row hydration, filter evaluation, island transitions, and result collection. +//! An [`EventLog`] sink receives them. The default `()` implementation compiles +//! to a no-op with zero runtime cost. [`AppendEventLog`] collects events into +//! a [`Vec`] for test assertions. +//! +//! # Design +//! +//! All [`Event`] variants are `Copy`. This guarantees that the `()` sink +//! optimizes away completely: no allocation, no drop glue, no residual code +//! in the dispatch loop. Non-`Copy` payloads (e.g. [`String`]) would prevent +//! LLVM from eliminating dead event construction even through a no-op sink. +//! +//! [`EventLog::log`] takes `&self` rather than `&mut self` so that events can +//! 
be emitted through shared borrows of the [`Orchestrator`]. Interior +//! mutability is handled by the sink implementation ([`AppendEventLog`] uses +//! a [`LocalLock`]). +//! +//! [`Orchestrator`]: super::Orchestrator + +use core::{ + fmt::{self, Display}, + mem, +}; + +use hashql_core::sync::lock::LocalLock; +use hashql_mir::{ + body::basic_block::BasicBlockId, + def::DefId, + pass::execution::{IslandId, TargetId}, +}; + +/// A single orchestrator execution event. +/// +/// Each variant captures the structured data needed to reconstruct what +/// happened at a particular point in the execution pipeline. Formatting +/// is the listener's responsibility; use the [`Display`] implementation +/// for human-readable output. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Event { + /// SQL query dispatched to PostgreSQL. + QueryExecuted { body: DefId, block: BasicBlockId }, + /// A result row was received from PostgreSQL. + RowReceived, + + /// A filter body started evaluating for the current row. + FilterStarted { body: DefId }, + /// The filter accepted the current row. + FilterAccepted { body: DefId }, + /// The filter rejected the current row. + FilterRejected { body: DefId }, + + /// Entered an execution island within a filter body. + IslandEntered { + body: DefId, + island: IslandId, + target: TargetId, + }, + /// Postgres continuation state was flushed into the callstack. + ContinuationFlushed { body: DefId, island: IslandId }, + /// Postgres island had no continuation state (implicit true). + ContinuationImplicitTrue { body: DefId }, + + /// A row survived all filters and was added to the output. + RowAccepted, + /// A row was rejected by the filter chain. 
+ RowRejected, +} + +impl Display for Event { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::QueryExecuted { body, block } => { + write!(f, "query executed: body {body}, block {block}") + } + Self::RowReceived => f.write_str("row received"), + Self::FilterStarted { body } => write!(f, "filter started: body {body}"), + Self::FilterAccepted { body } => write!(f, "filter accepted: body {body}"), + Self::FilterRejected { body } => write!(f, "filter rejected: body {body}"), + Self::IslandEntered { + body, + island, + target, + } => write!( + f, + "island entered: body {body}, island {island}, target {target}" + ), + Self::ContinuationFlushed { body, island } => { + write!(f, "continuation flushed: body {body}, island {island}") + } + Self::ContinuationImplicitTrue { body } => { + write!(f, "continuation implicit true: body {body}") + } + Self::RowAccepted => f.write_str("row accepted"), + Self::RowRejected => f.write_str("row rejected"), + } + } +} + +/// Receiver for orchestrator [`Event`]s. +/// +/// Implement this trait to observe execution decisions without modifying +/// the orchestrator's control flow. The method takes `&self` to allow +/// emission through shared borrows; implementations that need mutation +/// should use interior mutability (e.g. [`LocalLock`]). +/// +/// The `()` implementation discards all events and compiles to a no-op. +pub trait EventLog { + /// Records a single event. + fn log(&self, event: Event); +} + +impl EventLog for () { + #[inline(always)] + fn log(&self, _: Event) {} +} + +impl EventLog for &T { + #[inline] + fn log(&self, event: Event) { + T::log(self, event); + } +} + +/// An [`EventLog`] that appends events to an internal [`Vec`]. +/// +/// Uses a [`LocalLock`] for interior mutability so that [`log`](EventLog::log) +/// can be called through `&self`. Retrieve collected events with [`take`](Self::take), +/// which drains the buffer. 
+#[derive(Debug)] +pub struct AppendEventLog(LocalLock>); + +impl AppendEventLog { + /// Creates an empty event log. + #[must_use] + pub const fn new() -> Self { + Self(LocalLock::new(Vec::new())) + } + + /// Drains and returns all collected events, leaving the buffer empty. + pub fn take(&self) -> Vec { + mem::take(&mut *self.0.lock()) + } +} + +impl Default for AppendEventLog { + fn default() -> Self { + Self::new() + } +} + +impl EventLog for AppendEventLog { + fn log(&self, event: Event) { + self.0.lock().push(event); + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/mod.rs b/libs/@local/hashql/eval/src/orchestrator/mod.rs new file mode 100644 index 00000000000..978193b83ba --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/mod.rs @@ -0,0 +1,296 @@ +//! Orchestration layer between the MIR interpreter and external data sources. +//! +//! The interpreter executes HashQL programs over MIR bodies but cannot satisfy +//! data access on its own. When execution reaches a [`GraphRead`] terminator, +//! the interpreter yields a [`Suspension`] describing what data is needed and +//! where to resume. The orchestrator takes over: it looks up the pre-compiled +//! SQL query, encodes parameters, sends the query to PostgreSQL, hydrates each +//! result row into a typed [`Value`], runs any client-side filter chains, and +//! packages the output into a [`Continuation`] that the interpreter can apply +//! to resume execution. +//! +//! Key types: +//! +//! - [`Orchestrator`]: top-level driver that owns the database client and query registry. Provides +//! [`run_in`] for full query execution and [`fulfill_in`] for resolving a single suspension. +//! - [`Indexed`]: positional wrapper that carries a column's index alongside its descriptor through +//! the hydration pipeline, used for error reporting. +//! +//! Submodules: +//! +//! - `codec`: JSON codec between interpreter [`Value`]s and the PostgreSQL wire format. The +//! 
`decode` side deserializes result columns into typed values guided by the HashQL type system; +//! the `encode` side serializes runtime values and query parameters for transmission to +//! PostgreSQL. +//! - `partial`: three-state hydration tracking (Skipped, Null, Value) that assembles flat result +//! columns into nested vertex value trees. Each `Partial*` struct mirrors a level of the vertex +//! type hierarchy. +//! - `postgres`: continuation state for multi-island execution. When a compiled query returns +//! continuation columns (target block, locals, serialized values), this module hydrates and +//! validates them, then flushes the decoded state into the interpreter's callstack. +//! - `request`: per-suspension-type handlers (currently [`GraphRead`]). +//! - `tail`: result accumulation strategies (currently collection into a list). +//! - `error`: error types for all failure modes in the bridge. All variants use `Severity::Bug` +//! because the user wrote HashQL, not SQL: if the bridge fails, the compiler or runtime produced +//! something invalid. +//! +//! [`GraphRead`]: hashql_mir::body::terminator::GraphRead +//! [`Suspension`]: hashql_mir::interpret::suspension::Suspension +//! [`Value`]: hashql_mir::interpret::value::Value +//! [`Continuation`]: hashql_mir::interpret::suspension::Continuation +//! [`run_in`]: Orchestrator::run_in +//! 
[`fulfill_in`]: Orchestrator::fulfill_in + +use alloc::alloc::Global; +use core::{alloc::Allocator, ops::Deref}; + +use hashql_mir::{ + def::DefId, + interpret::{ + CallStack, Inputs, Runtime, RuntimeConfig, RuntimeError, + error::InterpretDiagnostic, + suspension::{Continuation, Suspension}, + value::Value, + }, +}; +use tokio_postgres::Client; + +pub use self::events::{AppendEventLog, Event, EventLog}; +use self::{error::BridgeError, request::GraphReadOrchestrator}; +use crate::{context::EvalContext, postgres::PreparedQueries}; + +pub mod codec; +pub(crate) mod error; +mod events; +mod partial; +mod postgres; +mod request; +mod tail; + +/// A value paired with its positional index. +/// +/// Used throughout the hydration pipeline to carry a column's index alongside +/// its [`ColumnDescriptor`] so that error diagnostics can report both *which* +/// column failed and *what* it represents. +/// +/// Dereferences to the inner value, so callers can access the descriptor +/// transparently. +/// +/// [`ColumnDescriptor`]: crate::postgres::ColumnDescriptor +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Indexed { + pub index: usize, + value: T, +} + +impl Indexed { + pub(crate) const fn new(index: usize, value: T) -> Self { + Self { index, value } + } +} + +impl Deref for Indexed { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +/// Top-level driver that bridges the MIR interpreter with PostgreSQL. +/// +/// Owns a database [`Client`], a reference to the compiled query registry, and +/// the evaluation context (type environment, body definitions, execution +/// analysis results). The type parameter `C` is reserved for future +/// configuration; `A` is the allocator used by the query registry; `E` is +/// the [`EventLog`] sink for execution tracing. +/// +/// By default `E` is `()`, which compiles all event logging to no-ops. 
Use +/// [`with_event_log`](Self::with_event_log) to attach a collector such as +/// [`AppendEventLog`] for test assertions or debugging. +/// +/// Use [`run_in`](Self::run_in) to execute a complete query from scratch, or +/// [`fulfill_in`](Self::fulfill_in) / [`fulfill`](Self::fulfill) to resolve an +/// individual [`Suspension`] when driving the interpreter manually. +/// +/// [`Suspension`]: hashql_mir::interpret::suspension::Suspension +pub struct Orchestrator<'env, 'ctx, 'heap, C, E, A: Allocator> { + client: C, + queries: &'env PreparedQueries<'heap, A>, + context: &'env EvalContext<'ctx, 'heap, A>, + /// Event sink for execution tracing. See [`EventLog`]. + pub event_log: E, +} + +impl<'env, 'ctx, 'heap, C, A: Allocator> Orchestrator<'env, 'ctx, 'heap, C, (), A> { + pub const fn new( + client: C, + queries: &'env PreparedQueries<'heap, A>, + context: &'env EvalContext<'ctx, 'heap, A>, + ) -> Self { + Self { + client, + queries, + context, + event_log: (), + } + } +} + +impl<'env, 'ctx, 'heap, C, E, A: Allocator> Orchestrator<'env, 'ctx, 'heap, C, E, A> { + /// Replaces the event log, returning a new orchestrator with the given + /// sink. + pub fn with_event_log(self, event_log: E2) -> Orchestrator<'env, 'ctx, 'heap, C, E2, A> { + Orchestrator { + client: self.client, + queries: self.queries, + context: self.context, + event_log, + } + } +} + +#[expect(clippy::future_not_send)] +impl<'ctx, 'heap, C, E: EventLog, A: Allocator> Orchestrator<'_, 'ctx, 'heap, C, E, A> { + /// Executes a complete query, resolving suspensions in a loop until the + /// interpreter returns a final [`Value`]. + /// + /// Creates a fresh [`Runtime`] and [`CallStack`], then alternates between + /// running the interpreter and fulfilling suspensions until the program + /// either returns or fails. On failure, the callstack is unwound to + /// produce span information for the diagnostic. + /// + /// `L` is the allocator for runtime values and intermediate results. 
+ /// + /// # Errors + /// + /// Returns an [`InterpretDiagnostic`] if the interpreter fails or any + /// suspension cannot be fulfilled (database errors, decoding failures, + /// filter evaluation failures). + /// + /// [`Value`]: hashql_mir::interpret::value::Value + pub async fn run_in( + &self, + inputs: &Inputs<'heap, L>, + + body: DefId, + args: impl IntoIterator, IntoIter: ExactSizeIterator>, + + alloc: L, + ) -> Result, InterpretDiagnostic> + where + C: AsRef, + { + let mut runtime = Runtime::new_in( + RuntimeConfig::default(), + self.context.bodies, + inputs, + alloc.clone(), + ); + runtime.reset(); + + let mut callstack = CallStack::new(&runtime, body, args); + + let Err(error) = try { + loop { + let next = runtime.run_until_suspension(&mut callstack)?; + match next { + hashql_mir::interpret::Yield::Return(value) => { + return Ok(value); + } + hashql_mir::interpret::Yield::Suspension(suspension) => { + let continuation = self + .fulfill_in(inputs, &callstack, suspension, alloc.clone()) + .await?; + + continuation.apply(&mut callstack)?; + } + } + } + }; + + Err( + error.into_diagnostic(callstack.unwind().map(|(_, span)| span), |suspension| { + let span = callstack + .unwind() + .next() + .map_or(self.context.bodies[body].span, |(_, span)| span); + + suspension.into_diagnostic(span, self.context.env) + }), + ) + } + + /// Convenience wrapper around [`run_in`](Self::run_in) that uses the + /// [`Global`] allocator. + /// + /// # Errors + /// + /// Returns an [`InterpretDiagnostic`] on failure. See + /// [`run_in`](Self::run_in). + pub async fn run( + &self, + inputs: &Inputs<'heap, Global>, + body: DefId, + args: impl IntoIterator, IntoIter: ExactSizeIterator>, + ) -> Result, InterpretDiagnostic> + where + C: AsRef, + { + self.run_in(inputs, body, args, Global).await + } + + /// Resolves a single [`Suspension`] by dispatching to the appropriate + /// request handler. + /// + /// Currently only [`GraphRead`] suspensions are supported. 
Returns a + /// [`Continuation`] that the caller + /// must [`apply`] to the callstack to resume interpretation. + /// + /// # Errors + /// + /// Returns a [`RuntimeError`] if query execution, row hydration, or + /// filter evaluation fails. + /// + /// [`Suspension`]: hashql_mir::interpret::suspension::Suspension + /// [`GraphRead`]: hashql_mir::body::terminator::GraphRead + /// [`Continuation`]: hashql_mir::interpret::suspension::Continuation + /// [`apply`]: hashql_mir::interpret::suspension::Continuation::apply + pub async fn fulfill_in( + &self, + inputs: &Inputs<'heap, L>, + callstack: &CallStack<'ctx, 'heap, L>, + suspension: Suspension<'ctx, 'heap>, + alloc: L, + ) -> Result, RuntimeError<'heap, BridgeError<'heap>, L>> + where + C: AsRef, + { + match suspension { + Suspension::GraphRead(suspension) => { + GraphReadOrchestrator::new(self) + .fulfill_in(inputs, callstack, suspension, alloc) + .await + } + } + } + + /// Convenience wrapper around [`fulfill_in`](Self::fulfill_in) that uses + /// the [`Global`] allocator. + /// + /// # Errors + /// + /// Returns a [`RuntimeError`] if query execution, row hydration, or + /// filter evaluation fails. See [`fulfill_in`](Self::fulfill_in). + pub async fn fulfill( + &self, + inputs: &Inputs<'heap, Global>, + callstack: &CallStack<'ctx, 'heap, Global>, + suspension: Suspension<'ctx, 'heap>, + ) -> Result, RuntimeError<'heap, BridgeError<'heap>, Global>> + where + C: AsRef, + { + self.fulfill_in(inputs, callstack, suspension, Global).await + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/partial.rs b/libs/@local/hashql/eval/src/orchestrator/partial.rs new file mode 100644 index 00000000000..a4488e5ecc0 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/partial.rs @@ -0,0 +1,1039 @@ +//! Partial vertex representation for row hydration. +//! +//! When the bridge receives a row from PostgreSQL, each column corresponds to +//! a leaf [`TraversalPath`] in the provides set. 
The columns are flat, one per +//! requested storage location, but the interpreter expects a nested [`Value`] +//! tree with intermediate structs, opaque wrappers, and correct `Option` +//! representation. +//! +//! This module provides [`Hydrated`], a three-state enum that tracks whether +//! a field was requested by the query and, if so, whether the database +//! returned a value or `NULL`. The [`Required`] and [`Optional`] aliases +//! restrict the state space based on the schema: non-nullable fields cannot +//! be [`Null`](Hydrated::Null), enforced at the type level via the +//! uninhabited type [`!`]. +//! +//! The `Partial*` structs mirror the vertex type hierarchy. Each leaf field +//! holds a [`Hydrated`] wrapping a [`Value`]; intermediate structs group +//! fields by their position in the type tree. Conversion from a partial +//! struct to a [`Value`] is a separate step that walks the partial tree, +//! wraps intermediate levels in their opaque constructors, and collapses +//! `Option` boundaries. +//! +//! [`TraversalPath`]: hashql_mir::pass::execution::traversal::TraversalPath +//! [`Value`]: hashql_mir::interpret::value::Value + +use alloc::rc::Rc; +use core::alloc::Allocator; + +use hashql_core::{ + symbol::{Symbol, sym}, + r#type::{TypeId, environment::Environment}, +}; +use hashql_mir::{ + intern::Interner, + interpret::value::{Int, Num, Opaque, StructBuilder, Value}, + pass::execution::{ + VertexType, + traversal::{EntityPath, TraversalPath}, + }, +}; +use tokio_postgres::Row; +use uuid::Uuid; + +use super::{ + Indexed, + codec::{JsonValueRef, decode::Decoder}, + error::BridgeError, +}; +use crate::postgres::ColumnDescriptor; + +macro_rules! hydrate { + ($this:ident -> $entry:ident $(-> $field:ident)+ = $value:expr) => { + $this .$entry $(.ensure().$field)+ .set($value) + }; +} + +/// Per-field hydration state for partial entity assembly. 
+/// +/// Each field in the partial entity representation has one of three states: +/// +/// - **Skipped**: the query's provides set did not include this field. The field will be omitted +/// from the assembled [`Value`] struct entirely. +/// - **Null**: the query requested this field, but the database returned `NULL`. This only occurs +/// for schema-optional fields (e.g. `link_data` on non-link entities, where a `LEFT JOIN` +/// produces all `NULL`s). The type parameter `A` controls whether this variant is constructible. +/// - **Value**: the query requested this field and data was returned. +/// +/// Use the [`Required`] and [`Optional`] aliases rather than specifying `A` directly. +/// +/// [`Value`]: hashql_mir::interpret::value::Value +#[derive(Debug, Clone, PartialEq, Eq, Default)] +pub(crate) enum Hydrated { + /// Not in the provides set. The query did not request this field. + #[default] + Skipped, + /// Requested, but the database returned `NULL`. + /// + /// Only constructible when `A = ()` ([`Optional`] fields). For [`Required`] + /// fields (`A = !`), this variant is uninhabited. + Null(N), + /// Requested and present. + Value(T), +} + +impl Hydrated { + pub(crate) fn map(self, func: impl FnOnce(T) -> U) -> Hydrated { + match self { + Self::Skipped => Hydrated::Skipped, + Self::Null(marker) => Hydrated::Null(marker), + Self::Value(value) => Hydrated::Value(func(value)), + } + } +} + +impl Hydrated { + /// Sets this required field to [`Value`](Self::Value). + /// + /// # Panics (debug only) + /// + /// Debug-panics if the field is already populated. + pub(crate) fn set(&mut self, value: T) { + debug_assert!( + matches!(self, Self::Skipped), + "field already populated: duplicate column in row hydration" + ); + + *self = Self::Value(value); + } +} + +impl Hydrated { + /// Sets this optional field from a nullable column value. + /// + /// [`Some`] produces [`Value`](Self::Value), [`None`] produces + /// [`Null`](Self::Null). 
+ /// + /// # Panics (debug only) + /// + /// Debug-panics if the field is already populated. + pub(crate) fn set(&mut self, value: Option) { + debug_assert!( + matches!(self, Self::Skipped), + "field already populated: duplicate column in row hydration" + ); + + match value { + Some(value) => *self = Self::Value(value), + None => *self = Self::Null(()), + } + } + + pub(crate) fn null(&mut self) { + debug_assert!( + matches!(self, Self::Skipped | Self::Null(())), + "field already populated with a value: cannot null" + ); + + *self = Self::Null(()); + } + + pub(crate) fn try_null(&mut self) { + if matches!(self, Self::Value(_)) { + *self = Self::Null(()); + } + } + + pub(crate) fn filter(self, func: impl FnOnce(&T) -> bool) -> Self { + match self { + Self::Skipped => Self::Skipped, + Self::Null(()) => Self::Null(()), + Self::Value(value) => { + if func(&value) { + Self::Value(value) + } else { + Self::Null(()) + } + } + } + } +} + +impl Hydrated { + /// Ensures this field contains a value, initializing it with [`Default::default`] + /// if it was [`Skipped`](Self::Skipped) or [`Null`](Self::Null). + /// + /// Returns a mutable reference to the inner value for further drilling. + /// + /// This is the primary mechanism for populating nested partial structs + /// from flat column values: each intermediate level is initialized on + /// first access, then the caller continues into the next level. 
+ pub(crate) fn ensure(&mut self) -> &mut T { + if !matches!(self, Self::Value(_)) { + *self = Self::Value(T::default()); + } + + match self { + Self::Value(value) => value, + Self::Skipped | Self::Null(_) => unreachable!(), + } + } +} + +impl<'heap, A: Allocator> Hydrated, !> { + pub(crate) fn finish_in( + self, + builder: &mut StructBuilder<'heap, A, N>, + field: Symbol<'heap>, + ) { + let value = match self { + Self::Skipped => return, + Self::Value(value) => value, + }; + + builder.push(field, value); + } +} + +impl<'heap, A: Allocator> Hydrated, ()> { + pub(crate) fn finish_in( + self, + builder: &mut StructBuilder<'heap, A, N>, + field: Symbol<'heap>, + alloc: A, + ) { + let value = match self { + Self::Skipped => return, + Self::Null(()) => { + Value::Opaque(Opaque::new(sym::path::None, Rc::new_in(Value::Unit, alloc))) + } + Self::Value(value) => { + Value::Opaque(Opaque::new(sym::path::Some, Rc::new_in(value, alloc))) + } + }; + + builder.push(field, value); + } +} + +/// Hydration state for a non-nullable schema field. +/// +/// The [`Null`](Hydrated::Null) variant is uninhabited: a required field is either +/// [`Skipped`](Hydrated::Skipped) or has a [`Value`](Hydrated::Value). +pub(crate) type Required = Hydrated; + +/// Hydration state for a nullable schema field. +/// +/// All three states are inhabited: [`Skipped`](Hydrated::Skipped), +/// [`Null`](Hydrated::Null), or [`Value`](Hydrated::Value). +/// [`Null`](Hydrated::Null) represents a schema-level absence (e.g. `link_data` +/// on a non-link entity), not a missing column. +pub(crate) type Optional = Hydrated; + +/// Partial representation of `EntityEncodings`. 
+pub(crate) struct PartialEncodings<'heap, A: Allocator> { + pub vectors: Required>, +} + +impl<'heap, A: Allocator> PartialEncodings<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 1> = StructBuilder::new(); + self.vectors.finish_in(&mut builder, sym::vectors); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new( + sym::path::EntityEncodings, + Rc::new_in(value, alloc), + )) + } +} + +impl Default for PartialEncodings<'_, A> { + fn default() -> Self { + Self { + vectors: Required::Skipped, + } + } +} + +/// Partial identity of a linked entity (left or right target of a link). +/// +/// Unlike [`PartialEntityId`], this only has `web_id` and `entity_uuid`: +/// link targets are not addressable by `draft_id` through +/// [`EntityPath`](hashql_mir::pass::execution::traversal::EntityPath). +pub(crate) struct PartialLinkEntityId<'heap, A: Allocator> { + pub web_id: Required>, + pub entity_uuid: Required>, +} + +impl<'heap, A: Allocator> PartialLinkEntityId<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 3> = StructBuilder::new(); + self.web_id.finish_in(&mut builder, sym::web_id); + self.entity_uuid.finish_in(&mut builder, sym::entity_uuid); + + builder.push( + sym::draft_id, + Value::Opaque(Opaque::new( + sym::path::None, + Rc::new_in(Value::Unit, alloc.clone()), + )), + ); + + let inner = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new(sym::path::EntityId, Rc::new_in(inner, alloc))) + } +} + +impl Default for PartialLinkEntityId<'_, A> { + fn default() -> Self { + Self { + web_id: Required::Skipped, + entity_uuid: Required::Skipped, + } + } +} + +/// Partial representation of `EntityProvenance`. 
+pub(crate) struct PartialProvenance<'heap, A: Allocator> { + pub inferred: Required>, + pub edition: Required>, +} + +impl<'heap, A: Allocator> PartialProvenance<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 2> = StructBuilder::new(); + self.inferred.finish_in(&mut builder, sym::inferred); + self.edition.finish_in(&mut builder, sym::edition); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + + Value::Opaque(Opaque::new( + sym::path::EntityProvenance, + Rc::new_in(value, alloc), + )) + } +} + +impl Default for PartialProvenance<'_, A> { + fn default() -> Self { + Self { + inferred: Required::Skipped, + edition: Required::Skipped, + } + } +} + +/// Partial representation of `TemporalMetadata`. +pub(crate) struct PartialTemporalVersioning<'heap, A: Allocator> { + pub decision_time: Required>, + pub transaction_time: Required>, +} + +impl<'heap, A: Allocator> PartialTemporalVersioning<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 2> = StructBuilder::new(); + self.decision_time + .finish_in(&mut builder, sym::decision_time); + self.transaction_time + .finish_in(&mut builder, sym::transaction_time); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new( + sym::path::TemporalMetadata, + Rc::new_in(value, alloc), + )) + } +} + +impl Default for PartialTemporalVersioning<'_, A> { + fn default() -> Self { + Self { + decision_time: Required::Skipped, + transaction_time: Required::Skipped, + } + } +} + +/// Partial representation of `EntityId` (the entity's own identity). +/// +/// Schema field `draft_id` is `Option`, the others are required. 
+/// +/// This is distinct from [`PartialLinkEntityId`], which represents the +/// identity of a *linked* entity and does not include `draft_id`. +pub(crate) struct PartialEntityId<'heap, A: Allocator> { + pub web_id: Required>, + pub entity_uuid: Required>, + pub draft_id: Optional>, +} + +impl<'heap, A: Allocator> PartialEntityId<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 3> = StructBuilder::new(); + self.web_id.finish_in(&mut builder, sym::web_id); + self.entity_uuid.finish_in(&mut builder, sym::entity_uuid); + self.draft_id + .finish_in(&mut builder, sym::draft_id, alloc.clone()); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new(sym::path::EntityId, Rc::new_in(value, alloc))) + } +} + +impl Default for PartialEntityId<'_, A> { + fn default() -> Self { + Self { + web_id: Required::Skipped, + entity_uuid: Required::Skipped, + draft_id: Optional::Skipped, + } + } +} + +/// Partial representation of `RecordId`. +/// +/// Contains `entity_id` (composite of web, uuid, draft) and `edition_id`. 
+pub(crate) struct PartialRecordId<'heap, A: Allocator> { + pub entity_id: Required>, + pub edition_id: Required>, +} + +impl<'heap, A: Allocator> PartialRecordId<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 2> = StructBuilder::new(); + + self.entity_id + .map(|value| value.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::entity_id); + self.edition_id.finish_in(&mut builder, sym::edition_id); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new(sym::path::RecordId, Rc::new_in(value, alloc))) + } +} + +impl Default for PartialRecordId<'_, A> { + fn default() -> Self { + Self { + entity_id: Required::Skipped, + edition_id: Required::Skipped, + } + } +} + +/// Partial representation of `LinkData`. +/// +/// Schema fields `left_entity_confidence` and `right_entity_confidence` are +/// `Option`, all others are required. +/// +/// The entity ID fields use [`PartialLinkEntityId`] (web + uuid only), +/// not [`PartialEntityId`] (which includes `draft_id`). 
+pub(crate) struct PartialLinkData<'heap, A: Allocator> { + pub left_entity_id: Required>, + pub right_entity_id: Required>, + pub left_entity_confidence: Optional>, + pub left_entity_provenance: Required>, + pub right_entity_confidence: Optional>, + pub right_entity_provenance: Required>, +} + +impl<'heap, A: Allocator> PartialLinkData<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 6> = StructBuilder::new(); + + self.left_entity_id + .map(|value| value.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::left_entity_id); + self.right_entity_id + .map(|value| value.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::right_entity_id); + self.left_entity_confidence.finish_in( + &mut builder, + sym::left_entity_confidence, + alloc.clone(), + ); + self.left_entity_provenance + .finish_in(&mut builder, sym::left_entity_provenance); + self.right_entity_confidence.finish_in( + &mut builder, + sym::right_entity_confidence, + alloc.clone(), + ); + self.right_entity_provenance + .finish_in(&mut builder, sym::right_entity_provenance); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new(sym::path::LinkData, Rc::new_in(value, alloc))) + } + + const fn has_value(&self) -> bool { + let Self { + left_entity_id, + right_entity_id, + left_entity_confidence, + left_entity_provenance, + right_entity_confidence, + right_entity_provenance, + } = self; + + matches!(left_entity_id, Hydrated::Value(_)) + || matches!(right_entity_id, Hydrated::Value(_)) + || matches!(left_entity_confidence, Hydrated::Value(_)) + || matches!(left_entity_provenance, Hydrated::Value(_)) + || matches!(right_entity_confidence, Hydrated::Value(_)) + || matches!(right_entity_provenance, Hydrated::Value(_)) + } +} + +impl Default for PartialLinkData<'_, A> { + fn default() -> Self { + Self { + left_entity_id: 
Required::Skipped, + right_entity_id: Required::Skipped, + left_entity_confidence: Optional::Skipped, + left_entity_provenance: Required::Skipped, + right_entity_confidence: Optional::Skipped, + right_entity_provenance: Required::Skipped, + } + } +} + +/// Partial representation of `EntityMetadata`. +/// +/// Schema field `confidence` is `Option`, all others are required. +/// The `property_metadata` field corresponds to the `properties` field in the +/// schema type ([`EntityPath::PropertyMetadata`]), renamed here to avoid +/// confusion with the entity's top-level `properties`. +/// +/// [`EntityPath::PropertyMetadata`]: hashql_mir::pass::execution::traversal::EntityPath::PropertyMetadata +pub(crate) struct PartialMetadata<'heap, A: Allocator> { + pub record_id: Required>, + pub temporal_versioning: Required>, + pub entity_type_ids: Required>, + pub archived: Required>, + pub provenance: Required>, + pub confidence: Optional>, + pub property_metadata: Required>, +} + +impl<'heap, A: Allocator> PartialMetadata<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 7> = StructBuilder::new(); + + self.record_id + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::record_id); + self.temporal_versioning + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::temporal_versioning); + self.entity_type_ids + .finish_in(&mut builder, sym::entity_type_ids); + self.archived.finish_in(&mut builder, sym::archived); + self.provenance + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::provenance); + self.confidence + .finish_in(&mut builder, sym::confidence, alloc.clone()); + self.property_metadata + .finish_in(&mut builder, sym::property_metadata); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + Value::Opaque(Opaque::new( + 
sym::path::EntityMetadata, + Rc::new_in(value, alloc), + )) + } +} + +impl Default for PartialMetadata<'_, A> { + fn default() -> Self { + Self { + record_id: Required::Skipped, + temporal_versioning: Required::Skipped, + entity_type_ids: Required::Skipped, + archived: Required::Skipped, + provenance: Required::Skipped, + confidence: Optional::Skipped, + property_metadata: Required::Skipped, + } + } +} + +/// Partial representation of `Entity`. +/// +/// Mirrors the top-level entity struct with four fields: +/// - `properties`: the generic `T` parameter, always a leaf [`Value`] +/// - `metadata`: [`EntityMetadata`], a deep nested struct +/// - `link_data`: `Option`, nullable at the schema level +/// - `encodings`: [`EntityEncodings`], currently just `vectors` +/// +/// [`EntityMetadata`]: hashql_core::module::std_lib::graph::types::knowledge::entity::types::entity_metadata +/// [`EntityEncodings`]: hashql_core::module::std_lib::graph::types::knowledge::entity::types::entity_encodings +pub(crate) struct PartialEntity<'heap, A: Allocator> { + pub properties: Required>, + pub metadata: Required>, + pub link_data: Optional>, + pub encodings: Required>, +} + +impl<'heap, A: Allocator> PartialEntity<'heap, A> { + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + let mut builder: StructBuilder<'heap, A, 4> = StructBuilder::new(); + self.properties.finish_in(&mut builder, sym::properties); + self.metadata + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::metadata); + self.link_data + .filter(PartialLinkData::has_value) + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::link_data, alloc.clone()); + self.encodings + .map(|partial| partial.finish_in(interner, alloc.clone())) + .finish_in(&mut builder, sym::encodings); + + let value = Value::Struct(builder.finish(interner, alloc.clone())); + 
Value::Opaque(Opaque::new(sym::path::Entity, Rc::new_in(value, alloc))) + } + + #[expect(clippy::too_many_lines)] + fn hydrate_from_postgres( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + path: EntityPath, + r#type: TypeId, + column: Indexed, + row: &Row, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let row_hydration_error = |source| BridgeError::RowHydration { column, source }; + + match path { + EntityPath::Properties => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + let value = decoder.try_decode(r#type, (&value).into(), column)?; + self.properties.set(value); + } + EntityPath::Vectors => unreachable!( + "entity vectors should never reach postgres compilation; the placement pass \ + should have rejected this" + ), + EntityPath::RecordId => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + + let entity_id = &value["entity_id"]; + let edition_id = &value["edition_id"]; + + self.hydrate_entity_id(env, decoder, column, entity_id)?; + self.hydrate_edition_id(env, decoder, column, edition_id)?; + } + EntityPath::EntityId => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + + self.hydrate_entity_id(env, decoder, column, &value)?; + } + EntityPath::WebId => { + let value: Uuid = row.try_get(column.index).map_err(row_hydration_error)?; + self.hydrate_web_id( + env, + decoder, + column, + JsonValueRef::String(&value.hyphenated().to_string()), + )?; + } + EntityPath::EntityUuid => { + let value: Uuid = row.try_get(column.index).map_err(row_hydration_error)?; + self.hydrate_entity_uuid( + env, + decoder, + column, + JsonValueRef::String(&value.hyphenated().to_string()), + )?; + } + EntityPath::DraftId => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + let value = value.map(|uuid| uuid.hyphenated().to_string()); + + self.hydrate_draft_id( + env, + 
decoder, + column, + value.as_deref().map(JsonValueRef::String), + )?; + } + EntityPath::EditionId => { + let value: Uuid = row.try_get(column.index).map_err(row_hydration_error)?; + self.hydrate_edition_id( + env, + decoder, + column, + JsonValueRef::String(&value.hyphenated().to_string()), + )?; + } + EntityPath::TemporalVersioning => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + let transaction_time = &value["transaction_time"]; + let decision_time = &value["decision_time"]; + + self.hydrate_decision_time(env, decoder, column, decision_time)?; + self.hydrate_transaction_time(env, decoder, column, transaction_time)?; + } + EntityPath::DecisionTime => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + self.hydrate_decision_time(env, decoder, column, &value)?; + } + EntityPath::TransactionTime => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + self.hydrate_transaction_time(env, decoder, column, &value)?; + } + EntityPath::EntityTypeIds => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->metadata->entity_type_ids = value); + } + EntityPath::Archived => { + let value: bool = row.try_get(column.index).map_err(row_hydration_error)?; + hydrate!(self->metadata->archived = Value::Integer(Int::from(value))); + } + EntityPath::Confidence => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + hydrate!(self->metadata->confidence = value.map(Num::from).map(Value::Number)); + } + EntityPath::ProvenanceInferred => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->metadata->provenance->inferred = value); + } + EntityPath::ProvenanceEdition => { + let 
value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->metadata->provenance->edition = value); + } + EntityPath::PropertyMetadata => { + let value: serde_json::Value = + row.try_get(column.index).map_err(row_hydration_error)?; + + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->metadata->property_metadata = value); + } + EntityPath::LeftEntityWebId => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.null(); + return Ok(()); + }; + + let value = decoder.try_decode( + r#type, + JsonValueRef::String(&value.hyphenated().to_string()), + column, + )?; + hydrate!(self->link_data->left_entity_id->web_id = value); + } + EntityPath::LeftEntityUuid => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.null(); + return Ok(()); + }; + + let value = decoder.try_decode( + r#type, + JsonValueRef::String(&value.hyphenated().to_string()), + column, + )?; + hydrate!(self->link_data->left_entity_id->entity_uuid = value); + } + EntityPath::RightEntityWebId => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.null(); + return Ok(()); + }; + + let value = decoder.try_decode( + r#type, + JsonValueRef::String(&value.hyphenated().to_string()), + column, + )?; + hydrate!(self->link_data->right_entity_id->web_id = value); + } + EntityPath::RightEntityUuid => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.null(); + return Ok(()); + }; + + let value = decoder.try_decode( + r#type, + JsonValueRef::String(&value.hyphenated().to_string()), + column, + )?; + hydrate!(self->link_data->right_entity_id->entity_uuid = 
value); + } + EntityPath::LeftEntityConfidence => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + hydrate!(self->link_data->left_entity_confidence = value.map(Num::from).map(Value::Number)); + } + EntityPath::RightEntityConfidence => { + let value: Option = row.try_get(column.index).map_err(row_hydration_error)?; + hydrate!(self->link_data->right_entity_confidence = value.map(Num::from).map(Value::Number)); + } + EntityPath::LeftEntityProvenance => { + let value: Option = + row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.try_null(); + return Ok(()); + }; + + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->link_data->left_entity_provenance = value); + } + EntityPath::RightEntityProvenance => { + let value: Option = + row.try_get(column.index).map_err(row_hydration_error)?; + + let Some(value) = value else { + self.link_data.try_null(); + return Ok(()); + }; + + let value = decoder.try_decode(r#type, (&value).into(), column)?; + hydrate!(self->link_data->right_entity_provenance = value); + } + } + + Ok(()) + } + + fn hydrate_entity_id( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: &serde_json::Value, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let web_id = &value["web_id"]; + let entity_uuid = &value["entity_uuid"]; + + self.hydrate_web_id(env, decoder, column, web_id)?; + self.hydrate_entity_uuid(env, decoder, column, entity_uuid)?; + self.hydrate_draft_id(env, decoder, column, value.get("draft_id"))?; + + Ok(()) + } + + fn hydrate_decision_time<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: impl Into>, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let value = decoder.try_decode( + EntityPath::DecisionTime.expect_type(env), + value.into(), + column, + )?; + 
hydrate!(self->metadata->temporal_versioning->decision_time = value); + + Ok(()) + } + + fn hydrate_transaction_time<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: impl Into>, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let value = decoder.try_decode( + EntityPath::TransactionTime.expect_type(env), + value.into(), + column, + )?; + hydrate!(self->metadata->temporal_versioning->transaction_time = value); + + Ok(()) + } + + fn hydrate_web_id<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: impl Into>, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let value = decoder.try_decode(EntityPath::WebId.expect_type(env), value.into(), column)?; + hydrate!(self->metadata->record_id->entity_id->web_id = value); + + Ok(()) + } + + fn hydrate_entity_uuid<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: impl Into>, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let value = decoder.try_decode( + EntityPath::EntityUuid.expect_type(env), + value.into(), + column, + )?; + hydrate!(self->metadata->record_id->entity_id->entity_uuid = value); + + Ok(()) + } + + fn hydrate_draft_id<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: Option>>, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + let value = value + .map(Into::into) + .filter(|value| !matches!(value, JsonValueRef::Null)) + .map(|value| decoder.try_decode(EntityPath::DraftId.expect_type(env), value, column)) + .transpose()?; + hydrate!(self->metadata->record_id->entity_id->draft_id = value); + + Ok(()) + } + + fn hydrate_edition_id<'value>( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + column: Indexed, + value: impl Into>, + ) -> Result<(), BridgeError<'heap>> + where + A: 
Clone, + { + let value = + decoder.try_decode(EntityPath::EditionId.expect_type(env), value.into(), column)?; + hydrate!(self->metadata->record_id->edition_id = value); + + Ok(()) + } +} + +impl Default for PartialEntity<'_, A> { + fn default() -> Self { + Self { + properties: Required::Skipped, + metadata: Required::Skipped, + link_data: Optional::Skipped, + encodings: Required::Skipped, + } + } +} + +pub(crate) enum Partial<'heap, A: Allocator> { + Entity(PartialEntity<'heap, A>), +} + +impl<'heap, A: Allocator> Partial<'heap, A> { + pub(crate) fn new(vertex_type: VertexType) -> Self { + match vertex_type { + VertexType::Entity => Self::Entity(PartialEntity::default()), + } + } + + pub(crate) fn hydrate_from_postgres( + &mut self, + env: &Environment<'heap>, + decoder: &Decoder<'_, 'heap, A>, + path: TraversalPath, + r#type: TypeId, + column: Indexed, + row: &Row, + ) -> Result<(), BridgeError<'heap>> + where + A: Clone, + { + match (self, path) { + (Self::Entity(entity), TraversalPath::Entity(entity_path)) => { + entity.hydrate_from_postgres(env, decoder, entity_path, r#type, column, row) + } + } + } + + pub(crate) fn finish_in(self, interner: &Interner<'heap>, alloc: A) -> Value<'heap, A> + where + A: Clone, + { + match self { + Self::Entity(entity) => entity.finish_in(interner, alloc), + } + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/postgres.rs b/libs/@local/hashql/eval/src/orchestrator/postgres.rs new file mode 100644 index 00000000000..15c3437b59a --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/postgres.rs @@ -0,0 +1,249 @@ +//! Continuation state for resuming interpreter execution after a database +//! round-trip. +//! +//! When a compiled query returns continuation columns (target block, locals, +//! values), they arrive as flat nullable fields in the result row. This module +//! provides [`PartialPostgresState`] for accumulating those fields during +//! 
hydration, and [`PostgresState`] for the validated, decoded form that can +//! be flushed into a [`CallStack`] to resume interpretation at the correct +//! basic block with the correct local variable bindings. +//! +//! [`CallStack`]: hashql_mir::interpret::CallStack + +use core::alloc::Allocator; + +use hashql_mir::{ + body::{Body, basic_block::BasicBlockId, local::Local}, + def::DefId, + interpret::{CallStack, RuntimeError, value::Value}, + pass::execution::IslandId, +}; +use tokio_postgres::Row; + +use super::{Indexed, codec::decode::Decoder, error::BridgeError, partial::Optional}; +use crate::{ + orchestrator::partial::Hydrated, + postgres::{ColumnDescriptor, ContinuationField}, +}; + +/// In-progress continuation state being assembled from result row columns. +/// +/// Each continuation is identified by a `(body, island)` pair. As columns are +/// encountered during hydration, [`hydrate`](Self::hydrate) populates the +/// target block, locals, and values fields. Once all columns for a row have +/// been processed, [`finish_in`](Self::finish_in) validates completeness and +/// decodes the JSON values into typed [`Value`]s, producing a +/// [`PostgresState`] (or `None` if the continuation target was `NULL`, +/// indicating no resumption is needed). 
+/// +/// [`Value`]: hashql_mir::interpret::value::Value +pub(crate) struct PartialPostgresState { + pub body: DefId, + pub island: IslandId, + + target: Optional, + locals: Optional>, + values: Optional>, +} + +impl PartialPostgresState { + pub(crate) const fn new(body: DefId, island: IslandId) -> Self { + Self { + body, + island, + target: Optional::Skipped, + locals: Optional::Skipped, + values: Optional::Skipped, + } + } + + pub(crate) fn hydrate<'heap>( + &mut self, + column: Indexed, + field: ContinuationField, + row: &Row, + alloc: A, + ) -> Result<(), BridgeError<'heap>> { + match field { + ContinuationField::Block => { + // row is a single (nullable) block id, encoded as an int + let block_id: Option = + row.try_get(column.index) + .map_err(|error| BridgeError::RowHydration { + column, + source: error, + })?; + + match block_id { + Some(block_id) => { + let block_id = u32::try_from(block_id).map_err(|_err| { + BridgeError::InvalidContinuationBlockId { + body: self.body, + block_id, + } + })?; + + self.target = Optional::Value(BasicBlockId::new(block_id)); + } + None => { + self.target = Optional::Null(()); + } + } + } + ContinuationField::Locals => { + let locals: Option> = + row.try_get(column.index) + .map_err(|error| BridgeError::RowHydration { + column, + source: error, + })?; + + match locals { + Some(locals) => { + let mut result = Vec::with_capacity_in(locals.len(), alloc); + for local in locals { + let local = u32::try_from(local).map(Local::new).map_err(|_err| { + BridgeError::InvalidContinuationLocal { + body: self.body, + local, + } + })?; + result.push(local); + } + self.locals = Optional::Value(result); + } + None => { + self.locals = Optional::Null(()); + } + } + } + ContinuationField::Values => { + let values: Option> = + row.try_get(column.index) + .map_err(|error| BridgeError::RowHydration { + column, + source: error, + })?; + + match values { + Some(values) => { + self.values = Optional::Value(values); + } + None => { + self.values = 
Optional::Null(()); + } + } + } + } + + Ok(()) + } + + pub(crate) fn finish_in<'heap>( + self, + decoder: &Decoder<'_, 'heap, A>, + body: &Body<'heap>, + alloc: A, + ) -> Result>, BridgeError<'heap>> + where + A: Clone, + { + debug_assert_eq!(body.id, self.body); + + let target = match self.target { + Hydrated::Null(()) => return Ok(None), + Hydrated::Skipped => { + return Err(BridgeError::IncompleteContinuation { + body: self.body, + field: "target", + }); + } + Hydrated::Value(target) => target, + }; + + let locals = match self.locals { + Optional::Null(()) | Optional::Skipped => { + return Err(BridgeError::IncompleteContinuation { + body: self.body, + field: "locals", + }); + } + Optional::Value(locals) => locals, + }; + let values = match self.values { + Optional::Null(()) | Optional::Skipped => { + return Err(BridgeError::IncompleteContinuation { + body: self.body, + field: "values", + }); + } + Optional::Value(values) => values, + }; + debug_assert_eq!(locals.len(), values.len()); + + let mut evaluated_locals = Vec::with_capacity_in(locals.len(), alloc); + + for (local, value) in locals.into_iter().zip(values) { + let r#type = body.local_decls[local].r#type; + + let value = decoder.decode(r#type, (&value).into()).map_err(|source| { + BridgeError::ContinuationDeserialization { + body: self.body, + local, + source, + } + })?; + evaluated_locals.push((local, value)); + } + + Ok(Some(PostgresState { + body: self.body, + island: self.island, + + target, + locals: evaluated_locals, + })) + } +} + +/// Validated continuation state ready to be applied to a [`CallStack`]. +/// +/// Contains the target [`BasicBlockId`] to jump to and the decoded local +/// variable bindings. Call [`flush`](Self::flush) to write these into the +/// callstack's current frame, advancing execution to the continuation point. 
+/// +/// [`CallStack`]: hashql_mir::interpret::CallStack +pub(crate) struct PostgresState<'heap, A: Allocator> { + pub body: DefId, + pub island: IslandId, + + target: BasicBlockId, + locals: Vec<(Local, Value<'heap, A>), A>, +} + +impl<'heap, A: Allocator> PostgresState<'heap, A> { + /// Writes the continuation state into `callstack`, setting the current + /// block to the target and populating locals with the decoded values. + pub(crate) fn flush<'ctx, E>( + &self, + callstack: &mut CallStack<'ctx, 'heap, A>, + ) -> Result<(), RuntimeError<'heap, E, A>> + where + A: Clone, + { + callstack.set_current_block_unchecked(self.target)?; + + // We must now advance the *last frame* (the current frame), with the current block + // (unsafely) + // And then get all locals and values into the frame + let frame_locals = callstack + .locals_mut() + .unwrap_or_else(|_err: RuntimeError<'heap, !, A>| unreachable!()); + + for (local, value) in &self.locals { + *frame_locals.local_mut(*local) = value.clone(); + } + + Ok(()) + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/request/graph_read.rs b/libs/@local/hashql/eval/src/orchestrator/request/graph_read.rs new file mode 100644 index 00000000000..c1c95e39f24 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/request/graph_read.rs @@ -0,0 +1,450 @@ +//! Orchestrator for [`GraphRead`] suspensions. +//! +//! A [`GraphRead`] suspension is the interpreter's request to load vertices +//! from the graph store. The [`GraphReadOrchestrator`] resolves it by: +//! +//! 1. Looking up the pre-compiled SQL query for the suspension's `(body, block)` pair. +//! 2. Encoding the query parameters from the interpreter's current state. +//! 3. Executing the query against PostgreSQL and streaming rows. +//! 4. For each row: hydrating flat columns into a nested vertex [`Value`], decoding any +//! continuation state, running client-side filter chains (which may themselves involve +//! 
interpreter and postgres interleaving), and accumulating accepted values via a [`Tail`] +//! strategy. +//! 5. Packaging the collected output into a [`Continuation`] for the interpreter to resume with. +//! +//! [`GraphRead`]: hashql_mir::body::terminator::GraphRead +//! [`Value`]: hashql_mir::interpret::value::Value +//! [`Continuation`]: hashql_mir::interpret::suspension::Continuation +//! [`Tail`]: super::super::tail::Tail + +use core::{alloc::Allocator, pin::pin}; + +use futures_lite::StreamExt as _; +use hashql_mir::{ + body::terminator::{GraphRead, GraphReadBody}, + def::DefId, + interpret::{ + CallStack, Inputs, Runtime, RuntimeConfig, RuntimeError, Yield, + suspension::{Continuation, GraphReadSuspension}, + value::Value, + }, + pass::execution::TargetId, +}; +use tokio_postgres::{Client, Row}; + +use crate::{ + orchestrator::{ + Indexed, Orchestrator, + codec::{decode::Decoder, encode::encode_parameter_in}, + error::BridgeError, + events::{Event, EventLog}, + partial::Partial, + postgres::{PartialPostgresState, PostgresState}, + tail::Tail, + }, + postgres::{ColumnDescriptor, PreparedQuery}, +}; + +type PartialState<'heap, L> = (Partial<'heap, L>, Vec, L>); +type State<'heap, L> = (Value<'heap, L>, Vec, L>); + +/// Handler for [`GraphRead`] suspensions. +/// +/// Borrows the parent [`Orchestrator`] for access to the database client, +/// query registry, and evaluation context. All work happens through +/// [`fulfill_in`](Self::fulfill_in), which drives the full pipeline from +/// query execution through row hydration, filtering, and result collection. 
+/// +/// [`GraphRead`]: hashql_mir::body::terminator::GraphRead +/// [`Orchestrator`]: super::super::Orchestrator +pub(crate) struct GraphReadOrchestrator<'or, 'ctx, 'env, 'heap, C, E, A: Allocator> { + inner: &'or Orchestrator<'ctx, 'env, 'heap, C, E, A>, +} + +#[expect(clippy::future_not_send)] +impl<'or, 'ctx, 'env, 'heap, C: AsRef, E: EventLog, A: Allocator> + GraphReadOrchestrator<'or, 'ctx, 'env, 'heap, C, E, A> +{ + pub(crate) const fn new(orchestrator: &'or Orchestrator<'ctx, 'env, 'heap, C, E, A>) -> Self { + Self { + inner: orchestrator, + } + } + + fn postgres_hydrate_in( + &self, + decoder: &Decoder<'ctx, 'heap, L>, + + query: &PreparedQuery<'_, impl Allocator>, + row: &Row, + + alloc: L, + ) -> Result, RuntimeError<'heap, BridgeError<'heap>, L>> { + let mut partial = Partial::new(query.vertex_type); + let mut partial_states = Vec::new_in(alloc.clone()); + + for (index, &column) in query.columns.iter().enumerate() { + match column { + ColumnDescriptor::Path { path, r#type } => { + partial + .hydrate_from_postgres( + self.inner.context.env, + decoder, + path, + r#type, + Indexed::new(index, column), + row, + ) + .map_err(RuntimeError::Suspension)?; + } + ColumnDescriptor::Continuation { + body, + island, + field, + } => { + #[expect( + clippy::option_if_let_else, + reason = "this is required for borrowing because we borrow and push to \ + states" + )] + let state = if let Some(state) = partial_states.iter_mut().find( + |interpreter: &&mut PartialPostgresState<_>| { + interpreter.body == body && interpreter.island == island + }, + ) { + state + } else { + partial_states.push_mut(PartialPostgresState::new(body, island)) + }; + + state + .hydrate(Indexed::new(index, column), field, row, alloc.clone()) + .map_err(RuntimeError::Suspension)?; + } + } + } + + Ok((partial, partial_states)) + } + + fn finish_in( + &self, + + decoder: &Decoder<'ctx, 'heap, L>, + + partial: Partial<'heap, L>, + partial_states: Vec, L>, + + alloc: L, + ) -> Result, 
RuntimeError<'heap, BridgeError<'heap>, L>> { + let mut states = Vec::with_capacity_in(partial_states.len(), alloc.clone()); + + for state in partial_states { + let body = &self.inner.context.bodies[state.body]; + + let state = state + .finish_in(decoder, body, alloc.clone()) + .map_err(RuntimeError::Suspension)?; + + if let Some(state) = state { + states.push(state); + } + } + + let entity = partial.finish_in(self.inner.context.interner, alloc); + + Ok((entity, states)) + } + + #[expect(clippy::too_many_arguments)] + async fn process_row_filter_in( + &self, + inputs: &Inputs<'heap, L>, + + runtime: &mut Runtime<'ctx, 'heap, L>, + states: &[PostgresState<'heap, L>], + + body: DefId, + + entity: &Value<'heap, L>, + env: &Value<'heap, L>, + + alloc: L, + ) -> Result, L>> { + let residual = self.inner.context.execution.lookup(body).ok_or_else(|| { + RuntimeError::Suspension(BridgeError::MissingExecutionResidual { body }) + })?; + + let Ok(mut callstack) = CallStack::new_in( + &self.inner.context.bodies[body], + [Ok::<_, !>(env.clone()), Ok(entity.clone())], + alloc.clone(), + ); + + self.inner.event_log.log(Event::FilterStarted { body }); + + let eval = 'eval: loop { + let (island_id, island_node) = residual.islands.lookup(callstack.current_block()?); + let target = island_node.target(); + + self.inner.event_log.log(Event::IslandEntered { + body, + island: island_id, + target, + }); + + match target { + TargetId::Interpreter => { + loop { + let next = runtime.run_until_transition(&mut callstack, |target| { + residual.islands.lookup(target).0 == island_id + })?; + + match next { + core::ops::ControlFlow::Continue(Yield::Return(value)) => { + let Value::Integer(value) = value else { + return Err(RuntimeError::Suspension( + BridgeError::InvalidFilterReturn { body }, + )); + }; + + let Some(value) = value.as_bool() else { + return Err(RuntimeError::Suspension( + BridgeError::InvalidFilterReturn { body }, + )); + }; + + break 'eval value; + } + 
core::ops::ControlFlow::Continue(Yield::Suspension(suspension)) => { + let continuation = Box::pin(self.inner.fulfill_in( + inputs, + &callstack, + suspension, + alloc.clone(), + )) + .await?; + + continuation.apply(&mut callstack)?; + } + core::ops::ControlFlow::Break(_) => { + // We're finished, this means, and the next island is + // up. To determine the next island we simply break. + break; + } + } + } + } + TargetId::Postgres => { + // Postgres is special, because we hoist any computation directly + // into the initial query. + // There can be two different cases here: + // 1. The value is NULL, meaning that the filter has already been fully + // evaluated in the postgres query + // 2. The value is not NULL, which means that we need to continue evaluation of + // the filter body. + let Some(state) = states + .iter() + .find(|state| state.body == body && state.island == island_id) + else { + // This is the implicit value, in case that the where clause + // upstream has been evaluated. If the postgres query has + // produced a value, it must mean that the condition must've + // been true. + self.inner + .event_log + .log(Event::ContinuationImplicitTrue { body }); + break 'eval true; + }; + + // We must not flush the locals of the body to the values that have + // been captured, and advance the pointer. + state.flush(&mut callstack)?; + self.inner.event_log.log(Event::ContinuationFlushed { + body, + island: island_id, + }); + } + TargetId::Embedding => { + // TODO: in the future this may benefit from a dispatch barrier, the + // idea that we wait for sufficient embedding calls to the same + // island to dispatch. Must be smaller than the buffer size. 
+ unimplemented!() + } + } + }; + + Ok(eval) + } + + async fn process_row_transform_in( + &self, + inputs: &Inputs<'heap, L>, + parent: &CallStack<'ctx, 'heap, L>, + + states: &[PostgresState<'heap, L>], + + entity: Value<'heap, L>, + + read: &GraphRead<'heap>, + + alloc: L, + ) -> Result>, RuntimeError<'heap, BridgeError<'heap>, L>> { + let mut runtime = Runtime::new_in( + RuntimeConfig::default(), + self.inner.context.bodies, + inputs, + alloc.clone(), + ); + + for body in &read.body { + match body { + &GraphReadBody::Filter(body, env) => { + let env = parent.locals()?.local(env)?; + + runtime.reset(); + let result = self + .process_row_filter_in( + inputs, + &mut runtime, + states, + body, + &entity, + env, + alloc.clone(), + ) + .await?; + + // Filters are short circuiting and act as `&&`, meaning if one is false, all + // are. + if result { + self.inner.event_log.log(Event::FilterAccepted { body }); + } else { + self.inner.event_log.log(Event::FilterRejected { body }); + return Ok(None); + } + } + } + } + + Ok(Some(entity)) + } + + async fn process_row_in( + &self, + inputs: &Inputs<'heap, L>, + parent: &CallStack<'ctx, 'heap, L>, + + read: &GraphRead<'heap>, + query: &PreparedQuery<'heap, impl Allocator>, + + row: Row, + + alloc: L, + ) -> Result>, RuntimeError<'heap, BridgeError<'heap>, L>> { + let decoder = Decoder::new( + self.inner.context.env, + self.inner.context.interner, + alloc.clone(), + ); + + let (partial, partial_states) = + self.postgres_hydrate_in(&decoder, query, &row, alloc.clone())?; + + let (entity, states) = self.finish_in(&decoder, partial, partial_states, alloc.clone())?; + + // Now that we have the completed states, it's time to fulfill the graph read, by running + // everything through the filter chain. + // This is sequential in nature, because in the future filters may depend on the mapped + // value. 
The parallelisation opportunity of sequential filters isn't applicable here, + // instead that should be done inside either the HIR or MIR. + self.process_row_transform_in(inputs, parent, &states, entity, read, alloc) + .await + } + + // The entrypoint for graph read operations. The entrypoint is *always* postgres, because that's + // the primary data store. + pub(crate) async fn fulfill_in( + &self, + inputs: &Inputs<'heap, L>, + callstack: &CallStack<'ctx, 'heap, L>, + suspension @ GraphReadSuspension { + body, + block, + read, + axis: _, + }: GraphReadSuspension<'env, 'heap>, + alloc: L, + ) -> Result, RuntimeError<'heap, BridgeError<'heap>, L>> { + // Because postgres is our source of truth, it means that any graph read suspension must be + // resolved by querying postgres first. + let query = + self.inner.queries.find(body, block).ok_or_else(|| { + RuntimeError::Suspension(BridgeError::QueryLookup { body, block }) + })?; + let statement = query.transpile().to_string(); + + let locals = callstack.locals().map_err(RuntimeError::widen)?; + let mut params = Vec::with_capacity_in(query.parameters.len(), alloc.clone()); + for param in query.parameters.iter().map(|parameter| { + encode_parameter_in( + parameter, + inputs, + &suspension.axis, + |local, field| { + let value = locals.local(local)?; + value.project(field) + }, + alloc.clone(), + ) + }) { + params.push(param?); + } + + self.inner + .event_log + .log(Event::QueryExecuted { body, block }); + + // The actual data and entities that we need to take a look at. 
+ let response = self + .inner + .client + .as_ref() + .query_raw(&statement, params.iter().map(|param| &**param)) + .await + .map_err(|source| BridgeError::QueryExecution { + sql: statement.clone(), + source, + }) + .map_err(RuntimeError::Suspension)?; + + let mut response = pin!(response); + + // TODO: parallelisation opportunity + let mut output = Tail::new(read.tail); + while let Some(row) = response.next().await { + let row = row + .map_err(|error| BridgeError::QueryExecution { + sql: statement.clone(), + source: error, + }) + .map_err(RuntimeError::Suspension)?; + + self.inner.event_log.log(Event::RowReceived); + + let item = self + .process_row_in(inputs, callstack, read, query, row, alloc.clone()) + .await?; + + if let Some(item) = item { + self.inner.event_log.log(Event::RowAccepted); + output.push(item); + } else { + self.inner.event_log.log(Event::RowRejected); + } + } + + let output = output.finish(); + Ok(suspension.resolve(output)) + } +} diff --git a/libs/@local/hashql/eval/src/orchestrator/request/mod.rs b/libs/@local/hashql/eval/src/orchestrator/request/mod.rs new file mode 100644 index 00000000000..a274e6d7a1e --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/request/mod.rs @@ -0,0 +1,10 @@ +//! Per-suspension-type request handlers. +//! +//! Each suspension variant has a dedicated orchestrator that knows how to +//! fulfill it. Currently the only variant is [`GraphRead`], handled by +//! [`GraphReadOrchestrator`]. +//! +//! [`GraphRead`]: hashql_mir::body::terminator::GraphRead + +mod graph_read; +pub(crate) use self::graph_read::GraphReadOrchestrator; diff --git a/libs/@local/hashql/eval/src/orchestrator/tail.rs b/libs/@local/hashql/eval/src/orchestrator/tail.rs new file mode 100644 index 00000000000..c155be562e1 --- /dev/null +++ b/libs/@local/hashql/eval/src/orchestrator/tail.rs @@ -0,0 +1,49 @@ +//! Result accumulation strategies for graph read operations. +//! +//! 
After each row is hydrated and passes any filter chains, the resulting +//! [`Value`] must be collected into a final output. The [`Tail`] enum +//! determines the accumulation strategy, currently only [`Collect`], which +//! gathers all values into a [`List`]. +//! +//! [`Value`]: hashql_mir::interpret::value::Value +//! [`Collect`]: Tail::Collect +//! [`List`]: hashql_mir::interpret::value::List + +use core::alloc::Allocator; + +use hashql_mir::{ + body::terminator::GraphReadTail, + interpret::value::{self, Value}, +}; + +/// Accumulator for row results, determined by the [`GraphReadTail`] variant. +/// +/// Created once per graph read suspension, receives each post-filter value via +/// [`push`](Self::push), and produces the final output via +/// [`finish`](Self::finish). +pub(crate) enum Tail<'heap, A: Allocator> { + Collect(value::List<'heap, A>), +} + +impl<'heap, A: Allocator> Tail<'heap, A> { + pub(crate) fn new(tail: GraphReadTail) -> Self { + match tail { + GraphReadTail::Collect => Self::Collect(value::List::new()), + } + } + + pub(crate) fn push(&mut self, value: value::Value<'heap, A>) + where + A: Clone, + { + match self { + Self::Collect(list) => list.push_back(value), + } + } + + pub(crate) fn finish(self) -> Value<'heap, A> { + match self { + Self::Collect(list) => Value::List(list), + } + } +} diff --git a/libs/@local/hashql/eval/src/postgres/continuation.rs b/libs/@local/hashql/eval/src/postgres/continuation.rs index b722e1e0ab9..b8f6600e012 100644 --- a/libs/@local/hashql/eval/src/postgres/continuation.rs +++ b/libs/@local/hashql/eval/src/postgres/continuation.rs @@ -45,6 +45,32 @@ impl ContinuationAlias { } } +/// Continuation fields returned to the bridge in the `SELECT` list. +/// +/// A subset of [`ContinuationColumn`] that excludes internal-only columns +/// ([`Entry`](ContinuationColumn::Entry) and [`Filter`](ContinuationColumn::Filter)). 
+/// Each variant corresponds to a column the bridge must decode to reconstruct +/// island exit control flow and live-out locals. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ContinuationField { + /// The target basic block id for island exits. + Block, + /// Array of local ids being transferred on island exit. + Locals, + /// Array of jsonb values corresponding to [`Self::Locals`]. + Values, +} + +impl From for ContinuationColumn { + fn from(value: ContinuationField) -> Self { + match value { + ContinuationField::Block => Self::Block, + ContinuationField::Locals => Self::Locals, + ContinuationField::Values => Self::Values, + } + } +} + /// All column names used within the continuation LATERAL subquery and the /// `continuation` composite type. /// diff --git a/libs/@local/hashql/eval/src/postgres/filter/mod.rs b/libs/@local/hashql/eval/src/postgres/filter/mod.rs index fb854e37498..f900b3f08f0 100644 --- a/libs/@local/hashql/eval/src/postgres/filter/mod.rs +++ b/libs/@local/hashql/eval/src/postgres/filter/mod.rs @@ -96,7 +96,10 @@ impl From for Expression { let row = match continuation { Continuation::Return { filter } => { vec![ - filter.grouped().cast(PostgresType::Boolean), + filter + .grouped() + .cast(PostgresType::Boolean) + .coalesce(Self::Constant(query::Constant::Boolean(false))), null.clone(), null.clone(), null, @@ -184,7 +187,20 @@ fn finish_switch_int( let discriminant = Box::new(discriminant.grouped().cast(PostgresType::Int)); let mut discriminant = Some(discriminant); - let mut conditions = Vec::with_capacity(targets.values().len()); + // +1 for the NULL guard: a NULL discriminant means the computation could + // not be evaluated (e.g. missing JSONB key), so we reject the row. 
+ let mut conditions = Vec::with_capacity(targets.values().len() + 1); + + conditions.push(( + Expression::Unary(UnaryExpression { + op: UnaryOperator::IsNull, + expr: discriminant.clone().unwrap_or_else(|| unreachable!()), + }), + Continuation::Return { + filter: Expression::Constant(query::Constant::Boolean(false)), + } + .into(), + )); for (index, (&value, then)) in targets.values().iter().zip(branch_results).enumerate() { let is_last = index == targets.values().len() - 1; @@ -219,6 +235,8 @@ pub(crate) struct GraphReadFilterCompiler<'ctx, 'heap, A: Allocator = Global, S: context: &'ctx EvalContext<'ctx, 'heap, A>, body: &'ctx Body<'heap>, + env: Local, + /// MIR local → SQL expression mapping, with snapshot/rollback for branching. locals: LocalSnapshotVec, AppendOnly, S>, diagnostics: EvalDiagnosticIssues, @@ -230,6 +248,7 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea pub(crate) fn new( context: &'ctx EvalContext<'ctx, 'heap, A>, body: &'ctx Body<'heap>, + env: Local, scratch: S, ) -> Self where @@ -238,6 +257,7 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea Self { context, body, + env, locals: IdSnapshotVec::new_in(scratch.clone()), diagnostics: DiagnosticIssues::new(), scratch, @@ -295,8 +315,8 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea }, rest @ .., ] => { - let param = db.parameters.env(self.body.id, *field); - (param.into(), rest) + let param = db.parameters.env(self.env, *field); + (param.to_expr(), rest) } [..] 
=> { self.diagnostics.push(invalid_env_projection(span)); @@ -334,7 +354,7 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea ProjectionKind::Field(field_index) => { Expression::Constant(query::Constant::U32(field_index.as_u32())) } - &ProjectionKind::FieldByName(symbol) => db.parameters.symbol(symbol).into(), + &ProjectionKind::FieldByName(symbol) => db.parameters.symbol(symbol).to_expr(), &ProjectionKind::Index(local) => self .locals .lookup(local) @@ -360,11 +380,11 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea constant: &Constant<'heap>, ) -> Expression { match constant { - Constant::Int(int) if let Some(uint) = int.as_u32() => { + Constant::Int(int) if let Ok(uint) = u32::try_from(int.as_uint()) => { Expression::Constant(query::Constant::U32(uint)) } - &Constant::Int(int) => db.parameters.int(int).into(), - &Constant::Primitive(primitive) => db.parameters.primitive(primitive).into(), + &Constant::Int(int) => db.parameters.int(int).to_expr(), + &Constant::Primitive(primitive) => db.parameters.primitive(primitive).to_expr(), // Unit is the zero-sized type, represented as JSON `null` inside jsonb values. Constant::Unit => Expression::Constant(query::Constant::JsonNull), Constant::FnPtr(_) => { @@ -418,17 +438,21 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea // Operands coming from jsonb extraction are untyped from Postgres' perspective. // Arithmetic and bitwise operators need explicit casts; comparisons work on jsonb // directly. 
- let (op, cast) = match *op { - BinOp::Add => (BinaryOperator::Add, Some(PostgresType::Numeric)), - BinOp::Sub => (BinaryOperator::Subtract, Some(PostgresType::Numeric)), - BinOp::BitAnd => (BinaryOperator::BitwiseAnd, Some(PostgresType::BigInt)), - BinOp::BitOr => (BinaryOperator::BitwiseOr, Some(PostgresType::BigInt)), - BinOp::Eq => (BinaryOperator::Equal, None), - BinOp::Ne => (BinaryOperator::NotEqual, None), - BinOp::Lt => (BinaryOperator::Less, None), - BinOp::Lte => (BinaryOperator::LessOrEqual, None), - BinOp::Gt => (BinaryOperator::Greater, None), - BinOp::Gte => (BinaryOperator::GreaterOrEqual, None), + let (op, cast, function) = match *op { + BinOp::Add => (BinaryOperator::Add, Some(PostgresType::Numeric), None), + BinOp::Sub => (BinaryOperator::Subtract, Some(PostgresType::Numeric), None), + BinOp::BitAnd => (BinaryOperator::BitwiseAnd, Some(PostgresType::BigInt), None), + BinOp::BitOr => (BinaryOperator::BitwiseOr, Some(PostgresType::BigInt), None), + BinOp::Eq => (BinaryOperator::Equal, None, Some(query::Function::ToJson)), + BinOp::Ne => ( + BinaryOperator::NotEqual, + None, + Some(query::Function::ToJson), + ), + BinOp::Lt => (BinaryOperator::Less, None, None), + BinOp::Lte => (BinaryOperator::LessOrEqual, None, None), + BinOp::Gt => (BinaryOperator::Greater, None, None), + BinOp::Gte => (BinaryOperator::GreaterOrEqual, None, None), }; if let Some(target) = cast { @@ -436,6 +460,11 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea right = right.grouped().cast(target); } + if let Some(function) = function { + left = Expression::Function(function(Box::new(left))); + right = Expression::Function(function(Box::new(right))); + } + Expression::Binary(BinaryExpression { op, left: Box::new(left), @@ -450,12 +479,12 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea let index = db.parameters.input(*name); match *op { - InputOp::Load { required: _ } => index.into(), + InputOp::Load { 
required: _ } => index.to_expr(), InputOp::Exists => Expression::Unary(UnaryExpression { op: UnaryOperator::Not, expr: Box::new(Expression::Unary(UnaryExpression { op: UnaryOperator::IsNull, - expr: Box::new(index.into()), + expr: Box::new(index.to_expr()), })), }), } @@ -488,7 +517,7 @@ impl<'ctx, 'heap, A: Allocator, S: Allocator> GraphReadFilterCompiler<'ctx, 'hea let key = db.parameters.symbol(key); let value = self.compile_operand(db, span, value); - expressions.push((key.into(), value)); + expressions.push((key.to_expr(), value)); } // Values are reconstructed to their corresponding tuple and struct definitions diff --git a/libs/@local/hashql/eval/src/postgres/filter/tests.rs b/libs/@local/hashql/eval/src/postgres/filter/tests.rs index 41035645467..ed184c25007 100644 --- a/libs/@local/hashql/eval/src/postgres/filter/tests.rs +++ b/libs/@local/hashql/eval/src/postgres/filter/tests.rs @@ -15,6 +15,7 @@ use hash_graph_postgres_store::store::postgres::query::{Expression, Transpile as use hashql_core::{ heap::{Heap, Scratch}, id::Id as _, + module::std_lib::graph::types::knowledge::entity as entity_types, symbol::sym, r#type::{TypeBuilder, TypeId, environment::Environment}, }; @@ -125,9 +126,11 @@ impl core::fmt::Display for FilterReport { fn compile_filter_islands<'heap>(fixture: &Fixture<'heap>, heap: &'heap Heap) -> FilterReport { let mut scratch = Scratch::new(); let def = fixture.def(); + let interner = Interner::new(heap); let context = EvalContext::new_in( &fixture.env, + &interner, &fixture.bodies, &fixture.execution, heap, @@ -157,7 +160,7 @@ fn compile_filter_islands<'heap>(fixture: &Fixture<'heap>, heap: &'heap Heap) -> let island = &residual.islands[island_id]; let mut db = DatabaseContext::new_in(heap); - let mut compiler = GraphReadFilterCompiler::new(&context, body, Global); + let mut compiler = GraphReadFilterCompiler::new(&context, body, Local::ENV, Global); let expression = compiler.compile_body(&mut db, island); let diagnostics = 
compiler.into_diagnostics(); @@ -227,9 +230,11 @@ fn compile_full_query_with_mask<'heap>( ) -> QueryReport { let mut scratch = Scratch::new(); let def = fixture.def(); + let interner = Interner::new(heap); let mut context = EvalContext::new_in( &fixture.env, + &interner, &fixture.bodies, &fixture.execution, heap, @@ -253,7 +258,7 @@ fn compile_full_query_with_mask<'heap>( let prepared_query = { let mut compiler = PostgresCompiler::new_in(&mut context, &mut scratch).with_property_mask(property_mask); - compiler.compile(&read) + compiler.compile_graph_read(&read) }; assert!( @@ -558,10 +563,10 @@ fn data_island_provides_without_lateral() { let callee_id = DefId::new(99); - // Light entity path accesses — solver puts everything on Interpreter, creating only a + // Light entity path accesses: solver puts everything on Interpreter, creating only a // Postgres Data island for the entity columns. No Postgres exec island exists. let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], + decl env: (), vertex: (|t| entity_types::types::entity(t, t.unknown(), None)), uuid: ?, func: [fn() -> ?], result: ?; @proj v_uuid = vertex.entity_uuid: ?; @@ -608,7 +613,7 @@ fn provides_drives_select_and_joins() { // bb0 accesses entity paths (Postgres-origin), then bb1 uses a closure (Interpreter). // The Postgres island should provide the accessed paths to the Interpreter island. let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], + decl env: (), vertex: (|t| entity_types::types::entity(t, t.unknown(), None)), uuid: ?, archived: ?, func: [fn() -> ?], result: ?; @proj v_uuid = vertex.entity_uuid: ?, v_metadata = vertex.metadata: ?, @@ -739,7 +744,7 @@ fn property_mask() { // Properties access in bb0 (Postgres Data island) with an apply in bb1 (Interpreter) // ensures Properties and `PropertyMetadata` appear in the provides set. 
let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { - decl env: (), vertex: [Opaque sym::path::Entity; ?], + decl env: (), vertex: (|t| entity_types::types::entity(t, t.unknown(), None)), props: ?, prop_meta: ?, func: [fn() -> ?], result: ?; @proj v_props = vertex.properties: ?, v_meta = vertex.metadata: ?, @@ -979,6 +984,43 @@ fn unary_bitnot() { assert_snapshot!("unary_bitnot", report.to_string()); } +/// Temporal leaf path: `vertex.metadata.temporal_versioning.decision_time` decomposes +/// the `tstzrange` column into a structured interval with `lower`/`upper`/`lower_inc`/ +/// `upper_inc`/`lower_inf` and epoch-millisecond extraction. +#[test] +fn temporal_decision_time_interval() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let callee_id = DefId::new(99); + + let body = body!(interner, env; [graph::read::filter]@0/2 -> ? { + decl env: (), vertex: [Opaque sym::path::Entity; ?], + decision: ?, func: [fn() -> ?], result: ?; + @proj v_meta = vertex.metadata: ?, + v_temporal = v_meta.temporal_versioning: ?, + v_decision = v_temporal.decision_time: ?; + + bb0() { + decision = load v_decision; + goto bb1(); + }, + bb1() { + func = load callee_id; + result = apply func; + return result; + } + }); + + let fixture = Fixture::new(&heap, env, body); + let report = compile_full_query(&fixture, &heap); + + let settings = snapshot_settings(); + let _guard = settings.bind_to_scope(); + assert_snapshot!("temporal_decision_time_interval", report.to_string()); +} + /// `BinOp::BitAnd` → `BinaryOperator::BitwiseAnd` with `::bigint` casts on both operands. 
#[test] fn binary_bitand_bigint_cast() { diff --git a/libs/@local/hashql/eval/src/postgres/mod.rs b/libs/@local/hashql/eval/src/postgres/mod.rs index 24c8d72bcf6..71743748071 100644 --- a/libs/@local/hashql/eval/src/postgres/mod.rs +++ b/libs/@local/hashql/eval/src/postgres/mod.rs @@ -36,25 +36,36 @@ use hash_graph_postgres_store::store::postgres::query::{ self, Column, Expression, Identifier, SelectExpression, SelectStatement, Transpile as _, WhereExpression, table::EntityTemporalMetadata, }; -use hashql_core::heap::BumpAllocator; +use hashql_core::{ + debug_panic, + heap::BumpAllocator, + id::Id as _, + r#type::{TypeBuilder, TypeId, environment::LatticeEnvironment}, +}; use hashql_mir::{ body::{ Body, - terminator::{GraphRead, GraphReadBody}, + basic_block::BasicBlockId, + local::Local, + terminator::{GraphRead, GraphReadBody, GraphReadHead, TerminatorKind}, }, - def::DefId, + def::{DefId, DefIdSlice}, pass::{ analysis::dataflow::lattice::HasBottom as _, execution::{ - IslandKind, IslandNode, TargetId, VertexType, + IslandId, IslandKind, IslandNode, TargetId, VertexType, traversal::{EntityPath, TraversalMapLattice, TraversalPath, TraversalPathBitMap}, }, }, }; -pub use self::parameters::{ParameterIndex, Parameters, TemporalAxis}; use self::{ continuation::ContinuationColumn, filter::GraphReadFilterCompiler, projections::Projections, + types::traverse_struct, +}; +pub use self::{ + continuation::ContinuationField, + parameters::{Parameter, ParameterIndex, ParameterValue, Parameters, TemporalAxis}, }; use crate::context::EvalContext; @@ -64,6 +75,7 @@ mod filter; mod parameters; mod projections; mod traverse; +mod types; /// Mutable compilation state accumulated while building a single SQL query. 
/// @@ -113,8 +125,11 @@ impl DatabaseContext<'_, A> { let tx_param = self .parameters .temporal_axis(TemporalAxis::Transaction) - .into(); - let dt_param = self.parameters.temporal_axis(TemporalAxis::Decision).into(); + .to_expr(); + let dt_param = self + .parameters + .temporal_axis(TemporalAxis::Decision) + .to_expr(); self.where_expression.add_condition(Expression::overlap( Expression::ColumnReference(query::ColumnReference { @@ -135,13 +150,57 @@ impl DatabaseContext<'_, A> { } } +/// Describes a single column in the `SELECT` list of a compiled query. +/// +/// The bridge uses this manifest to decode each column in a result row without +/// parsing column names. Entity field columns carry a [`TraversalPath`] for +/// hydration; continuation columns carry the body/island identity for routing +/// control flow back to the interpreter. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ColumnDescriptor { + /// An entity field produced by the provides set. + /// + /// The [`TraversalPath`] identifies the storage location; the [`TypeId`] is the + /// field's type within the instantiated vertex type, used for type-directed + /// deserialization. + Path { path: TraversalPath, r#type: TypeId }, + /// A decomposed continuation field from an island's `CROSS JOIN LATERAL`. + Continuation { + body: DefId, + island: IslandId, + field: ContinuationField, + }, +} + +impl Display for ColumnDescriptor { + fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Path { path, .. } => write!(fmt, "entity path `{}`", path.as_symbol()), + Self::Continuation { + body, + island, + field, + } => { + write!( + fmt, + "continuation {} (body {body}, island {island})", + ContinuationColumn::from(*field).as_str() + ) + } + } + } +} + /// A fully-compiled SQL query ready for execution. 
/// -/// Contains the typed query AST ([`SelectStatement`]) and the parameter catalog ([`Parameters`]) -/// that the interpreter uses to bind runtime values in the correct order. +/// Contains the typed query AST ([`SelectStatement`]), the parameter catalog ([`Parameters`]) +/// for binding runtime values, and a column manifest ([`ColumnDescriptor`]s) that tells the +/// bridge how to decode each result column. pub struct PreparedQuery<'heap, A: Allocator> { + pub vertex_type: VertexType, pub parameters: Parameters<'heap, A>, pub statement: SelectStatement, + pub columns: Vec, } impl PreparedQuery<'_, A> { @@ -150,6 +209,32 @@ impl PreparedQuery<'_, A> { } } +/// Registry of compiled SQL queries, indexed by definition and basic block. +/// +/// The SQL lowering pass produces one [`PreparedQuery`] per [`GraphRead`] +/// terminator in the MIR. This struct stores them contiguously in `queries` +/// with `offsets` providing per-definition starting positions, so +/// [`find`](Self::find) can locate the correct query for a given `(DefId, +/// BasicBlockId)` pair. +/// +/// [`GraphRead`]: hashql_mir::body::terminator::GraphRead +pub struct PreparedQueries<'heap, A: Allocator> { + offsets: Box, A>, + queries: Vec<(BasicBlockId, PreparedQuery<'heap, A>), A>, +} + +impl<'heap, A: Allocator> PreparedQueries<'heap, A> { + pub fn find(&self, body: DefId, block: BasicBlockId) -> Option<&PreparedQuery<'heap, A>> { + let start = self.offsets[body]; + let end = self.offsets[body.plus(1)]; + + self.queries[start..end] + .iter() + .find(|(id, _)| *id == block) + .map(|(_, query)| query) + } +} + /// Compiles Postgres-targeted MIR islands into a single PostgreSQL `SELECT`. /// /// Created per evaluation and used to compile [`GraphRead`] terminators. Compilation emits @@ -200,11 +285,42 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> self } + /// Joins the property types across all filter bodies into a single type. 
+ /// + /// Each filter body may operate on a different `Entity`. This computes the + /// least upper bound of all the `T` parameters, producing the unified property type + /// for the query's SELECT list. Returns `unknown` if there are no filter bodies. + fn resolve_property_type(&self, read: &GraphRead<'heap>) -> TypeId { + let mut lattice = LatticeEnvironment::new(self.context.env).without_warnings(); + + read.body + .iter() + .map(|body| match body { + &GraphReadBody::Filter(def_id, _) => { + let vertex = self.context.bodies[def_id].local_decls[Local::VERTEX].r#type; + + let path = EntityPath::Properties.field_path(); + + traverse_struct(self.context.env, vertex, path).unwrap_or_else(|| { + debug_panic!( + "failed to extract property type from vertex type {vertex:?}; the \ + vertex type should contain a resolvable properties field" + ); + + TypeBuilder::synthetic(self.context.env).unknown() + }) + } + }) + .reduce(|lhs, rhs| lattice.join(lhs, rhs)) + .unwrap_or_else(|| TypeBuilder::synthetic(self.context.env).unknown()) + } + /// Returns `None` for data-only islands that produce no SQL. 
fn compile_graph_read_filter_island( &mut self, db: &mut DatabaseContext<'heap, A>, body: &Body<'heap>, + env: Local, island: &IslandNode, provides: &mut TraversalPathBitMap, ) -> Option { @@ -219,7 +335,7 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> // TODO: we might want a longer lived graph read filter compiler here let expression = self.scratch.scoped(|alloc| { - let mut compiler = GraphReadFilterCompiler::new(self.context, body, &alloc); + let mut compiler = GraphReadFilterCompiler::new(self.context, body, env, &alloc); let expression = compiler.compile_body(db, island); let mut diagnostics = compiler.into_diagnostics(); @@ -236,6 +352,7 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> &mut self, db: &mut DatabaseContext<'heap, A>, def: DefId, + env: Local, provides: &mut TraversalPathBitMap, ) { let body = &self.context.bodies[def]; @@ -251,7 +368,7 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> for (island_id, island) in islands { let Some(expression) = - self.compile_graph_read_filter_island(db, body, island, provides) + self.compile_graph_read_filter_island(db, body, env, island, provides) else { continue; }; @@ -287,15 +404,14 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> } } - /// Compiles a [`GraphRead`] into a [`PreparedQuery`]. - /// - /// [`GraphRead`]: hashql_mir::body::terminator::GraphRead - pub fn compile(&mut self, read: &'ctx GraphRead<'heap>) -> PreparedQuery<'heap, A> + fn compile_graph_read_entity(&mut self, read: &GraphRead<'heap>) -> PreparedQuery<'heap, A> where A: Clone, { let mut db = DatabaseContext::new_in(self.alloc.clone()); + let mut property_type = None; + // Temporal conditions go first - they're always present on the base table // and don't depend on anything the filter body produces. 
db.add_temporal_conditions(); @@ -304,8 +420,8 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> for body in &read.body { match body { - &GraphReadBody::Filter(def_id, _) => { - self.compile_graph_read_filter(&mut db, def_id, &mut provides); + &GraphReadBody::Filter(def_id, env) => { + self.compile_graph_read_filter(&mut db, def_id, env, &mut provides); } } } @@ -314,6 +430,7 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> // Each EntityPath in `provides` becomes a SELECT expression via eval_entity_path, // which also registers the necessary projection joins in DatabaseContext. let mut select_expressions = vec![]; + let mut columns = Vec::new_in(self.alloc.clone()); for traversal_path in provides[VertexType::Entity].iter() { let TraversalPath::Entity(path) = traversal_path; @@ -328,10 +445,20 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> let alias = Identifier::from(traversal_path.as_symbol().unwrap()); + let field_type = traversal_path + .resolve_type(self.context.env) + .unwrap_or_else(|| { + *property_type.get_or_insert_with(|| self.resolve_property_type(read)) + }); + select_expressions.push(SelectExpression::Expression { expression, alias: Some(alias), }); + columns.push(ColumnDescriptor::Path { + path: traversal_path, + r#type: field_type, + }); } // Decompose each continuation LATERAL into individual columns so the @@ -341,13 +468,18 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> let table_ref = cont_alias.table_ref(); for field in [ - ContinuationColumn::Block, - ContinuationColumn::Locals, - ContinuationColumn::Values, + ContinuationField::Block, + ContinuationField::Locals, + ContinuationField::Values, ] { select_expressions.push(SelectExpression::Expression { - expression: continuation::field_access(&table_ref, field), - alias: Some(cont_alias.field_identifier(field)), + expression: continuation::field_access(&table_ref, field.into()), + alias: Some(cont_alias.field_identifier(field.into())), + }); + 
columns.push(ColumnDescriptor::Continuation { + body: cont_alias.body, + island: cont_alias.island, + field, }); } } @@ -371,8 +503,57 @@ impl<'eval, 'ctx, 'heap, A: Allocator, S: BumpAllocator> .build(); PreparedQuery { + vertex_type: VertexType::Entity, parameters: db.parameters, statement: query, + columns, } } + + /// Compiles a [`GraphRead`] into a [`PreparedQuery`]. + /// + /// [`GraphRead`]: hashql_mir::body::terminator::GraphRead + pub fn compile_graph_read(&mut self, read: &'ctx GraphRead<'heap>) -> PreparedQuery<'heap, A> + where + A: Clone, + { + match read.head { + GraphReadHead::Entity { .. } => self.compile_graph_read_entity(read), + } + } + + #[expect(unsafe_code)] + pub fn compile(&mut self) -> PreparedQueries<'heap, A> + where + A: Clone, + { + // SAFETY: 0 is a valid value for `usize` + let offsets = unsafe { + Box::new_zeroed_slice_in(self.context.bodies.len() + 1, self.alloc.clone()) + .assume_init() + }; + let mut offsets = DefIdSlice::from_boxed_slice(offsets); + + let mut queries = Vec::with_capacity_in(self.context.bodies.len(), self.alloc.clone()); + + let bodies = self.context.bodies; + for (body_id, body) in bodies.iter_enumerated() { + for (block_id, block) in body.basic_blocks.iter_enumerated() { + match &block.terminator.kind { + TerminatorKind::GraphRead(read) => { + let query = self.compile_graph_read(read); + queries.push((block_id, query)); + } + TerminatorKind::Goto(_) + | TerminatorKind::SwitchInt(_) + | TerminatorKind::Return(_) + | TerminatorKind::Unreachable => {} + } + } + + offsets[body_id.plus(1)] = queries.len(); + } + + PreparedQueries { offsets, queries } + } } diff --git a/libs/@local/hashql/eval/src/postgres/parameters.rs b/libs/@local/hashql/eval/src/postgres/parameters.rs index a85a7127db8..411a93544d9 100644 --- a/libs/@local/hashql/eval/src/postgres/parameters.rs +++ b/libs/@local/hashql/eval/src/postgres/parameters.rs @@ -11,14 +11,17 @@ use core::{ fmt::{self, Display}, }; -use 
hash_graph_postgres_store::store::postgres::query::Expression; +use hash_graph_postgres_store::store::postgres::query::{Expression, PostgresType}; use hashql_core::{ collections::{FastHashMap, fast_hash_map_in}, id::{self, Id as _, IdVec}, symbol::Symbol, value::Primitive, }; -use hashql_mir::{body::place::FieldIndex, def::DefId, interpret::value::Int}; +use hashql_mir::{ + body::{local::Local, place::FieldIndex}, + interpret::value::Int, +}; id::newtype!( /// Index of a SQL parameter in the compiled query, rendered as `$N` by the SQL formatter. @@ -38,12 +41,48 @@ impl From for Expression { } } +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum ParameterKind { + Value, + String, + Integer, + Boolean, + Number, + TimestampInterval, +} + +impl From for PostgresType { + fn from(value: ParameterKind) -> Self { + match value { + ParameterKind::Value => Self::JsonB, + ParameterKind::String => Self::Text, + ParameterKind::Integer => Self::BigInt, + ParameterKind::Boolean => Self::Boolean, + ParameterKind::Number => Self::Numeric, + ParameterKind::TimestampInterval => Self::TimestampTzRange, + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct Parameter { + pub index: ParameterIndex, + pub kind: ParameterKind, +} + +impl Parameter { + #[must_use] + pub fn to_expr(self) -> Expression { + Expression::Cast(Box::new(self.index.into()), PostgresType::from(self.kind)) + } +} + /// Interned identity for a SQL parameter. /// /// Parameters are deduplicated by this key so multiple occurrences of the same logical value /// (e.g. the same input symbol) share one `$N` placeholder. #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -enum Parameter<'heap> { +pub enum ParameterValue<'heap> { /// A user-provided input binding. Input(Symbol<'heap>), /// An integer constant that does not fit in a `u32`. @@ -53,7 +92,7 @@ enum Parameter<'heap> { /// A symbol used as a JSON object key in SQL expressions. 
Symbol(Symbol<'heap>), /// A captured-environment field access. - Env(DefId, FieldIndex), + Env(Local, FieldIndex), /// Temporal axis range provided by the interpreter at execution time. /// /// The interpreter binds these based on the user's temporal axes configuration: @@ -62,14 +101,31 @@ enum Parameter<'heap> { TemporalAxis(TemporalAxis), } -impl fmt::Display for Parameter<'_> { +impl ParameterValue<'_> { + #[must_use] + pub const fn kind(&self) -> ParameterKind { + match self { + Self::Int(int) if int.is_bool() => ParameterKind::Boolean, + Self::Int(_) | Self::Primitive(Primitive::Integer(_)) => ParameterKind::Integer, + Self::Primitive(Primitive::Boolean(_)) => ParameterKind::Boolean, + Self::Primitive(Primitive::Float(_)) => ParameterKind::Number, + Self::Primitive(Primitive::String(_)) | Self::Symbol(_) => ParameterKind::String, + Self::Input(_) | Self::Primitive(Primitive::Null) | Self::Env(_, _) => { + ParameterKind::Value + } + Self::TemporalAxis(_) => ParameterKind::TimestampInterval, + } + } +} + +impl fmt::Display for ParameterValue<'_> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Input(symbol) => write!(fmt, "Input({symbol})"), Self::Int(int) => write!(fmt, "Int({int})"), Self::Primitive(primitive) => write!(fmt, "Primitive({primitive})"), Self::Symbol(symbol) => write!(fmt, "Symbol({symbol})"), - Self::Env(def, field) => write!(fmt, "Env({def}, #{})", field.as_u32()), + Self::Env(local, field) => write!(fmt, "Env({local}, #{})", field.as_u32()), Self::TemporalAxis(axis) => write!(fmt, "TemporalAxis({axis})"), } } @@ -100,8 +156,8 @@ impl fmt::Display for TemporalAxis { /// /// The interpreter uses the reverse mapping to bind runtime values in the correct order. 
pub struct Parameters<'heap, A: Allocator = Global> { - lookup: FastHashMap, ParameterIndex, A>, - reverse: IdVec, A>, + lookup: FastHashMap, ParameterIndex, A>, + reverse: IdVec, A>, } impl<'heap, A: Allocator> Parameters<'heap, A> { @@ -115,36 +171,39 @@ impl<'heap, A: Allocator> Parameters<'heap, A> { } } - fn get_or_insert(&mut self, param: Parameter<'heap>) -> ParameterIndex { - *self + fn get_or_insert(&mut self, param: ParameterValue<'heap>) -> Parameter { + let kind = param.kind(); + let index = *self .lookup .entry(param) - .or_insert_with(|| self.reverse.push(param)) + .or_insert_with(|| self.reverse.push(param)); + + Parameter { index, kind } } - pub(crate) fn input(&mut self, name: Symbol<'heap>) -> ParameterIndex { - self.get_or_insert(Parameter::Input(name)) + pub(crate) fn input(&mut self, name: Symbol<'heap>) -> Parameter { + self.get_or_insert(ParameterValue::Input(name)) } /// Allocates a parameter for a symbol used as a JSON object key in SQL expressions. - pub(crate) fn symbol(&mut self, name: Symbol<'heap>) -> ParameterIndex { - self.get_or_insert(Parameter::Symbol(name)) + pub(crate) fn symbol(&mut self, name: Symbol<'heap>) -> Parameter { + self.get_or_insert(ParameterValue::Symbol(name)) } - pub(crate) fn int(&mut self, value: Int) -> ParameterIndex { - self.get_or_insert(Parameter::Int(value)) + pub(crate) fn int(&mut self, value: Int) -> Parameter { + self.get_or_insert(ParameterValue::Int(value)) } - pub(crate) fn primitive(&mut self, primitive: Primitive<'heap>) -> ParameterIndex { - self.get_or_insert(Parameter::Primitive(primitive)) + pub(crate) fn primitive(&mut self, primitive: Primitive<'heap>) -> Parameter { + self.get_or_insert(ParameterValue::Primitive(primitive)) } - pub(crate) fn env(&mut self, body: DefId, field: FieldIndex) -> ParameterIndex { - self.get_or_insert(Parameter::Env(body, field)) + pub(crate) fn env(&mut self, local: Local, field: FieldIndex) -> Parameter { + self.get_or_insert(ParameterValue::Env(local, field)) 
} - pub(crate) fn temporal_axis(&mut self, axis: TemporalAxis) -> ParameterIndex { - self.get_or_insert(Parameter::TemporalAxis(axis)) + pub(crate) fn temporal_axis(&mut self, axis: TemporalAxis) -> Parameter { + self.get_or_insert(ParameterValue::TemporalAxis(axis)) } /// Returns the number of distinct parameters allocated so far. @@ -156,6 +215,24 @@ impl<'heap, A: Allocator> Parameters<'heap, A> { pub fn is_empty(&self) -> bool { self.reverse.is_empty() } + + pub fn iter( + &self, + ) -> impl ExactSizeIterator> + DoubleEndedIterator { + self.reverse.iter() + } +} + +impl<'this, 'heap> IntoIterator for &'this Parameters<'heap> { + type Item = &'this ParameterValue<'heap>; + + type IntoIter = + impl ExactSizeIterator> + DoubleEndedIterator; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.iter() + } } impl fmt::Display for Parameters<'_, A> { @@ -181,7 +258,10 @@ mod tests { id::Id as _, value::{Primitive, String}, }; - use hashql_mir::{body::place::FieldIndex, def::DefId, interpret::value::Int}; + use hashql_mir::{ + body::{local::Local, place::FieldIndex}, + interpret::value::Int, + }; use super::{Parameters, TemporalAxis}; @@ -247,8 +327,8 @@ mod tests { #[test] fn env_dedup() { let mut params = Parameters::new_in(Global); - let a = params.env(DefId::MIN, FieldIndex::new(0)); - let b = params.env(DefId::MIN, FieldIndex::new(0)); + let a = params.env(Local::MIN, FieldIndex::new(0)); + let b = params.env(Local::MIN, FieldIndex::new(0)); assert_eq!(a, b); assert_eq!(params.len(), 1); diff --git a/libs/@local/hashql/eval/src/postgres/projections.rs b/libs/@local/hashql/eval/src/postgres/projections.rs index aa694d81fc2..670c6066d07 100644 --- a/libs/@local/hashql/eval/src/postgres/projections.rs +++ b/libs/@local/hashql/eval/src/postgres/projections.rs @@ -6,7 +6,8 @@ use core::alloc::Allocator; use hash_graph_postgres_store::store::postgres::query::{ self, Alias, Column, ColumnName, ColumnReference, ForeignKeyReference, FromItem, Identifier, - 
JoinType, SelectExpression, SelectStatement, Table, TableName, TableReference, table, + JoinType, PostgresType, SelectExpression, SelectStatement, Table, TableName, TableReference, + table, }; use hashql_core::symbol::sym; @@ -273,7 +274,8 @@ impl Projections { query::Expression::ColumnReference(ColumnReference { correlation: Some(eit_ref), name: Column::EntityIsOfTypeIds(table::EntityIsOfTypeIds::Versions).into(), - }), + }) + .cast(PostgresType::Array(Box::new(PostgresType::Text))), ]), with_ordinality: false, alias: Some(TableReference { @@ -309,14 +311,14 @@ impl Projections { expression: query::Expression::Function(query::Function::JsonAgg(Box::new( query::Expression::Function(query::Function::JsonBuildObject(vec![ ( - parameters.symbol(sym::base_url).into(), + parameters.symbol(sym::base_url).to_expr(), query::Expression::ColumnReference(ColumnReference { correlation: None, name: ColumnName::from(Identifier::from("b")), }), ), ( - parameters.symbol(sym::version).into(), + parameters.symbol(sym::version).to_expr(), query::Expression::ColumnReference(ColumnReference { correlation: None, name: ColumnName::from(Identifier::from("v")), diff --git a/libs/@local/hashql/eval/src/postgres/traverse.rs b/libs/@local/hashql/eval/src/postgres/traverse.rs index 460b0af6235..0d201729af1 100644 --- a/libs/@local/hashql/eval/src/postgres/traverse.rs +++ b/libs/@local/hashql/eval/src/postgres/traverse.rs @@ -7,13 +7,60 @@ use core::alloc::Allocator; use hash_graph_postgres_store::store::postgres::query::{ - self, Column, ColumnReference, Expression, table, + self, Column, ColumnReference, Constant, Expression, table, }; use hashql_core::symbol::sym; use hashql_mir::pass::execution::traversal::EntityPath; use super::DatabaseContext; +/// Decomposes a `tstzrange` into a `LeftClosedTemporalInterval` JSONB representation. 
+/// +/// A `LeftClosedTemporalInterval` has: +/// - `start`: always `InclusiveTemporalBound` (just the epoch-ms integer) +/// - `end`: `ExclusiveTemporalBound` (epoch-ms integer) or `UnboundedTemporalBound` (`null`) +/// +/// Produces: +/// ```sql +/// jsonb_build_object( +/// 'start', (extract(epoch from lower(range)) * 1000)::int8, +/// 'end', CASE WHEN upper_inf(range) THEN NULL +/// ELSE (extract(epoch from upper(range)) * 1000)::int8 +/// END +/// ) +/// ``` +/// +/// The epoch values are milliseconds since Unix epoch, matching the HashQL +/// `Timestamp` representation. The start bound needs no conditional because +/// `LeftClosedTemporalInterval` guarantees it is always inclusive. The end +/// bound uses `upper_inf` to distinguish `ExclusiveTemporalBound` (finite) +/// from `UnboundedTemporalBound` (infinite). +fn eval_tstzrange_as_left_closed_interval( + db: &mut DatabaseContext<'_, A>, + range: Expression, +) -> Expression { + let lower = Expression::Function(query::Function::Lower(Box::new(range.clone()))); + let upper = Expression::Function(query::Function::Upper(Box::new(range.clone()))); + let upper_inf = Expression::Function(query::Function::UpperInf(Box::new(range))); + + let start_ms = Expression::Function(query::Function::ExtractEpochMs(Box::new(lower))); + + // end: NULL for unbounded, epoch-ms for exclusive + let upper_ms = Expression::Function(query::Function::ExtractEpochMs(Box::new(upper))); + let end_bound = Expression::CaseWhen { + conditions: vec![(upper_inf, Expression::Constant(Constant::Null))], + else_result: Some(Box::new(upper_ms)), + }; + + let start_key = db.parameters.symbol(sym::start).to_expr(); + let end_key = db.parameters.symbol(sym::end).to_expr(); + + Expression::Function(query::Function::JsonBuildObject(vec![ + (start_key, start_ms), + (end_key, end_bound), + ])) +} + /// Lowers an [`EntityPath`] to a SQL [`Expression`], requesting joins and allocating parameters /// as needed. 
/// @@ -35,25 +82,25 @@ pub(crate) fn eval_entity_path( ), EntityPath::RecordId => Expression::Function(query::Function::JsonBuildObject(vec![ ( - db.parameters.symbol(sym::entity_id).into(), + db.parameters.symbol(sym::entity_id).to_expr(), eval_entity_path(db, EntityPath::EntityId), ), ( - db.parameters.symbol(sym::draft_id).into(), - eval_entity_path(db, EntityPath::DraftId), + db.parameters.symbol(sym::edition_id).to_expr(), + eval_entity_path(db, EntityPath::EditionId), ), ])), EntityPath::EntityId => Expression::Function(query::Function::JsonBuildObject(vec![ ( - db.parameters.symbol(sym::web_id).into(), + db.parameters.symbol(sym::web_id).to_expr(), eval_entity_path(db, EntityPath::WebId), ), ( - db.parameters.symbol(sym::entity_uuid).into(), + db.parameters.symbol(sym::entity_uuid).to_expr(), eval_entity_path(db, EntityPath::EntityUuid), ), ( - db.parameters.symbol(sym::draft_id).into(), + db.parameters.symbol(sym::draft_id).to_expr(), eval_entity_path(db, EntityPath::DraftId), ), ])), @@ -76,25 +123,35 @@ pub(crate) fn eval_entity_path( EntityPath::TemporalVersioning => { Expression::Function(query::Function::JsonBuildObject(vec![ ( - db.parameters.symbol(sym::decision_time).into(), + db.parameters.symbol(sym::decision_time).to_expr(), eval_entity_path(db, EntityPath::DecisionTime), ), ( - db.parameters.symbol(sym::transaction_time).into(), + db.parameters.symbol(sym::transaction_time).to_expr(), eval_entity_path(db, EntityPath::TransactionTime), ), ])) } - EntityPath::DecisionTime => Expression::ColumnReference(ColumnReference { - correlation: Some(db.projections.temporal_metadata()), - name: Column::EntityTemporalMetadata(table::EntityTemporalMetadata::DecisionTime) - .into(), - }), - EntityPath::TransactionTime => Expression::ColumnReference(ColumnReference { - correlation: Some(db.projections.temporal_metadata()), - name: Column::EntityTemporalMetadata(table::EntityTemporalMetadata::TransactionTime) + EntityPath::DecisionTime => { + let range = 
Expression::ColumnReference(ColumnReference { + correlation: Some(db.projections.temporal_metadata()), + name: Column::EntityTemporalMetadata(table::EntityTemporalMetadata::DecisionTime) + .into(), + }); + + eval_tstzrange_as_left_closed_interval(db, range) + } + EntityPath::TransactionTime => { + let range = Expression::ColumnReference(ColumnReference { + correlation: Some(db.projections.temporal_metadata()), + name: Column::EntityTemporalMetadata( + table::EntityTemporalMetadata::TransactionTime, + ) .into(), - }), + }); + + eval_tstzrange_as_left_closed_interval(db, range) + } EntityPath::EntityTypeIds => Expression::ColumnReference(db.projections.entity_type_ids()), EntityPath::Archived => Expression::ColumnReference(ColumnReference { correlation: Some(db.projections.entity_editions()), diff --git a/libs/@local/hashql/eval/src/postgres/types.rs b/libs/@local/hashql/eval/src/postgres/types.rs new file mode 100644 index 00000000000..e1dbdf91d99 --- /dev/null +++ b/libs/@local/hashql/eval/src/postgres/types.rs @@ -0,0 +1,118 @@ +use core::ops::ControlFlow; + +use hashql_core::{ + debug_panic, + symbol::Symbol, + r#type::{ + TypeId, + environment::Environment, + kind::{Apply, Generic, OpaqueType, TypeKind}, + }, +}; + +/// Recursively navigates a type structure following a sequence of struct field names. +/// +/// Returns `Continue(Some(id))` when the path resolves to a concrete type, +/// `Continue(None)` when the current branch has no match (e.g. a union variant +/// without the field), or `Break(())` when union variants disagree on the resolved type. +fn traverse_struct_impl( + env: &Environment<'_>, + vertex: TypeId, + fields: &[Symbol<'_>], + depth: usize, +) -> ControlFlow<(), Option> { + let r#type = env.r#type(vertex); + + // We don't need a sophisticated cycle detection algorithm here, the only reason a cycle could + // occur here is if apply and generic substitutions are the only members in a cycle, haven't + // been resolved and simplified away. 
Which should've created a type error earlier anyway. + if depth > 32 { + debug_panic!("maximum opaque type recursion depth exceeded"); + + return ControlFlow::Continue(None); + } + + match r#type.kind { + &TypeKind::Generic(Generic { base, arguments: _ }) + | &TypeKind::Apply(Apply { + base, + substitutions: _, + }) => traverse_struct_impl(env, base, fields, depth + 1), + TypeKind::Union(union_type) => { + let mut value = None; + + for &variant in union_type.variants { + let variant_value = traverse_struct_impl(env, variant, fields, depth + 1)?; + + match (value, variant_value) { + (None, _) => value = variant_value, + (Some(existing), Some(variant)) => { + if existing != variant { + debug_panic!( + "union variant mismatch: existing={:?} variant={:?}", + existing, + variant + ); + + return ControlFlow::Break(()); + } + } + (Some(_), None) => {} + } + } + + ControlFlow::Continue(value) + } + + TypeKind::Struct(r#struct) => { + if let [name, rest @ ..] = fields { + let field = r#struct.fields.iter().find(|field| field.name == *name); + + field.map_or(ControlFlow::Continue(None), |field| { + traverse_struct_impl(env, field.value, rest, depth + 1) + }) + } else { + // field is empty + ControlFlow::Continue(Some(vertex)) + } + } + + &TypeKind::Opaque(OpaqueType { + name: _, + repr: base, + }) if !fields.is_empty() => traverse_struct_impl(env, base, fields, depth + 1), + + // We cannot traverse into intersection types, because we don't know which variant to + // choose. + TypeKind::Opaque(_) + | TypeKind::Intersection(_) + | TypeKind::Primitive(_) + | TypeKind::Intrinsic(_) + | TypeKind::Tuple(_) + | TypeKind::Closure(_) + | TypeKind::Param(_) + | TypeKind::Infer(_) + | TypeKind::Never + | TypeKind::Unknown => ControlFlow::Continue(fields.is_empty().then_some(vertex)), + } +} + +/// Resolves a sequence of struct field names within a type, returning the final field's +/// [`TypeId`]. 
+/// +/// For unions, all variants must agree on the resolved type — returns [`None`] if they +/// disagree. When `fields` is empty, returns the type as-is (preserving opaque wrappers). +pub(crate) fn traverse_struct( + env: &Environment<'_>, + vertex: TypeId, + fields: &[Symbol<'_>], +) -> Option { + match traverse_struct_impl(env, vertex, fields, 0) { + ControlFlow::Continue(value) => value, + ControlFlow::Break(()) => { + debug_panic!("traverse_struct_impl broke without a value"); + + None + } + } +} diff --git a/libs/@local/hashql/eval/tests/orchestrator/directives.rs b/libs/@local/hashql/eval/tests/orchestrator/directives.rs new file mode 100644 index 00000000000..50a660247eb --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/directives.rs @@ -0,0 +1,136 @@ +/// Parsed temporal interval from an `//@ axis` directive. +#[derive(Debug, Clone)] +pub(crate) struct AxisInterval { + pub start: AxisBound, + pub end: AxisBound, +} + +/// A single bound in an axis interval. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum AxisBound { + Unbounded, + Included(i128), + Excluded(i128), +} + +/// Parsed axis directives from a test file. +#[derive(Debug, Default)] +pub(crate) struct AxisDirectives { + pub decision: Option, + pub transaction: Option, +} + +/// Parses `//@ axis[decision]` and `//@ axis[transaction]` directives from +/// the source text. +/// +/// Supported interval syntax: +/// - `(T)` : point interval (pinned) +/// - `[a, b)` / `(a, b]` / `[a, b]` / `(a, b)` : range with bounds +/// - `(, b]` / `(, b)` : unbounded start +/// - `[a,)` / `(a,)` : unbounded end +pub(crate) fn parse_directives(source: &str) -> AxisDirectives { + let mut directives = AxisDirectives::default(); + + for line in source.lines() { + let trimmed = line.trim(); + + let Some(rest) = trimmed.strip_prefix("//@") else { + // Stop scanning once we hit a non-directive, non-comment line. 
+ if !trimmed.is_empty() && !trimmed.starts_with("//") { + break; + } + continue; + }; + + let rest = rest.trim(); + + let Some(rest) = rest.strip_prefix("axis[") else { + continue; + }; + + let (axis_name, rest) = rest + .split_once(']') + .expect("malformed axis directive: missing ]"); + + let rest = rest.trim(); + let rest = rest + .strip_prefix('=') + .expect("malformed axis directive: missing ="); + let rest = rest.trim(); + + let interval = parse_interval(rest); + + match axis_name { + "decision" => directives.decision = Some(interval), + "transaction" => directives.transaction = Some(interval), + other => panic!("unknown axis name: {other}"), + } + } + + directives +} + +/// Parses an interval expression like `(T)`, `[a, b)`, `(, b]`, etc. +fn parse_interval(input: &str) -> AxisInterval { + let input = input.trim(); + + let first = input.bytes().next().expect("empty interval expression"); + let start_inclusive = match first { + b'[' => true, + b'(' => false, + other => panic!("unexpected interval start: {}", other as char), + }; + + let last = input + .bytes() + .next_back() + .expect("empty interval expression"); + let end_inclusive = match last { + b']' => true, + b')' => false, + other => panic!("unexpected interval end: {}", other as char), + }; + + // Safe to slice at 1 and len-1: brackets are single-byte ASCII. + let inner = input.get(1..input.len() - 1).expect("interval too short"); + + // Point interval: (T) — bracket style is irrelevant, always [T, T]. + if !inner.contains(',') { + let timestamp = inner + .trim() + .parse::() + .expect("could not parse point interval timestamp"); + + return AxisInterval { + start: AxisBound::Included(timestamp), + end: AxisBound::Included(timestamp), + }; + } + + // Range interval: split on comma. 
+ let (start_str, end_str) = inner + .split_once(',') + .expect("interval must contain a comma"); + + let start = parse_bound(start_str.trim(), start_inclusive); + let end = parse_bound(end_str.trim(), end_inclusive); + + AxisInterval { start, end } +} + +/// Parses a single bound value. Empty string means unbounded. +fn parse_bound(value: &str, inclusive: bool) -> AxisBound { + if value.is_empty() { + return AxisBound::Unbounded; + } + + let timestamp = value + .parse::() + .unwrap_or_else(|error| panic!("could not parse bound {value:?}: {error}")); + + if inclusive { + AxisBound::Included(timestamp) + } else { + AxisBound::Excluded(timestamp) + } +} diff --git a/libs/@local/hashql/eval/tests/orchestrator/discover.rs b/libs/@local/hashql/eval/tests/orchestrator/discover.rs new file mode 100644 index 00000000000..4bafc4962aa --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/discover.rs @@ -0,0 +1,109 @@ +use std::path::{Path, PathBuf}; + +use hashql_compiletest::pipeline::Pipeline; +use hashql_mir::{ + body::Body, + def::{DefId, DefIdVec}, + intern::Interner, +}; + +/// Signature for programmatic test builders. +/// +/// Each builder receives the pipeline (providing the heap and the shared type +/// environment) and returns the MIR components needed for execution: an +/// interner, the entry definition, and the body set. Inputs are constructed +/// by the test runner from seeded entity data, not by the builder. +pub(crate) type ProgrammaticBuilder = + for<'heap> fn(&Pipeline<'heap>) -> (Interner<'heap>, DefId, DefIdVec>); + +/// A discovered test case, either from a `.jsonc` file or a programmatic +/// registration. +pub(crate) struct TestCase { + /// Display name used by libtest-mimic (and nextest filtering). + pub name: String, + /// Source of the test. + pub source: TestSource, + /// Path to the expected output file (`.stdout`). + pub expected_output: PathBuf, +} + +pub(crate) enum TestSource { + /// Full-pipeline test from a J-Expr file. 
+ JExpr { path: PathBuf }, + /// Programmatic test with a MIR builder function. + Programmatic { builder: ProgrammaticBuilder }, +} + +/// Scans `base_dir/jsonc/` for `.jsonc` files and returns a `TestCase` for +/// each one. The test name is derived from the file stem. +pub(crate) fn discover_jexpr_tests(base_dir: &Path) -> Vec { + let jsonc_dir = base_dir.join("jsonc"); + + if !jsonc_dir.is_dir() { + return Vec::new(); + } + + let mut entries: Vec<_> = std::fs::read_dir(&jsonc_dir) + .expect("could not read jsonc test directory") + .filter_map(|entry| { + let entry = entry.expect("could not read directory entry"); + let path = entry.path(); + + path.extension() + .is_some_and(|ext| ext == "jsonc") + .then_some(path) + }) + .collect(); + + entries.sort(); + + entries + .into_iter() + .map(|path| { + let name = path + .file_stem() + .expect("jsonc file has no stem") + .to_str() + .expect("non-UTF-8 file name") + .to_owned(); + + let expected_output = path.with_extension("stdout"); + + TestCase { + name: format!("jsonc::{name}"), + source: TestSource::JExpr { path }, + expected_output, + } + }) + .collect() +} + +/// Registers programmatic tests from a list of `(name, builder)` pairs. +/// The expected output files live in `base_dir/programmatic/.stdout`. +pub(crate) fn discover_programmatic_tests( + base_dir: &Path, + registry: &[(&str, ProgrammaticBuilder)], +) -> Vec { + let programmatic_dir = base_dir.join("programmatic"); + + registry + .iter() + .map(|&(name, builder)| { + let expected_output = programmatic_dir.join(format!("{name}.stdout")); + + TestCase { + name: format!("programmatic::{name}"), + source: TestSource::Programmatic { builder }, + expected_output, + } + }) + .collect() +} + +/// Returns the base directory for orchestrator UI tests. 
+pub(crate) fn test_ui_dir() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("ui") + .join("orchestrator") +} diff --git a/libs/@local/hashql/eval/tests/orchestrator/error.rs b/libs/@local/hashql/eval/tests/orchestrator/error.rs new file mode 100644 index 00000000000..66967795993 --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/error.rs @@ -0,0 +1,46 @@ +use core::{error::Error, fmt}; + +/// Errors during test infrastructure setup: starting the container, +/// connecting to the database, running migrations, or seeding data. +#[derive(Debug)] +pub(crate) enum SetupError { + Container, + Connection, + Migration, + Seed, +} + +impl fmt::Display for SetupError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Container => f.write_str("failed to start test container"), + Self::Connection => f.write_str("failed to connect to database"), + Self::Migration => f.write_str("failed to run database migrations"), + Self::Seed => f.write_str("failed to seed test data"), + } + } +} + +impl Error for SetupError {} + +/// Errors during individual test execution. 
+#[derive(Debug)] +pub(crate) enum TestError { + ReadSource, + Execution, + Serialization, + OutputMismatch, +} + +impl fmt::Display for TestError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ReadSource => f.write_str("failed to read test source file"), + Self::Execution => f.write_str("query execution failed"), + Self::Serialization => f.write_str("failed to serialize result value"), + Self::OutputMismatch => f.write_str("output comparison failed"), + } + } +} + +impl Error for TestError {} diff --git a/libs/@local/hashql/eval/tests/orchestrator/execution.rs b/libs/@local/hashql/eval/tests/orchestrator/execution.rs new file mode 100644 index 00000000000..c26eff6549a --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/execution.rs @@ -0,0 +1,152 @@ +use alloc::alloc::Global; +use core::mem; + +use hashql_compiletest::pipeline::Pipeline; +use hashql_core::{heap::ResetAllocator as _, span::SpanId}; +use hashql_diagnostics::{Diagnostic, diagnostic::BoxedDiagnostic}; +use hashql_eval::{ + context::EvalContext, + orchestrator::{AppendEventLog, Event, Orchestrator}, + postgres::PostgresCompiler, +}; +use hashql_mir::{ + body::Body, + def::{DefId, DefIdSlice, DefIdVec}, + intern::Interner, + interpret::{Inputs, value::Value}, +}; +use tokio::runtime; +use tokio_postgres::Client; + +/// Intermediate state after parsing and lowering a J-Expr query. +/// +/// Holds the MIR artifacts needed to build typed inputs (via the decoder +/// and the environment) before proceeding to execution. +pub(crate) struct Lowered<'heap> { + pub interner: Interner<'heap>, + pub entry: DefId, + pub bodies: DefIdVec>, +} + +/// Parses and lowers J-Expr source, returning MIR artifacts. +/// +/// After this call the pipeline's environment contains all types referenced +/// by the query, so the caller can use the decoder to construct typed input +/// values before calling [`run`]. 
+/// +/// # Errors +/// +/// Returns a diagnostic on parse or lowering failure. +pub(crate) fn lower<'heap>( + pipeline: &mut Pipeline<'heap>, + bytes: impl AsRef<[u8]>, +) -> Result, BoxedDiagnostic<'static, SpanId>> { + let ast = pipeline.parse(bytes)?; + let (interner, entry, bodies) = pipeline.lower(ast)?; + + Ok(Lowered { + interner, + entry, + bodies, + }) +} + +/// Transforms, analyzes, and executes a lowered query. +/// +/// The caller provides pre-built inputs (constructed after lowering so that +/// the type environment is available for decoding). +/// +/// # Errors +/// +/// Returns a diagnostic on transform, analysis, or execution failure. +pub(crate) fn run<'heap>( + pipeline: &mut Pipeline<'heap>, + + runtime: &runtime::Runtime, + client: &Client, + + inputs: &Inputs<'heap, Global>, + + lowered: &mut Lowered<'heap>, +) -> Result<(Value<'heap, Global>, Vec), BoxedDiagnostic<'static, SpanId>> { + run_impl( + pipeline, + runtime, + client, + inputs, + &lowered.interner, + lowered.entry, + &mut lowered.bodies, + ) +} + +/// Executes a pre-built MIR program. +/// +/// Used by programmatic tests that construct bodies directly via the `body!` +/// macro instead of parsing J-Expr source. +/// +/// # Errors +/// +/// Returns a diagnostic on transform, analysis, or execution failure. 
+pub(crate) fn execute<'heap>(
+    pipeline: &mut Pipeline<'heap>,
+
+    runtime: &runtime::Runtime,
+    client: &Client,
+
+    inputs: &Inputs<'heap, Global>,
+
+    interner: &Interner<'heap>,
+    entry: DefId,
+    bodies: &mut DefIdSlice<Body<'heap>>,
+) -> Result<(Value<'heap, Global>, Vec<Event>), BoxedDiagnostic<'static, SpanId>> {
module::std_lib::graph::types::{ + knowledge::entity, principal::actor_group::web::types as web_types, + }, + symbol::sym, + r#type::TypeBuilder, +}; +use hashql_eval::orchestrator::codec::{Decoder, JsonValueRef}; +use hashql_mir::{ + intern::Interner, + interpret::{ + Inputs, + value::{self, Value}, + }, +}; +use type_system::knowledge::entity::id::EntityUuid; + +use crate::{ + directives::{AxisBound, AxisDirectives, AxisInterval}, + seed::SeededEntities, +}; + +/// Constructs `Opaque(Timestamp, Integer(ms))`. +fn timestamp_value(ms: i128) -> Value<'static, Global> { + Value::Opaque(value::Opaque::new( + sym::path::Timestamp, + Value::Integer(value::Int::from(ms)), + )) +} + +/// Constructs `Opaque(UnboundedTemporalBound, Unit)`. +fn unbounded_bound() -> Value<'static, Global> { + Value::Opaque(value::Opaque::new( + sym::path::UnboundedTemporalBound, + Value::Unit, + )) +} + +/// Constructs `Opaque(ExclusiveTemporalBound, Timestamp(ms))`. +fn exclusive_bound(ms: i128) -> Value<'static, Global> { + Value::Opaque(value::Opaque::new( + sym::path::ExclusiveTemporalBound, + timestamp_value(ms), + )) +} + +/// Constructs `Opaque(Interval, {end: .., start: ..})`. +/// +/// Fields are sorted lexicographically (`end` before `start`). +fn interval_value<'heap>( + interner: &Interner<'heap>, + start: Value<'heap, Global>, + end: Value<'heap, Global>, +) -> Value<'heap, Global> { + // Fields sorted: "end" < "start" + let fields = interner.symbols.intern_slice(&[sym::end, sym::start]); + let values = vec![end, start]; + + Value::Opaque(value::Opaque::new( + sym::path::Interval, + Value::Struct(value::Struct::new(fields, values).expect("interval struct is valid")), + )) +} + +/// Converts an [`AxisInterval`] to a `Value` representing a temporal +/// interval: `Opaque(Interval, {start: , end: })`. 
+fn axis_interval_to_value<'heap>( + interner: &Interner<'heap>, + interval: &AxisInterval, +) -> Value<'heap, Global> { + let start = match interval.start { + AxisBound::Unbounded => unbounded_bound(), + AxisBound::Included(ms) => Value::Opaque(value::Opaque::new( + sym::path::InclusiveTemporalBound, + timestamp_value(ms), + )), + AxisBound::Excluded(ms) => exclusive_bound(ms), + }; + let end = match interval.end { + AxisBound::Unbounded => unbounded_bound(), + AxisBound::Included(ms) => Value::Opaque(value::Opaque::new( + sym::path::InclusiveTemporalBound, + timestamp_value(ms), + )), + AxisBound::Excluded(ms) => exclusive_bound(ms), + }; + interval_value(interner, start, end) +} + +/// Returns `true` if the interval is a point (both bounds are Included with +/// the same value). +fn is_point(interval: &AxisInterval) -> Option { + match (&interval.start, &interval.end) { + (AxisBound::Included(start), AxisBound::Included(end)) if start == end => Some(*start), + _ => None, + } +} + +/// Builds temporal axes from parsed directives. +/// +/// `QueryTemporalAxes` is a union of `PinnedTransactionTimeTemporalAxes` and +/// `PinnedDecisionTimeTemporalAxes`. Each has a `pinned` field (single +/// timestamp) and a `variable` field (range interval). The directive system +/// determines which axis is pinned (a point `(T)`) and which is variable +/// (a range `[a, b)` or defaulting to unbounded). +fn temporal_axes_from_directives<'heap>( + interner: &Interner<'heap>, + directives: &AxisDirectives, +) -> Value<'heap, Global> { + let far_future_ms: i128 = 4_102_444_800_000; // 2100-01-01T00:00:00Z + let default_variable = || AxisInterval { + start: AxisBound::Unbounded, + end: AxisBound::Excluded(far_future_ms), + }; + + // Determine which axis is pinned and which is variable. + // Default: pin transaction time, variable decision time. 
+ let (pinned_axis, pinned_ms, variable_axis_name, variable_interval) = + match (&directives.decision, &directives.transaction) { + (None, None) => ( + sym::path::TransactionTime, + far_future_ms, + sym::path::DecisionTime, + default_variable(), + ), + (Some(decision), None) => { + let ms = + is_point(decision).expect("pinned decision axis must be a point interval (T)"); + ( + sym::path::DecisionTime, + ms, + sym::path::TransactionTime, + default_variable(), + ) + } + (None, Some(transaction)) => { + let ms = is_point(transaction) + .expect("pinned transaction axis must be a point interval (T)"); + ( + sym::path::TransactionTime, + ms, + sym::path::DecisionTime, + default_variable(), + ) + } + (Some(decision), Some(transaction)) => { + // One must be a point (pinned), the other a range (variable). + match (is_point(transaction), is_point(decision)) { + (Some(ms), _) => ( + sym::path::TransactionTime, + ms, + sym::path::DecisionTime, + decision.clone(), + ), + (_, Some(ms)) => ( + sym::path::DecisionTime, + ms, + sym::path::TransactionTime, + transaction.clone(), + ), + _ => panic!("when both axes are specified, one must be a point interval"), + } + } + }; + + let pinned = Value::Opaque(value::Opaque::new(pinned_axis, timestamp_value(pinned_ms))); + let variable = Value::Opaque(value::Opaque::new( + variable_axis_name, + axis_interval_to_value(interner, &variable_interval), + )); + + // "pinned" < "variable" lexicographically. + let fields = interner.symbols.intern_slice(&[sym::pinned, sym::variable]); + let values = vec![pinned, variable]; + + let wrapper_name = if pinned_axis == sym::path::TransactionTime { + sym::path::PinnedTransactionTimeTemporalAxes + } else { + sym::path::PinnedDecisionTimeTemporalAxes + }; + + Value::Opaque(value::Opaque::new( + wrapper_name, + Value::Struct(value::Struct::new(fields, values).expect("axes struct is valid")), + )) +} + +/// Builds the shared input set from seeded entity data and axis directives. 
+/// +/// Uses the decoder and the post-lowering type environment to construct +/// properly typed `Value`s for entity UUIDs and entity IDs. The input names +/// match what J-Expr test files reference via `["input", "", ""]`. +pub(crate) fn build_inputs<'heap>( + heap: &'heap Heap, + pipeline: &Pipeline<'heap>, + interner: &Interner<'heap>, + entities: &SeededEntities, + directives: &AxisDirectives, +) -> Inputs<'heap, Global> { + let mut inputs = Inputs::new(); + let decoder = Decoder::new(&pipeline.env, interner, Global); + let ty = TypeBuilder::synthetic(&pipeline.env); + let entity_uuid_type = entity::types::entity_uuid(&ty, None); + let entity_id_type = entity::types::entity_id(&ty, None); + + // Insert an EntityUuid-typed input. + let insert_uuid = |inputs: &mut Inputs<'heap, Global>, name: &str, uuid: &EntityUuid| { + let uuid_str = uuid.to_string(); + let value = decoder + .decode(entity_uuid_type, JsonValueRef::String(&uuid_str)) + .expect("could not decode EntityUuid input"); + + inputs.insert(heap.intern_symbol(name), value); + }; + + // Insert a full EntityId-typed input. 
+ let insert_entity_id = + |inputs: &mut Inputs<'heap, Global>, + name: &str, + id: &type_system::knowledge::entity::EntityId| { + let json = serde_json::json!({ + "web_id": id.web_id.to_string(), + "entity_uuid": id.entity_uuid.to_string(), + "draft_id": id.draft_id.map(|draft| draft.to_string()), + }); + let value = decoder + .decode(entity_id_type, JsonValueRef::from(&json)) + .expect("could not decode EntityId input"); + + inputs.insert(heap.intern_symbol(name), value); + }; + + insert_uuid(&mut inputs, "alice_uuid", &entities.alice.entity_uuid); + insert_uuid(&mut inputs, "bob_uuid", &entities.bob.entity_uuid); + insert_uuid(&mut inputs, "org_uuid", &entities.organization.entity_uuid); + insert_uuid( + &mut inputs, + "friend_link_uuid", + &entities.friend_link.entity_uuid, + ); + insert_uuid( + &mut inputs, + "draft_alice_uuid", + &entities.draft_alice.entity_uuid, + ); + + insert_entity_id(&mut inputs, "alice_id", &entities.alice); + insert_entity_id(&mut inputs, "bob_id", &entities.bob); + insert_entity_id(&mut inputs, "org_id", &entities.organization); + insert_entity_id(&mut inputs, "friend_link_id", &entities.friend_link); + insert_entity_id(&mut inputs, "draft_alice_id", &entities.draft_alice); + + // WebId input (all seeded entities share the same web). + let web_id_type = web_types::web_id(&ty, None); + let web_id_value = decoder + .decode( + web_id_type, + JsonValueRef::String(&entities.alice.web_id.to_string()), + ) + .expect("could not decode WebId input"); + inputs.insert(heap.intern_symbol("web_id"), web_id_value); + + // String inputs for property-based filtering. + let string_type = ty.string(); + let alice_name = decoder + .decode(string_type, JsonValueRef::String("Alice")) + .expect("could not decode string input"); + inputs.insert(heap.intern_symbol("alice_name"), alice_name); + + // Temporal axes from directives (or default: unbounded decision time, + // far-future transaction pin). 
+ inputs.insert( + heap.intern_symbol("temporal_axes"), + temporal_axes_from_directives(interner, directives), + ); + + inputs +} diff --git a/libs/@local/hashql/eval/tests/orchestrator/main.rs b/libs/@local/hashql/eval/tests/orchestrator/main.rs new file mode 100644 index 00000000000..08301562a3c --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/main.rs @@ -0,0 +1,244 @@ +#![feature(allocator_api)] +extern crate alloc; + +use alloc::sync::Arc; + +use error_stack::{Report, ResultExt as _}; +use hash_graph_postgres_store::store::{AsClient as _, PostgresStore, PostgresStoreSettings}; +use hashql_compiletest::pipeline::Pipeline; +use hashql_core::heap::Heap; +use testcontainers::{ImageExt as _, ReuseDirective, runners::AsyncRunner as _}; +use testcontainers_modules::postgres::Postgres; +use tokio::runtime::{self, Runtime}; +use tokio_postgres::{Client, NoTls}; + +mod directives; +mod discover; +mod error; +mod execution; +mod inputs; +mod output; +mod programmatic; +mod seed; + +use self::{ + directives::{AxisDirectives, parse_directives}, + discover::{ + ProgrammaticBuilder, TestSource, discover_jexpr_tests, discover_programmatic_tests, + test_ui_dir, + }, + error::{SetupError, TestError}, + inputs::build_inputs, + output::{compare_or_bless, render_failure, render_success}, + seed::SeededEntities, +}; + +struct TestContext { + _container: testcontainers::ContainerAsync, + store: Arc>, + entities: SeededEntities, +} + +async fn setup() -> Result> { + let container = Postgres::default() + .with_user("hash") + .with_password("hash") + .with_db_name("hash") + .with_name("pgvector/pgvector") + .with_tag("0.8.2-pg18-trixie") + .with_reuse(ReuseDirective::CurrentSession) + .with_cmd([ + "postgres", + "-c", + "log_statement=all", + "-c", + "log_destination=stderr", + ]) + .start() + .await + .change_context(SetupError::Container)?; + + let host = container + .get_host() + .await + .change_context(SetupError::Container) + .attach("could not resolve container 
host")? + .to_string(); + let port = container + .get_host_port_ipv4(5432) + .await + .change_context(SetupError::Container) + .attach("could not resolve container port")?; + + let (client, connection) = tokio_postgres::Config::new() + .user("hash") + .password("hash") + .host(&host) + .port(port) + .dbname("hash") + .connect(NoTls) + .await + .change_context(SetupError::Connection)?; + tokio::spawn(connection); + + let mut store = PostgresStore::new(client, None, Arc::new(PostgresStoreSettings::default())); + let entities = seed::setup(&mut store).await?; + + Ok(TestContext { + _container: container, + store: Arc::new(store), + entities, + }) +} + +/// Runs a J-Expr test: parse, lower, build inputs, execute, compare output. +fn run_jexpr_test( + runtime: &Runtime, + context: &TestContext, + path: &std::path::Path, + expected_output: &std::path::Path, + bless: bool, +) -> Result<(), Report> { + let bytes = std::fs::read(path) + .change_context(TestError::ReadSource) + .attach_with(|| format!("{}", path.display()))?; + + let source = String::from_utf8_lossy(&bytes); + let axis_directives = parse_directives(&source); + let heap = Heap::new(); + let mut pipeline = Pipeline::new(&heap); + + // Lower first so the type environment is populated, then build inputs. 
+ let mut lowered = match execution::lower(&mut pipeline, &bytes) { + Ok(lowered) => lowered, + Err(diagnostic) => { + let rendered = render_failure(&source, &pipeline, &diagnostic); + return Err(Report::new(TestError::Execution).attach(rendered)); + } + }; + + let inputs = build_inputs( + &heap, + &pipeline, + &lowered.interner, + &context.entities, + &axis_directives, + ); + + match execution::run( + &mut pipeline, + runtime, + context.store.as_client(), + &inputs, + &mut lowered, + ) { + Ok((value, events)) => { + let rendered = render_success(&source, &value, &events, &pipeline)?; + compare_or_bless(&rendered, expected_output, bless) + } + Err(diagnostic) => { + let rendered = render_failure(&source, &pipeline, &diagnostic); + Err(Report::new(TestError::Execution).attach(rendered)) + } + } +} + +/// Runs a programmatic test: build MIR directly, execute, compare output. +/// +/// Inputs are constructed from seeded entity data using the same +/// [`build_inputs`] helper as J-Expr tests. The programmatic builder +/// only constructs the MIR bodies; it references inputs via +/// `input.load!` statements. +fn run_programmatic_test( + runtime: &Runtime, + context: &TestContext, + builder: ProgrammaticBuilder, + expected_output: &std::path::Path, + bless: bool, +) -> Result<(), Report> { + let heap = Heap::new(); + let mut pipeline = Pipeline::new(&heap); + let (interner, entry, mut bodies) = builder(&pipeline); + + let inputs = build_inputs( + &heap, + &pipeline, + &interner, + &context.entities, + &AxisDirectives::default(), + ); + + // Programmatic tests have no J-Expr source, so diagnostics render + // without source context (all spans are synthetic). 
+ let source = ""; + + match execution::execute( + &mut pipeline, + runtime, + context.store.as_client(), + &inputs, + &interner, + entry, + &mut bodies, + ) { + Ok((value, events)) => { + let rendered = render_success(source, &value, &events, &pipeline)?; + compare_or_bless(&rendered, expected_output, bless) + } + Err(diagnostic) => { + let rendered = render_failure(source, &pipeline, &diagnostic); + Err(Report::new(TestError::Execution).attach(rendered)) + } + } +} + +const PROGRAMMATIC_TESTS: &[(&str, ProgrammaticBuilder)] = &[ + ("property-access", programmatic::property_access), + ("property-arithmetic", programmatic::property_arithmetic), +]; + +fn main() -> Result<(), Report> { + let arguments = libtest_mimic::Arguments::from_args(); + let bless = std::env::args().any(|arg| arg == "--bless") || std::env::var("BLESS").is_ok(); + + let runtime = runtime::Builder::new_multi_thread() + .enable_all() + .build() + .change_context(SetupError::Container) + .attach("could not build tokio runtime")?; + let runtime = Arc::new(runtime); + + let context = runtime.block_on(setup())?; + let context = Arc::new(context); + + let ui_dir = test_ui_dir(); + let mut test_cases = discover_jexpr_tests(&ui_dir); + test_cases.extend(discover_programmatic_tests(&ui_dir, PROGRAMMATIC_TESTS)); + + let trials: Vec<_> = test_cases + .into_iter() + .map(|test_case| { + let context = Arc::clone(&context); + let runtime = Arc::clone(&runtime); + + libtest_mimic::Trial::test(&test_case.name, move || { + let result = match &test_case.source { + TestSource::JExpr { path } => { + run_jexpr_test(&runtime, &context, path, &test_case.expected_output, bless) + } + TestSource::Programmatic { builder } => run_programmatic_test( + &runtime, + &context, + *builder, + &test_case.expected_output, + bless, + ), + }; + + result.map_err(|report| format!("{report:?}").into()) + }) + }) + .collect(); + + libtest_mimic::run(&arguments, trials).exit(); +} diff --git 
a/libs/@local/hashql/eval/tests/orchestrator/output.rs b/libs/@local/hashql/eval/tests/orchestrator/output.rs new file mode 100644 index 00000000000..0573c093acb --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/output.rs @@ -0,0 +1,217 @@ +use alloc::alloc::Global; +use std::{collections::HashMap, fs, path::Path, sync::LazyLock}; + +use error_stack::{Report, ResultExt as _}; +use hashql_compiletest::pipeline::Pipeline; +use hashql_core::span::SpanId; +use hashql_diagnostics::{ + Source, Sources, + diagnostic::{ + BoxedDiagnostic, + render::{ColorDepth, Format, RenderOptions}, + }, +}; +use hashql_eval::orchestrator::{Event, codec::Serde}; +use hashql_mir::interpret::value::Value; +use regex::Regex; +use similar_asserts::SimpleDiff; + +use crate::error::TestError; + +/// Renders a single diagnostic to a plain-text string using the pipeline's +/// span table for source resolution. +fn render_diagnostic( + source: &str, + pipeline: &Pipeline<'_>, + diagnostic: &BoxedDiagnostic<'_, SpanId>, +) -> String { + let mut sources = Sources::new(); + sources.push(Source::new(source)); + + let mut options = RenderOptions::new(Format::Ansi, &sources); + options.color_depth = ColorDepth::Monochrome; + + diagnostic.render(options, &mut &pipeline.spans) +} + +/// Renders accumulated warnings from the pipeline into a single string. +/// +/// Returns `None` if there are no warnings. 
fn render_warnings(source: &str, pipeline: &Pipeline<'_>) -> Option<String> {
+pub(crate) fn render_success( + source: &str, + value: &Value<'_, Global>, + events: &[Event], + pipeline: &Pipeline<'_>, +) -> Result> { + let json = + serde_json::to_string_pretty(&Serde(value)).change_context(TestError::Serialization)?; + + let mut output = normalize(&json); + + if !events.is_empty() { + output.push_str("\n---\n"); + for event in events { + output.push_str(&event.to_string()); + output.push('\n'); + } + } + + if let Some(warnings) = render_warnings(source, pipeline) { + output.push_str("\n---\n"); + output.push_str(&warnings); + } + + Ok(output) +} + +/// Renders a compilation or execution failure as a test error message. +/// +/// Includes the rendered diagnostic and any accumulated warnings from +/// earlier pipeline stages. +pub(crate) fn render_failure( + source: &str, + pipeline: &Pipeline<'_>, + diagnostic: &BoxedDiagnostic<'_, SpanId>, +) -> String { + let mut output = render_diagnostic(source, pipeline, diagnostic); + + if let Some(warnings) = render_warnings(source, pipeline) { + output.push_str("\n\nalso emitted warnings:\n"); + output.push_str(&warnings); + } + + output +} + +/// Compares rendered output against the expected `.stdout` file. +/// +/// If `bless` is true, writes the actual output to the file instead of +/// comparing. +/// +/// # Errors +/// +/// Returns [`TestError::OutputMismatch`] when the actual output differs from +/// the expected content, with the diff attached to the report. 
+pub(crate) fn compare_or_bless(
+    actual: &str,
+    expected_path: &Path,
+    bless: bool,
+) -> Result<(), Report<TestError>> {
+    if bless {
+        if let Some(parent) = expected_path.parent() {
+            fs::create_dir_all(parent)
+                .change_context(TestError::OutputMismatch)
+                .attach_with(|| format!("could not create directory {}", parent.display()))?;
+        }
+
+        fs::write(expected_path, actual)
+            .change_context(TestError::OutputMismatch)
+            .attach_with(|| {
+                format!(
+                    "could not write blessed output to {}",
+                    expected_path.display()
+                )
+            })?;
+
+        return Ok(());
+    }
+
+    let expected = match fs::read_to_string(expected_path) {
+        Ok(contents) => contents,
+        Err(error) if error.kind() == std::io::ErrorKind::NotFound => {
+            return Err(Report::new(TestError::OutputMismatch)
+                .attach(format!(
+                    "expected output file {} does not exist; run with --bless to create it",
+                    expected_path.display()
+                ))
+                .attach(format!("actual output:\n{actual}")));
+        }
+        Err(error) => {
+            return Err(Report::new(error)
+                .change_context(TestError::OutputMismatch)
+                .attach(format!("could not read {}", expected_path.display())));
+        }
+    };
+
+    if actual == expected {
+        return Ok(());
+    }
+
+    let diff = SimpleDiff::from_str(&expected, actual, "expected", "actual");
+
+    Err(Report::new(TestError::OutputMismatch).attach(format!(
+        "output mismatch for {}\n\n{diff}\n\nrun with --bless to update the expected output",
+        expected_path.display()
+    )))
+}
diff --git a/libs/@local/hashql/eval/tests/orchestrator/programmatic.rs b/libs/@local/hashql/eval/tests/orchestrator/programmatic.rs
new file mode 100644
index 00000000000..0f5f2cc5050
--- /dev/null
+++ b/libs/@local/hashql/eval/tests/orchestrator/programmatic.rs
@@ -0,0 +1,225 @@
+use hashql_compiletest::pipeline::Pipeline;
+use hashql_core::{
+    heap,
+    module::std_lib::graph::types::knowledge::entity,
+    r#type::{TypeBuilder, TypeId},
+};
+use hashql_hir::node::{HirId, operation::InputOp};
+use hashql_mir::{
+    body::{
+        Body, Source,
+        operand::Operand,
terminator::{GraphRead, GraphReadBody, GraphReadHead, GraphReadTail, TerminatorKind},
+    },
+    builder::BodyBuilder,
+    def::{DefId, DefIdVec},
+    intern::Interner,
+    op,
+};
+
+/// Filter entities where the "name" property equals the `alice_name` input.
+///
+/// Constructs a graph read with a filter body that projects
+/// `vertex.properties.<name property URL>` and compares against a string input.
+/// This exercises the property hydration path, which requires the MIR builder
+/// because property field names are URLs that the HIR cannot resolve.
+pub(crate) fn property_access<'heap>(
+    pipeline: &Pipeline<'heap>,
+) -> (Interner<'heap>, DefId, DefIdVec<Body<'heap>>) {
+    let heap = pipeline.heap;
+    let interner = Interner::new(heap);
+    let ty = TypeBuilder::synthetic(&pipeline.env);
+
+    let unknown_ty = ty.unknown();
+    let bool_ty = ty.boolean();
+    let unit_ty = ty.tuple([] as [TypeId; 0]);
+
+    // Entity: properties are un-narrowed.
+    let entity_ty = entity::types::entity(&ty, unknown_ty, None);
+
+    let entry_id = DefId::new(0);
+    let filter_id = DefId::new(1);
+
+    // Entry body: load temporal_axes, graph read with filter, return result.
+ let entry_body = { + let mut builder = BodyBuilder::new(&interner); + + let axis = builder.local("axis", unknown_ty); + let env_local = builder.local("env", unit_ty); + let graph_result = builder.local("graph_result", unknown_ty); + + let bb0 = builder.reserve_block([]); + let bb1 = builder.reserve_block([graph_result.local]); + + builder + .build_block(bb0) + .assign_place(axis, |rv| { + rv.input(InputOp::Load { required: true }, "temporal_axes") + }) + .assign_place(env_local, |rv| rv.tuple([] as [Operand<'_>; 0])) + .finish_with_terminator(TerminatorKind::GraphRead(GraphRead { + head: GraphReadHead::Entity { + axis: Operand::Place(axis), + }, + body: { + let mut body = heap::Vec::new_in(heap); + body.push(GraphReadBody::Filter(filter_id, env_local.local)); + body + }, + tail: GraphReadTail::Collect, + target: bb1, + })); + + builder.build_block(bb1).ret(graph_result); + + let mut body = builder.finish(0, unknown_ty); + body.id = entry_id; + body.source = Source::Closure(HirId::PLACEHOLDER, None); + body + }; + + // Filter body: fn(env: (), vertex: Entity) -> Bool + let filter_body = { + let mut builder = BodyBuilder::new(&interner); + + let _env = builder.local("env", unit_ty); + let vertex = builder.local("vertex", entity_ty); + let props = + builder.place(|place| place.from(vertex).field_by_name("properties", unknown_ty)); + let name_value = builder.place(|place| { + place.from(props).field_by_name( + "https://blockprotocol.org/@alice/types/property-type/name/", + unknown_ty, + ) + }); + let alice_name = builder.local("alice_name", unknown_ty); + let result = builder.local("result", bool_ty); + + let bb0 = builder.reserve_block([]); + + builder + .build_block(bb0) + .assign_place(alice_name, |rv| { + rv.input(InputOp::Load { required: true }, "alice_name") + }) + .assign_place(result, |rv| rv.binary(name_value, op![==], alice_name)) + .ret(result); + + let mut body = builder.finish(2, bool_ty); + body.id = filter_id; + body.source = 
Source::GraphReadFilter(HirId::PLACEHOLDER);
+        body
+    };
+
+    // Entry must be pushed first (DefId 0), filter second (DefId 1).
+    let mut bodies = DefIdVec::new();
+    let id0 = bodies.push(entry_body);
+    let id1 = bodies.push(filter_body);
+    debug_assert_eq!(id0, entry_id);
+    debug_assert_eq!(id1, filter_id);
+
+    (interner, entry_id, bodies)
+}
+
+/// Filter entities where `age + 5 > 30`.
+///
+/// Exercises NULL propagation through arithmetic on a missing JSONB key:
+/// only Bob has an `age` property (42), so `42 + 5 = 47 > 30` passes.
+/// All other entities lack the key, producing NULL that propagates through
+/// the addition and comparison, then gets rejected by the COALESCE at the
+/// continuation return point.
+pub(crate) fn property_arithmetic<'heap>(
+    pipeline: &Pipeline<'heap>,
+) -> (Interner<'heap>, DefId, DefIdVec<Body<'heap>>) {
+    let heap = pipeline.heap;
+    let interner = Interner::new(heap);
+    let ty = TypeBuilder::synthetic(&pipeline.env);
+
+    let unknown_ty = ty.unknown();
+    let bool_ty = ty.boolean();
+    let int_ty = ty.integer();
+    let unit_ty = ty.tuple([] as [TypeId; 0]);
+
+    let entity_ty = entity::types::entity(&ty, unknown_ty, None);
+
+    let entry_id = DefId::new(0);
+    let filter_id = DefId::new(1);
+
+    let entry_body = {
+        let mut builder = BodyBuilder::new(&interner);
+
+        let axis = builder.local("axis", unknown_ty);
+        let env_local = builder.local("env", unit_ty);
+        let graph_result = builder.local("graph_result", unknown_ty);
+
+        let bb0 = builder.reserve_block([]);
+        let bb1 = builder.reserve_block([graph_result.local]);
+
+        builder
+            .build_block(bb0)
+            .assign_place(axis, |rv| {
+                rv.input(InputOp::Load { required: true }, "temporal_axes")
+            })
+            .assign_place(env_local, |rv| rv.tuple([] as [Operand<'_>; 0]))
+            .finish_with_terminator(TerminatorKind::GraphRead(GraphRead {
+                head: GraphReadHead::Entity {
+                    axis: Operand::Place(axis),
+                },
+                body: {
+                    let mut body = heap::Vec::new_in(heap);
+                    body.push(GraphReadBody::Filter(filter_id,
env_local.local)); + body + }, + tail: GraphReadTail::Collect, + target: bb1, + })); + + builder.build_block(bb1).ret(graph_result); + + let mut body = builder.finish(0, unknown_ty); + body.id = entry_id; + body.source = Source::Closure(HirId::PLACEHOLDER, None); + body + }; + + // Filter body: (vertex.properties. + 5) > 30 + let filter_body = { + let mut builder = BodyBuilder::new(&interner); + + let _env = builder.local("env", unit_ty); + let vertex = builder.local("vertex", entity_ty); + let props = + builder.place(|place| place.from(vertex).field_by_name("properties", unknown_ty)); + let age_value = builder.place(|place| { + place.from(props).field_by_name( + "https://blockprotocol.org/@alice/types/property-type/age/", + unknown_ty, + ) + }); + let sum = builder.local("sum", int_ty); + let result = builder.local("result", bool_ty); + let five = builder.const_int(5); + let thirty = builder.const_int(30); + + let bb0 = builder.reserve_block([]); + + builder + .build_block(bb0) + .assign_place(sum, |rv| rv.binary(age_value, op![+], five)) + .assign_place(result, |rv| rv.binary(sum, op![>], thirty)) + .ret(result); + + let mut body = builder.finish(2, bool_ty); + body.id = filter_id; + body.source = Source::GraphReadFilter(HirId::PLACEHOLDER); + body + }; + + let mut bodies = DefIdVec::new(); + let id0 = bodies.push(entry_body); + let id1 = bodies.push(filter_body); + debug_assert_eq!(id0, entry_id); + debug_assert_eq!(id1, filter_id); + + (interner, entry_id, bodies) +} diff --git a/libs/@local/hashql/eval/tests/orchestrator/seed.rs b/libs/@local/hashql/eval/tests/orchestrator/seed.rs new file mode 100644 index 00000000000..cbffde7137b --- /dev/null +++ b/libs/@local/hashql/eval/tests/orchestrator/seed.rs @@ -0,0 +1,381 @@ +use std::collections::HashMap; + +use error_stack::{Report, ResultExt as _}; +use hash_graph_authorization::policies::store::{PolicyStore as _, PrincipalStore as _}; +use hash_graph_postgres_store::store::{AsClient as _, PostgresStore}; +use 
hash_graph_store::{
+    account::{AccountStore as _, CreateUserActorParams},
+    data_type::{CreateDataTypeParams, DataTypeStore as _},
+    entity::{CreateEntityParams, EntityStore as _},
+    entity_type::{CreateEntityTypeParams, EntityTypeStore as _},
+    migration::StoreMigration as _,
+    property_type::{CreatePropertyTypeParams, PropertyTypeStore as _},
+    query::ConflictBehavior,
+};
+use hash_graph_test_data::{data_type, entity, entity_type, property_type};
+use tokio_postgres::Client;
+use type_system::{
+    knowledge::{
+        Confidence,
+        entity::{EntityId, LinkData, provenance::ProvidedEntityEditionProvenance},
+        property::{PropertyObject, PropertyObjectWithMetadata, metadata::PropertyProvenance},
+    },
+    ontology::{
+        data_type::DataType,
+        entity_type::EntityType,
+        id::VersionedUrl,
+        property_type::PropertyType,
+        provenance::{OntologyOwnership, ProvidedOntologyEditionProvenance},
+    },
+    principal::{
+        actor::{ActorEntityUuid, ActorType, MachineId},
+        actor_group::WebId,
+    },
+    provenance::{OriginProvenance, OriginType},
+};
+
+use crate::error::SetupError;
+
+/// Entity IDs created during seeding, needed by tests to construct queries
+/// and provide inputs.
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub(crate) struct SeededEntities {
+    pub alice: EntityId,
+    pub bob: EntityId,
+    pub organization: EntityId,
+    pub friend_link: EntityId,
+    pub draft_alice: EntityId,
+}
+
+const SEED_KEY: &str = "orchestrator_test_seed";
+
+fn entity_type_id(json: &str) -> VersionedUrl {
+    serde_json::from_str::<EntityType>(json)
+        .expect("could not parse entity type")
+        .id
+}
+
+const fn entity_provenance() -> ProvidedEntityEditionProvenance {
+    ProvidedEntityEditionProvenance {
+        actor_type: ActorType::User,
+        origin: OriginProvenance::from_empty_type(OriginType::Api),
+        sources: Vec::new(),
+    }
+}
+
+/// Ensures the state table exists and returns the previously seeded entities
+/// if seeding was already completed.
+async fn load_existing_seed(
+    store: &PostgresStore<Client>,
+) -> Result<Option<SeededEntities>, Report<SetupError>> {
+    let client = store.as_client();
+
+    client
+        .execute(
+            "CREATE TABLE IF NOT EXISTS _orchestrator_test_state (
+                key TEXT PRIMARY KEY,
+                value JSONB
+            )",
+            &[],
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not create state table")?;
+
+    let row = client
+        .query_opt(
+            "SELECT value FROM _orchestrator_test_state WHERE key = $1",
+            &[&SEED_KEY],
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not query seed state")?;
+
+    match row {
+        Some(row) => {
+            let value: serde_json::Value = row.get(0);
+            let entities: SeededEntities = serde_json::from_value(value)
+                .change_context(SetupError::Seed)
+                .attach("could not deserialize stored seed state")?;
+            Ok(Some(entities))
+        }
+        None => Ok(None),
+    }
+}
+
+async fn save_seed(
+    store: &PostgresStore<Client>,
+    entities: &SeededEntities,
+) -> Result<(), Report<SetupError>> {
+    let value = serde_json::to_value(entities)
+        .change_context(SetupError::Seed)
+        .attach("could not serialize seed state")?;
+
+    store
+        .as_client()
+        .execute(
+            "INSERT INTO _orchestrator_test_state (key, value) VALUES ($1, $2)",
+            &[&SEED_KEY, &value],
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not persist seed state")?;
+
+    Ok(())
+}
+
+/// Connects to the database, runs migrations, and seeds test data if needed.
+///
+/// On a reused container where seeding already completed, returns the
+/// previously stored entity IDs without creating duplicates.
+pub(crate) async fn setup(
+    store: &mut PostgresStore<Client>,
+) -> Result<SeededEntities, Report<SetupError>> {
+    store
+        .run_migrations()
+        .await
+        .change_context(SetupError::Migration)?;
+    store
+        .seed_system_policies()
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not seed system policies")?;
+
+    if let Some(entities) = load_existing_seed(store).await?
{
+        return Ok(entities);
+    }
+
+    let entities = seed_data(store).await?;
+    save_seed(store, &entities).await?;
+
+    Ok(entities)
+}
+
+/// Seeds all ontology types (data types, property types, entity types).
+async fn seed_ontology(
+    store: &mut PostgresStore<Client>,
+    actor_id: ActorEntityUuid,
+    ownership: &OntologyOwnership,
+) -> Result<(), Report<SetupError>> {
+    let ontology_provenance = ProvidedOntologyEditionProvenance {
+        actor_type: ActorType::User,
+        origin: OriginProvenance::from_empty_type(OriginType::Api),
+        sources: Vec::new(),
+    };
+
+    store
+        .create_data_types(
+            actor_id,
+            [
+                data_type::VALUE_V1,
+                data_type::TEXT_V1,
+                data_type::NUMBER_V1,
+            ]
+            .into_iter()
+            .map(|json| CreateDataTypeParams {
+                schema: serde_json::from_str::<DataType>(json).expect("could not parse data type"),
+                ownership: ownership.clone(),
+                conflict_behavior: ConflictBehavior::Skip,
+                provenance: ontology_provenance.clone(),
+                conversions: HashMap::new(),
+            }),
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not seed data types")?;
+
+    store
+        .create_property_types(
+            actor_id,
+            [
+                // Leaf property types (no property type refs).
+                property_type::NAME_V1,
+                property_type::AGE_V1,
+                property_type::FAVORITE_FILM_V1,
+                property_type::FAVORITE_SONG_V1,
+                property_type::HOBBY_V1,
+                // Composite (refs leaf property types above).
+                property_type::INTERESTS_V1,
+            ]
+            .into_iter()
+            .map(|json| CreatePropertyTypeParams {
+                schema: serde_json::from_str::<PropertyType>(json)
+                    .expect("could not parse property type"),
+                ownership: ownership.clone(),
+                conflict_behavior: ConflictBehavior::Skip,
+                provenance: ontology_provenance.clone(),
+            }),
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not seed property types")?;
+
+    store
+        .create_entity_types(
+            actor_id,
+            [
+                entity_type::LINK_V1,
+                entity_type::link::FRIEND_OF_V1,
+                entity_type::link::ACQUAINTANCE_OF_V1,
+                entity_type::PERSON_V1,
+                entity_type::ORGANIZATION_V1,
+            ]
+            .into_iter()
+            .map(|json| CreateEntityTypeParams {
+                schema: serde_json::from_str::<EntityType>(json)
+                    .expect("could not parse entity type"),
+                ownership: ownership.clone(),
+                conflict_behavior: ConflictBehavior::Skip,
+                provenance: ontology_provenance.clone(),
+            }),
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not seed entity types")?;
+
+    Ok(())
+}
+
+/// Creates a non-link entity from a property JSON fixture and entity type JSON.
+async fn create_entity(
+    store: &mut PostgresStore<Client>,
+    actor_id: ActorEntityUuid,
+    web_id: WebId,
+    entity_type_json: &str,
+    properties_json: &str,
+    draft: bool,
+) -> Result<EntityId, Report<SetupError>> {
+    let properties: PropertyObject =
+        serde_json::from_str(properties_json).expect("could not parse entity properties");
+
+    let entity = store
+        .create_entity(
+            actor_id,
+            CreateEntityParams {
+                web_id,
+                entity_uuid: None,
+                decision_time: None,
+                entity_type_ids: std::collections::HashSet::from([entity_type_id(
+                    entity_type_json,
+                )]),
+                properties: PropertyObjectWithMetadata::from_parts(properties, None)
+                    .expect("could not create property metadata"),
+                confidence: None,
+                link_data: None,
+                draft,
+                policies: Vec::new(),
+                provenance: entity_provenance(),
+            },
+        )
+        .await
+        .change_context(SetupError::Seed)?;
+
+    Ok(entity.metadata.record_id.entity_id)
+}
+
+async fn seed_data(
+    store: &mut PostgresStore<Client>,
+) -> Result<SeededEntities, Report<SetupError>> {
+    let system_account_id: MachineId = store
+        .get_or_create_system_machine("h")
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not create system machine")?;
+    let user_id = store
+        .create_user_actor(
+            system_account_id.into(),
+            CreateUserActorParams {
+                user_id: None,
+                shortname: Some("orchestrator-test".to_owned()),
+                registration_complete: true,
+            },
+        )
+        .await
+        .change_context(SetupError::Seed)
+        .attach("could not create test user")?
+ .user_id; + + let actor_id: ActorEntityUuid = user_id.into(); + let web_id: WebId = user_id.into(); + let ownership = OntologyOwnership::Local { web_id }; + + seed_ontology(store, actor_id, &ownership).await?; + + let alice = create_entity( + store, + actor_id, + web_id, + entity_type::PERSON_V1, + entity::PERSON_ALICE_V1, + false, + ) + .await?; + + let bob = create_entity( + store, + actor_id, + web_id, + entity_type::PERSON_V1, + entity::PERSON_BOB_V1, + false, + ) + .await?; + + let organization = create_entity( + store, + actor_id, + web_id, + entity_type::ORGANIZATION_V1, + entity::ORGANIZATION_V1, + false, + ) + .await?; + + let draft_alice = create_entity( + store, + actor_id, + web_id, + entity_type::PERSON_V1, + entity::PERSON_ALICE_V1, + true, + ) + .await?; + + let friend_link = store + .create_entity( + actor_id, + CreateEntityParams { + web_id, + entity_uuid: None, + decision_time: None, + entity_type_ids: std::collections::HashSet::from([entity_type_id( + entity_type::link::FRIEND_OF_V1, + )]), + properties: PropertyObjectWithMetadata::from_parts(PropertyObject::empty(), None) + .expect("could not create property metadata"), + confidence: None, + link_data: Some(LinkData { + left_entity_id: alice, + right_entity_id: bob, + left_entity_confidence: Confidence::new(0.9), + left_entity_provenance: PropertyProvenance::default(), + right_entity_confidence: Confidence::new(0.8), + right_entity_provenance: PropertyProvenance::default(), + }), + draft: false, + policies: Vec::new(), + provenance: entity_provenance(), + }, + ) + .await + .change_context(SetupError::Seed) + .attach("could not create friend-of link entity")?; + + Ok(SeededEntities { + alice, + bob, + organization, + friend_link: friend_link.metadata.record_id.entity_id, + draft_alice, + }) +} diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.jsonc new file mode 100644 index 
00000000000..08646f21f68 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.jsonc @@ -0,0 +1,14 @@ +// Select the draft entity by UUID. Verifies EntityPath::DraftId produces +// Optional::Value (non-null draft_id) rather than Optional::Skipped. +// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "draft_alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.stdout new file mode 100644 index 00000000000..14e3bc7b908 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/draft-entity.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.jsonc new file mode 100644 index 00000000000..437ac66e268 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.jsonc @@ -0,0 +1,14 @@ +// Filter by full EntityId input. Verifies EntityId struct decoding +// (web_id, entity_uuid, draft_id) through the parameter binding path. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.record_id.entity_id", + ["input", "alice_id", "::graph::types::knowledge::entity::EntityId"] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.stdout new file mode 100644 index 00000000000..72222e8bcdc --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-entity-id.stdout @@ -0,0 +1,40 @@ +[ + { + "metadata": { + "record_id": { + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + } + } + } +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target interpreter +filter accepted: body 2 +row accepted +row received +filter started: body 2 +island entered: body 2, island 0, target interpreter +filter rejected: body 2 +row rejected +row received +filter started: body 2 +island entered: body 2, island 0, target interpreter +filter rejected: body 2 +row rejected +row received +filter started: body 2 +island entered: body 2, island 0, target interpreter +filter rejected: body 2 +row rejected +row received +filter started: body 2 +island entered: body 2, island 0, target interpreter +filter rejected: body 2 +row rejected diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.jsonc new file mode 100644 index 00000000000..bdaf20bfb3c --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.jsonc @@ -0,0 +1,13 @@ +// Filter entities by entity_uuid matching Alice. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.stdout new file mode 100644 index 00000000000..14e3bc7b908 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-by-uuid.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.jsonc new file mode 100644 index 00000000000..1b160c23cd9 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.jsonc @@ -0,0 +1,26 @@ +// Diamond CFG in filter: discriminant depends on the vertex. +// If entity_uuid matches Alice, compare against Alice's full EntityId. +// Otherwise compare entity_uuid against Bob's EntityUuid. +// Expected result: Alice (first arm) and Bob (second arm). 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["if", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ], + ["==", + "vertex.metadata.record_id.entity_id", + ["input", "alice_id", "::graph::types::knowledge::entity::EntityId"] + ], + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "bob_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.stdout new file mode 100644 index 00000000000..41b0688b5c7 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-diamond-cfg.stdout @@ -0,0 +1,39 @@ +[ + { + "metadata": { + "record_id": { + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + } + } + }, + { + "metadata": { + "record_id": { + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + } + } + } +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation flushed: body 2, island 0 +island entered: body 2, island 1, target interpreter +filter accepted: body 2 +row accepted +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.jsonc new file mode 100644 index 00000000000..b50a8342226 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.jsonc @@ -0,0 +1,10 @@ +// 
Filter that rejects all entities. Result should be an empty list. +// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + {"#literal": false} + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.stdout new file mode 100644 index 00000000000..aac4e2f4a36 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-false.stdout @@ -0,0 +1,28 @@ +[] +--- +query executed: body 2, block bb0 +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter rejected: body 1 +row rejected +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter rejected: body 1 +row rejected +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter rejected: body 1 +row rejected +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter rejected: body 1 +row rejected +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter rejected: body 1 +row rejected diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.jsonc new file mode 100644 index 00000000000..7a82823fc31 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.jsonc @@ -0,0 +1,13 @@ +// Filter entities where entity_uuid != Alice. Should exclude Alice. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["!=", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.stdout new file mode 100644 index 00000000000..61037802f8d --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-not-equal.stdout @@ -0,0 +1,32 @@ +[ + {}, + {}, + {}, + {} +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.jsonc new file mode 100644 index 00000000000..1a74826fde5 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.jsonc @@ -0,0 +1,40 @@ +// Two sequential filters on the same graph read. +// First filter: exclude non-person entities (org, draft, link) by +// requiring entity_uuid != org_uuid AND entity_uuid != draft_alice_uuid +// AND entity_uuid != friend_link_uuid. 
Keeps Alice and Bob. +// Second filter: keep only Alice (entity_uuid == alice_uuid). +// Net result: only Alice survives. +// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["if", + ["!=", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "org_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ], + ["if", + ["!=", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "draft_alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ], + ["!=", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "friend_link_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ], + { "#literal": false } + ], + { "#literal": false } + ] + ] + ], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.stdout new file mode 100644 index 00000000000..00e00935892 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/filter-sequential.stdout @@ -0,0 +1,15 @@ +[ + {} +] +--- +query executed: body 5, block bb0 +row received +filter started: body 3 +island entered: body 3, island 0, target postgres +continuation implicit true: body 3 +filter accepted: body 3 +filter started: body 4 +island entered: body 4, island 0, target postgres +continuation implicit true: body 4 +filter accepted: body 4 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.jsonc new file mode 100644 index 
00000000000..c1ba55cc743 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.jsonc @@ -0,0 +1,12 @@ +// Filter entities where link_data is present (link entities only). +// Verifies PartialLinkData hydration from real LEFT JOIN columns: +// entity IDs, confidence, provenance. +// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["!=", "vertex.link_data", ["None"]] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.stdout new file mode 100644 index 00000000000..f4a041e9b37 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/has-link-data.stdout @@ -0,0 +1,95 @@ +[ + { + "link_data": { + "left_entity_confidence": 0.9, + "left_entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + }, + "left_entity_provenance": {}, + "right_entity_confidence": 0.8, + "right_entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + }, + "right_entity_provenance": {} + }, + "metadata": { + "archived": false, + "confidence": null, + "entity_type_ids": [ + { + "base_url": "https://blockprotocol.org/@alice/types/entity-type/friend-of/", + "version": "1" + } + ], + "property_metadata": { + "value": {} + }, + "provenance": { + "edition": { + "actorType": "user", + "createdById": "", + "origin": { + "type": "api" + } + }, + "inferred": { + "createdAtDecisionTime": "", + "createdAtTransactionTime": "", + "createdById": "", + "firstNonDraftCreatedAtDecisionTime": "", + "firstNonDraftCreatedAtTransactionTime": "" + } + }, + "record_id": { + "edition_id": "", + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + }, + "temporal_versioning": { + "decision_time": { + "end": null, + "start": + }, + 
"transaction_time": { + "end": null, + "start": + } + } + }, + "properties": {} + } +] +--- +query executed: body 5, block bb0 +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter rejected: body 4 +row rejected +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter rejected: body 4 +row rejected +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter rejected: body 4 +row rejected +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter rejected: body 4 +row rejected +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter accepted: body 4 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.jsonc new file mode 100644 index 00000000000..f831079d4a3 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.jsonc @@ -0,0 +1,16 @@ +// Let binding propagation into filter body. 
+// biome-ignore format: readability +["let", "target", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"], + ["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + "target" + ] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.stdout new file mode 100644 index 00000000000..14e3bc7b908 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/let-binding.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.jsonc new file mode 100644 index 00000000000..135b899d7d8 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.jsonc @@ -0,0 +1,25 @@ +// Filter using three different metadata leaf column types: +// entity_uuid (UUID/TEXT), web_id (UUID/TEXT), archived (BOOL). +// All seeded entities share the same web_id. Combining all three +// conditions selects only non-archived Alice. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["&&", + ["&&", + ["==", + "vertex.metadata.record_id.entity_id.entity_uuid", + ["input", "alice_uuid", "::graph::types::knowledge::entity::EntityUuid"] + ], + ["==", + "vertex.metadata.record_id.entity_id.web_id", + ["input", "web_id", "::graph::types::principal::actor_group::web::WebId"] + ] + ], + ["==", "vertex.metadata.archived", { "#literal": false }] + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.stdout new file mode 100644 index 00000000000..14e3bc7b908 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/metadata-leaf-fields.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 3, block bb0 +row received +filter started: body 2 +island entered: body 2, island 0, target postgres +continuation implicit true: body 2 +filter accepted: body 2 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.jsonc new file mode 100644 index 00000000000..2c5a51053a5 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.jsonc @@ -0,0 +1,11 @@ +// Filter entities where link_data is None (non-link entities). +// Verifies Optional::Null comparison against real NULL LEFT JOIN columns. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", "vertex.link_data", ["None"]] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.stdout new file mode 100644 index 00000000000..9297ac31ae9 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/null-link-data.stdout @@ -0,0 +1,289 @@ +[ + { + "link_data": null, + "metadata": { + "archived": false, + "confidence": null, + "entity_type_ids": [ + { + "base_url": "https://blockprotocol.org/@alice/types/entity-type/person/", + "version": "1" + } + ], + "property_metadata": { + "value": { + "https://blockprotocol.org/@alice/types/property-type/name/": { + "metadata": { + "canonical": { + "https://blockprotocol.org/@blockprotocol/types/data-type/text/": "Alice" + }, + "dataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1", + "originalDataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1" + } + } + } + }, + "provenance": { + "edition": { + "actorType": "user", + "createdById": "", + "origin": { + "type": "api" + } + }, + "inferred": { + "createdAtDecisionTime": "", + "createdAtTransactionTime": "", + "createdById": "", + "firstNonDraftCreatedAtDecisionTime": "", + "firstNonDraftCreatedAtTransactionTime": "" + } + }, + "record_id": { + "edition_id": "", + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + }, + "temporal_versioning": { + "decision_time": { + "end": null, + "start": + }, + "transaction_time": { + "end": null, + "start": + } + } + }, + "properties": { + "https://blockprotocol.org/@alice/types/property-type/name/": "Alice" + } + }, + { + "link_data": null, + "metadata": { + "archived": false, + "confidence": null, + 
"entity_type_ids": [ + { + "base_url": "https://blockprotocol.org/@alice/types/entity-type/person/", + "version": "1" + } + ], + "property_metadata": { + "value": { + "https://blockprotocol.org/@alice/types/property-type/age/": { + "metadata": { + "canonical": { + "https://blockprotocol.org/@blockprotocol/types/data-type/number/": 42.0 + }, + "dataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/number/v/1", + "originalDataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/number/v/1" + } + }, + "https://blockprotocol.org/@alice/types/property-type/name/": { + "metadata": { + "canonical": { + "https://blockprotocol.org/@blockprotocol/types/data-type/text/": "Bob" + }, + "dataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1", + "originalDataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1" + } + } + } + }, + "provenance": { + "edition": { + "actorType": "user", + "createdById": "", + "origin": { + "type": "api" + } + }, + "inferred": { + "createdAtDecisionTime": "", + "createdAtTransactionTime": "", + "createdById": "", + "firstNonDraftCreatedAtDecisionTime": "", + "firstNonDraftCreatedAtTransactionTime": "" + } + }, + "record_id": { + "edition_id": "", + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + }, + "temporal_versioning": { + "decision_time": { + "end": null, + "start": + }, + "transaction_time": { + "end": null, + "start": + } + } + }, + "properties": { + "https://blockprotocol.org/@alice/types/property-type/age/": 42.0, + "https://blockprotocol.org/@alice/types/property-type/name/": "Bob" + } + }, + { + "link_data": null, + "metadata": { + "archived": false, + "confidence": null, + "entity_type_ids": [ + { + "base_url": "https://blockprotocol.org/@alice/types/entity-type/organization/", + "version": "1" + } + ], + "property_metadata": { + "value": { + "https://blockprotocol.org/@alice/types/property-type/name/": { + "metadata": { + 
"canonical": { + "https://blockprotocol.org/@blockprotocol/types/data-type/text/": "HASH, Ltd" + }, + "dataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1", + "originalDataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1" + } + } + } + }, + "provenance": { + "edition": { + "actorType": "user", + "createdById": "", + "origin": { + "type": "api" + } + }, + "inferred": { + "createdAtDecisionTime": "", + "createdAtTransactionTime": "", + "createdById": "", + "firstNonDraftCreatedAtDecisionTime": "", + "firstNonDraftCreatedAtTransactionTime": "" + } + }, + "record_id": { + "edition_id": "", + "entity_id": { + "draft_id": null, + "entity_uuid": "", + "web_id": "" + } + }, + "temporal_versioning": { + "decision_time": { + "end": null, + "start": + }, + "transaction_time": { + "end": null, + "start": + } + } + }, + "properties": { + "https://blockprotocol.org/@alice/types/property-type/name/": "HASH, Ltd" + } + }, + { + "link_data": null, + "metadata": { + "archived": false, + "confidence": null, + "entity_type_ids": [ + { + "base_url": "https://blockprotocol.org/@alice/types/entity-type/person/", + "version": "1" + } + ], + "property_metadata": { + "value": { + "https://blockprotocol.org/@alice/types/property-type/name/": { + "metadata": { + "canonical": { + "https://blockprotocol.org/@blockprotocol/types/data-type/text/": "Alice" + }, + "dataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1", + "originalDataTypeId": "https://blockprotocol.org/@blockprotocol/types/data-type/text/v/1" + } + } + } + }, + "provenance": { + "edition": { + "actorType": "user", + "createdById": "", + "origin": { + "type": "api" + } + }, + "inferred": { + "createdAtDecisionTime": "", + "createdAtTransactionTime": "", + "createdById": "" + } + }, + "record_id": { + "edition_id": "", + "entity_id": { + "draft_id": "", + "entity_uuid": "", + "web_id": "" + } + }, + "temporal_versioning": { + "decision_time": { + "end": 
null, + "start": + }, + "transaction_time": { + "end": null, + "start": + } + } + }, + "properties": { + "https://blockprotocol.org/@alice/types/property-type/name/": "Alice" + } + } +] +--- +query executed: body 5, block bb0 +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter accepted: body 4 +row accepted +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter accepted: body 4 +row accepted +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter accepted: body 4 +row accepted +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter accepted: body 4 +row accepted +row received +filter started: body 4 +island entered: body 4, island 0, target interpreter +filter rejected: body 4 +row rejected diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.jsonc new file mode 100644 index 00000000000..4bf593907ed --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.jsonc @@ -0,0 +1,25 @@ +// Filter entities by entity_type_ids matching the Organization type. +// Only the organization entity should survive. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + ["==", + "vertex.metadata.entity_type_ids", + { "#list": [ + ["::graph::types::ontology::VersionedUrl", { "#struct": { + "base_url": ["::graph::types::ontology::BaseUrl", + ["::core::url::Url", + { "#literal": "https://blockprotocol.org/@alice/types/entity-type/organization/" } + ] + ], + "version": ["::graph::types::ontology::OntologyTypeVersion", + { "#literal": "1" } + ] + }}] + ]} + ] + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.stdout new file mode 100644 index 00000000000..2e88f4d97e4 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/organization-type.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 16, block bb0 +row received +filter started: body 15 +island entered: body 15, island 0, target postgres +continuation implicit true: body 15 +filter accepted: body 15 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.jsonc new file mode 100644 index 00000000000..01d2b3290d8 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.jsonc @@ -0,0 +1,12 @@ +//@ axis[decision] = (946684800000) +// Pin decision time to 2000-01-01T00:00:00Z. +// Verifies TemporalInterval wire encoding is accepted by PostgreSQL. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + {"#literal": true} + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.stdout new file mode 100644 index 00000000000..0a280baf2e7 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/pinned-decision-time.stdout @@ -0,0 +1,3 @@ +[] +--- +query executed: body 2, block bb0 diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.jsonc b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.jsonc new file mode 100644 index 00000000000..57692475193 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.jsonc @@ -0,0 +1,10 @@ +// All entities, trivial filter. Baseline for the full pipeline. 
+// biome-ignore format: readability +["::graph::tail::collect", + ["::graph::body::filter", + ["::graph::head::entities", ["input", "temporal_axes", "_"]], + ["fn", { "#tuple": [] }, { "#struct": { "vertex": "_" } }, "_", + {"#literal": true} + ] + ] +] diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.stdout new file mode 100644 index 00000000000..de0a09441b1 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/jsonc/simple-read.stdout @@ -0,0 +1,34 @@ +[ + {}, + {}, + {}, + {}, + {} +] +--- +query executed: body 2, block bb0 +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter accepted: body 1 +row accepted +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter accepted: body 1 +row accepted +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter accepted: body 1 +row accepted +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter accepted: body 1 +row accepted +row received +filter started: body 1 +island entered: body 1, island 0, target interpreter +filter accepted: body 1 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-access.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-access.stdout new file mode 100644 index 00000000000..30727c313fa --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-access.stdout @@ -0,0 +1,18 @@ +[ + {}, + {} +] +--- +query executed: body 0, block bb0 +row received +filter started: body 1 +island entered: body 1, island 0, target postgres +continuation implicit true: body 1 +filter accepted: body 1 +row accepted +row received +filter started: body 1 +island entered: body 1, island 0, target postgres +continuation implicit true: body 1 
+filter accepted: body 1 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-arithmetic.stdout b/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-arithmetic.stdout new file mode 100644 index 00000000000..98a4a63bdf5 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/orchestrator/programmatic/property-arithmetic.stdout @@ -0,0 +1,11 @@ +[ + {} +] +--- +query executed: body 0, block bb0 +row received +filter started: body 1 +island entered: body 1, island 0, target postgres +continuation implicit true: body 1 +filter accepted: body 1 +row accepted diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_bitand_bigint_cast.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_bitand_bigint_cast.snap index a9989a854ec..835cd62fae5 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_bitand_bigint_cast.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_bitand_bigint_cast.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW((((($1)::bigint) & (($2)::bigint))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((((($1::jsonb))::bigint) & ((($2::jsonb))::bigint))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_sub_numeric_cast.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_sub_numeric_cast.snap index 76fc4713692..505deb51e9a 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_sub_numeric_cast.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/binary_sub_numeric_cast.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW((((($1)::numeric) - (($2)::numeric))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((((($1::jsonb))::numeric) - 
((($2::jsonb))::numeric))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/data_island_provides_without_lateral.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/data_island_provides_without_lateral.snap index be49473f65f..29859b9025d 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/data_island_provides_without_lateral.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/data_island_provides_without_lateral.snap @@ -4,16 +4,16 @@ expression: query_report.to_string() --- ===================================== SQL ====================================== -SELECT "entity_editions_0_0_1"."properties" AS "properties", jsonb_build_object($3, jsonb_build_object($4, "entity_temporal_metadata_0_0_0"."web_id", $5, "entity_temporal_metadata_0_0_0"."entity_uuid", $6, "entity_temporal_metadata_0_0_0"."draft_id"), $6, "entity_temporal_metadata_0_0_0"."draft_id") AS "record_id", jsonb_build_object($7, "entity_temporal_metadata_0_0_0"."decision_time", $8, "entity_temporal_metadata_0_0_0"."transaction_time") AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", "entity_editions_0_0_1"."property_metadata" AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", "entity_has_left_entity_0_0_4"."confidence" AS "left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", "entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", 
"entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance" +SELECT "entity_editions_0_0_1"."properties" AS "properties", jsonb_build_object(($3::text), jsonb_build_object(($4::text), "entity_temporal_metadata_0_0_0"."web_id", ($5::text), "entity_temporal_metadata_0_0_0"."entity_uuid", ($6::text), "entity_temporal_metadata_0_0_0"."draft_id"), ($7::text), "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "record_id", jsonb_build_object(($8::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."decision_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8 END), ($11::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."transaction_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8 END)) AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", "entity_editions_0_0_1"."property_metadata" AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", "entity_has_left_entity_0_0_4"."confidence" AS "left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", 
"entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", "entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance" FROM "entity_temporal_metadata" AS "entity_temporal_metadata_0_0_0" INNER JOIN "entity_editions" AS "entity_editions_0_0_1" ON "entity_editions_0_0_1"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id" INNER JOIN "entity_ids" AS "entity_ids_0_0_3" ON "entity_ids_0_0_3"."web_id" = "entity_temporal_metadata_0_0_0"."web_id" AND "entity_ids_0_0_3"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" -LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object($9, "b", $10, "v")) AS "entity_type_ids" +LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object(($12::text), "b", ($13::text), "v")) AS "entity_type_ids" FROM "entity_is_of_type_ids" AS "eit" -CROSS JOIN UNNEST("eit"."base_urls", "eit"."versions") AS "u"("b", "v") +CROSS JOIN UNNEST("eit"."base_urls", ("eit"."versions"::text[])) AS "u"("b", "v") WHERE "eit"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "entity_is_of_type_ids_0_0_2" ON TRUE LEFT OUTER JOIN "entity_has_left_entity" AS "entity_has_left_entity_0_0_4" @@ -22,7 +22,7 @@ LEFT OUTER JOIN "entity_has_left_entity" AS "entity_has_left_entity_0_0_4" LEFT OUTER JOIN "entity_has_right_entity" AS "entity_has_right_entity_0_0_5" ON "entity_has_right_entity_0_0_5"."web_id" = "entity_temporal_metadata_0_0_0"."web_id" AND "entity_has_right_entity_0_0_5"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" -WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && $1 AND "entity_temporal_metadata_0_0_0"."decision_time" && $2 +WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && ($1::tstzrange) AND "entity_temporal_metadata_0_0_0"."decision_time" && ($2::tstzrange) ================================== Parameters ================================== $1: TemporalAxis(Transaction) @@ -31,7 +31,10 @@ $3: Symbol(entity_id) $4: 
Symbol(web_id) $5: Symbol(entity_uuid) $6: Symbol(draft_id) -$7: Symbol(decision_time) -$8: Symbol(transaction_time) -$9: Symbol(base_url) -$10: Symbol(version) +$7: Symbol(edition_id) +$8: Symbol(decision_time) +$9: Symbol(start) +$10: Symbol(end) +$11: Symbol(transaction_time) +$12: Symbol(base_url) +$13: Symbol(version) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/diamond_cfg_merge.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/diamond_cfg_merge.snap index 0fdc8b8332b..ce95e99dd42 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/diamond_cfg_merge.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/diamond_cfg_merge.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -CASE WHEN (($1)::int) = 0 THEN (ROW(((0)::boolean), NULL, NULL, NULL)::continuation) WHEN (($1)::int) = 1 THEN (ROW(((1)::boolean), NULL, NULL, NULL)::continuation) END +CASE WHEN ((($1::jsonb))::int) IS NULL THEN (ROW(COALESCE(((FALSE)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 0 THEN (ROW(COALESCE(((0)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 1 THEN (ROW(COALESCE(((1)::boolean), FALSE), NULL, NULL, NULL)::continuation) END diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/dynamic_index_projection.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/dynamic_index_projection.snap index 3ae7a2f88c4..bcb8bcf5e31 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/dynamic_index_projection.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/dynamic_index_projection.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((jsonb_extract_path(jsonb_build_array(10, 20, 30), (($1)::text)))::boolean), NULL, NULL, NULL)::continuation) 
+(ROW(COALESCE(((jsonb_extract_path(jsonb_build_array(10, 20, 30), ((($1::jsonb))::text)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/field_by_name_projection.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/field_by_name_projection.snap index 29f95402f4b..71094fb8d98 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/field_by_name_projection.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/field_by_name_projection.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((jsonb_extract_path(jsonb_build_object($1, 10, $2, 20), (($1)::text)))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((jsonb_extract_path(jsonb_build_object(($1::text), 10, ($2::text), 20), ((($1::text))::text)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/field_index_projection.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/field_index_projection.snap index 9254d49ff99..f66cabccbbf 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/field_index_projection.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/field_index_projection.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((jsonb_extract_path(jsonb_build_array(10, 20), ((0)::text)))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((jsonb_extract_path(jsonb_build_array(10, 20), ((0)::text)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_goto.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_goto.snap index cac5ac55385..efce69a224c 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_goto.snap +++ 
b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_goto.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(NULL, 1, ARRAY[8]::int[], ARRAY[jsonb_build_object($1, jsonb_build_object($2, "entity_temporal_metadata_0_0_0"."web_id", $3, "entity_temporal_metadata_0_0_0"."entity_uuid", $4, "entity_temporal_metadata_0_0_0"."draft_id"), $4, "entity_temporal_metadata_0_0_0"."draft_id")]::jsonb[])::continuation) +(ROW(NULL, 1, ARRAY[8]::int[], ARRAY[jsonb_build_object(($1::text), jsonb_build_object(($2::text), "entity_temporal_metadata_0_0_0"."web_id", ($3::text), "entity_temporal_metadata_0_0_0"."entity_uuid", ($4::text), "entity_temporal_metadata_0_0_0"."draft_id"), ($5::text), "entity_temporal_metadata_0_0_0"."entity_edition_id")]::jsonb[])::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_switch_int.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_switch_int.snap index ce9f2c2a23f..819313c4652 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_switch_int.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_switch_int.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -CASE WHEN (($7)::int) = 0 THEN (ROW(NULL, 2, ARRAY[]::int[], ARRAY[]::jsonb[])::continuation) WHEN (($7)::int) = 1 THEN (ROW(((1)::boolean), NULL, NULL, NULL)::continuation) END +CASE WHEN ((($10::jsonb))::int) IS NULL THEN (ROW(COALESCE(((FALSE)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($10::jsonb))::int) = 0 THEN (ROW(NULL, 2, ARRAY[]::int[], ARRAY[]::jsonb[])::continuation) WHEN ((($10::jsonb))::int) = 1 THEN (ROW(COALESCE(((1)::boolean), FALSE), NULL, NULL, NULL)::continuation) END diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_with_live_out.snap 
b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_with_live_out.snap index e0da571eacc..393d7e37651 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_with_live_out.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/island_exit_with_live_out.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(NULL, 1, ARRAY[7]::int[], ARRAY[jsonb_build_object($1, jsonb_build_object($2, "entity_temporal_metadata_0_0_0"."web_id", $3, "entity_temporal_metadata_0_0_0"."entity_uuid", $4, "entity_temporal_metadata_0_0_0"."draft_id"), $4, "entity_temporal_metadata_0_0_0"."draft_id")]::jsonb[])::continuation) +(ROW(NULL, 1, ARRAY[7]::int[], ARRAY[jsonb_build_object(($1::text), jsonb_build_object(($2::text), "entity_temporal_metadata_0_0_0"."web_id", ($3::text), "entity_temporal_metadata_0_0_0"."entity_uuid", ($4::text), "entity_temporal_metadata_0_0_0"."draft_id"), ($5::text), "entity_temporal_metadata_0_0_0"."entity_edition_id")]::jsonb[])::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/left_entity_filter.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/left_entity_filter.snap index f67d27c8a61..663752ddf3f 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/left_entity_filter.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/left_entity_filter.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW((("entity_has_left_entity_0_0_1"."left_entity_uuid" = $1)::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((to_jsonb("entity_has_left_entity_0_0_1"."left_entity_uuid") = to_jsonb(($1::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/nested_property_access.snap 
b/libs/@local/hashql/eval/tests/ui/postgres/filter/nested_property_access.snap index 5df5bbbed9f..7901cb57be4 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/nested_property_access.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/nested_property_access.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((jsonb_extract_path("entity_editions_0_0_1"."properties", (($1)::text), (($2)::text)) = $3)::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((to_jsonb(jsonb_extract_path("entity_editions_0_0_1"."properties", ((($1::text))::text), ((($2::text))::text))) = to_jsonb(($3::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/property_field_equality.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/property_field_equality.snap index b0a43648597..176806be5db 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/property_field_equality.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/property_field_equality.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((jsonb_extract_path("entity_editions_0_0_1"."properties", (($1)::text)) = $2)::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((to_jsonb(jsonb_extract_path("entity_editions_0_0_1"."properties", ((($1::text))::text))) = to_jsonb(($2::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/property_mask.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/property_mask.snap index 5b401378fc2..8b8b8ab6555 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/property_mask.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/property_mask.snap @@ -4,16 +4,16 @@ expression: report.to_string() --- 
===================================== SQL ====================================== -SELECT ("entity_editions_0_0_1"."properties" - $99) AS "properties", jsonb_build_object($3, jsonb_build_object($4, "entity_temporal_metadata_0_0_0"."web_id", $5, "entity_temporal_metadata_0_0_0"."entity_uuid", $6, "entity_temporal_metadata_0_0_0"."draft_id"), $6, "entity_temporal_metadata_0_0_0"."draft_id") AS "record_id", jsonb_build_object($7, "entity_temporal_metadata_0_0_0"."decision_time", $8, "entity_temporal_metadata_0_0_0"."transaction_time") AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", ("entity_editions_0_0_1"."property_metadata" - $99) AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", "entity_has_left_entity_0_0_4"."confidence" AS "left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", "entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", "entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance", ("continuation_0_0"."row")."block" AS "continuation_0_0_block", ("continuation_0_0"."row")."locals" AS "continuation_0_0_locals", ("continuation_0_0"."row")."values" AS "continuation_0_0_values" +SELECT ("entity_editions_0_0_1"."properties" - $99) AS "properties", jsonb_build_object(($3::text), jsonb_build_object(($4::text), "entity_temporal_metadata_0_0_0"."web_id", ($5::text), "entity_temporal_metadata_0_0_0"."entity_uuid", ($6::text), 
"entity_temporal_metadata_0_0_0"."draft_id"), ($7::text), "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "record_id", jsonb_build_object(($8::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."decision_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8 END), ($11::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."transaction_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8 END)) AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", ("entity_editions_0_0_1"."property_metadata" - $99) AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", "entity_has_left_entity_0_0_4"."confidence" AS "left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", "entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", "entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance", ("continuation_0_0"."row")."block" AS "continuation_0_0_block", ("continuation_0_0"."row")."locals" AS "continuation_0_0_locals", ("continuation_0_0"."row")."values" AS "continuation_0_0_values" 
FROM "entity_temporal_metadata" AS "entity_temporal_metadata_0_0_0" INNER JOIN "entity_editions" AS "entity_editions_0_0_1" ON "entity_editions_0_0_1"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id" INNER JOIN "entity_ids" AS "entity_ids_0_0_3" ON "entity_ids_0_0_3"."web_id" = "entity_temporal_metadata_0_0_0"."web_id" AND "entity_ids_0_0_3"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" -LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object($9, "b", $10, "v")) AS "entity_type_ids" +LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object(($12::text), "b", ($13::text), "v")) AS "entity_type_ids" FROM "entity_is_of_type_ids" AS "eit" -CROSS JOIN UNNEST("eit"."base_urls", "eit"."versions") AS "u"("b", "v") +CROSS JOIN UNNEST("eit"."base_urls", ("eit"."versions"::text[])) AS "u"("b", "v") WHERE "eit"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "entity_is_of_type_ids_0_0_2" ON TRUE LEFT OUTER JOIN "entity_has_left_entity" AS "entity_has_left_entity_0_0_4" @@ -24,7 +24,7 @@ LEFT OUTER JOIN "entity_has_right_entity" AS "entity_has_right_entity_0_0_5" AND "entity_has_right_entity_0_0_5"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" CROSS JOIN LATERAL (SELECT (ROW(NULL, 1, ARRAY[]::int[], ARRAY[]::jsonb[])::continuation) AS "row" OFFSET 0) AS "continuation_0_0" -WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && $1 AND "entity_temporal_metadata_0_0_0"."decision_time" && $2 AND ("continuation_0_0"."row")."filter" IS NOT FALSE +WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && ($1::tstzrange) AND "entity_temporal_metadata_0_0_0"."decision_time" && ($2::tstzrange) AND ("continuation_0_0"."row")."filter" IS NOT FALSE ================================== Parameters ================================== $1: TemporalAxis(Transaction) @@ -33,7 +33,10 @@ $3: Symbol(entity_id) $4: Symbol(web_id) $5: Symbol(entity_uuid) $6: Symbol(draft_id) -$7: 
Symbol(decision_time) -$8: Symbol(transaction_time) -$9: Symbol(base_url) -$10: Symbol(version) +$7: Symbol(edition_id) +$8: Symbol(decision_time) +$9: Symbol(start) +$10: Symbol(end) +$11: Symbol(transaction_time) +$12: Symbol(base_url) +$13: Symbol(version) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/provides_drives_select_and_joins.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/provides_drives_select_and_joins.snap index f41041674b5..23acb01cbf7 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/provides_drives_select_and_joins.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/provides_drives_select_and_joins.snap @@ -4,16 +4,16 @@ expression: report.to_string() --- ===================================== SQL ====================================== -SELECT "entity_editions_0_0_1"."properties" AS "properties", jsonb_build_object($3, jsonb_build_object($4, "entity_temporal_metadata_0_0_0"."web_id", $5, "entity_temporal_metadata_0_0_0"."entity_uuid", $6, "entity_temporal_metadata_0_0_0"."draft_id"), $6, "entity_temporal_metadata_0_0_0"."draft_id") AS "record_id", jsonb_build_object($7, "entity_temporal_metadata_0_0_0"."decision_time", $8, "entity_temporal_metadata_0_0_0"."transaction_time") AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", "entity_editions_0_0_1"."property_metadata" AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", "entity_has_left_entity_0_0_4"."confidence" AS 
"left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", "entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", "entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance" +SELECT "entity_editions_0_0_1"."properties" AS "properties", jsonb_build_object(($3::text), jsonb_build_object(($4::text), "entity_temporal_metadata_0_0_0"."web_id", ($5::text), "entity_temporal_metadata_0_0_0"."entity_uuid", ($6::text), "entity_temporal_metadata_0_0_0"."draft_id"), ($7::text), "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "record_id", jsonb_build_object(($8::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."decision_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8 END), ($11::text), jsonb_build_object(($9::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8, ($10::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."transaction_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."transaction_time")) * 1000)::int8 END)) AS "temporal_versioning", "entity_is_of_type_ids_0_0_2"."entity_type_ids" AS "entity_type_ids", "entity_editions_0_0_1"."archived" AS "archived", "entity_editions_0_0_1"."confidence" AS "confidence", "entity_ids_0_0_3"."provenance" AS "provenance_inferred", "entity_editions_0_0_1"."provenance" AS "provenance_edition", "entity_editions_0_0_1"."property_metadata" AS "property_metadata", "entity_has_left_entity_0_0_4"."left_web_id" AS "left_entity_web_id", "entity_has_left_entity_0_0_4"."left_entity_uuid" AS "left_entity_uuid", "entity_has_right_entity_0_0_5"."right_web_id" AS "right_entity_web_id", "entity_has_right_entity_0_0_5"."right_entity_uuid" AS "right_entity_uuid", 
"entity_has_left_entity_0_0_4"."confidence" AS "left_entity_confidence", "entity_has_right_entity_0_0_5"."confidence" AS "right_entity_confidence", "entity_has_left_entity_0_0_4"."provenance" AS "left_entity_provenance", "entity_has_right_entity_0_0_5"."provenance" AS "right_entity_provenance" FROM "entity_temporal_metadata" AS "entity_temporal_metadata_0_0_0" INNER JOIN "entity_editions" AS "entity_editions_0_0_1" ON "entity_editions_0_0_1"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id" INNER JOIN "entity_ids" AS "entity_ids_0_0_3" ON "entity_ids_0_0_3"."web_id" = "entity_temporal_metadata_0_0_0"."web_id" AND "entity_ids_0_0_3"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" -LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object($9, "b", $10, "v")) AS "entity_type_ids" +LEFT OUTER JOIN LATERAL (SELECT jsonb_agg(jsonb_build_object(($12::text), "b", ($13::text), "v")) AS "entity_type_ids" FROM "entity_is_of_type_ids" AS "eit" -CROSS JOIN UNNEST("eit"."base_urls", "eit"."versions") AS "u"("b", "v") +CROSS JOIN UNNEST("eit"."base_urls", ("eit"."versions"::text[])) AS "u"("b", "v") WHERE "eit"."entity_edition_id" = "entity_temporal_metadata_0_0_0"."entity_edition_id") AS "entity_is_of_type_ids_0_0_2" ON TRUE LEFT OUTER JOIN "entity_has_left_entity" AS "entity_has_left_entity_0_0_4" @@ -22,7 +22,7 @@ LEFT OUTER JOIN "entity_has_left_entity" AS "entity_has_left_entity_0_0_4" LEFT OUTER JOIN "entity_has_right_entity" AS "entity_has_right_entity_0_0_5" ON "entity_has_right_entity_0_0_5"."web_id" = "entity_temporal_metadata_0_0_0"."web_id" AND "entity_has_right_entity_0_0_5"."entity_uuid" = "entity_temporal_metadata_0_0_0"."entity_uuid" -WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && $1 AND "entity_temporal_metadata_0_0_0"."decision_time" && $2 +WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && ($1::tstzrange) AND "entity_temporal_metadata_0_0_0"."decision_time" && ($2::tstzrange) 
================================== Parameters ================================== $1: TemporalAxis(Transaction) @@ -31,7 +31,10 @@ $3: Symbol(entity_id) $4: Symbol(web_id) $5: Symbol(entity_uuid) $6: Symbol(draft_id) -$7: Symbol(decision_time) -$8: Symbol(transaction_time) -$9: Symbol(base_url) -$10: Symbol(version) +$7: Symbol(edition_id) +$8: Symbol(decision_time) +$9: Symbol(start) +$10: Symbol(end) +$11: Symbol(transaction_time) +$12: Symbol(base_url) +$13: Symbol(version) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/straight_line_goto_chain.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/straight_line_goto_chain.snap index 6b65928d0dd..97280fe34a5 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/straight_line_goto_chain.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/straight_line_goto_chain.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW((($1)::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((($1::jsonb))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/switch_int_many_branches.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/switch_int_many_branches.snap index 614ce12afeb..71a13ebe8ad 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/switch_int_many_branches.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/switch_int_many_branches.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -CASE WHEN (($1)::int) = 0 THEN (ROW(((1)::boolean), NULL, NULL, NULL)::continuation) WHEN (($1)::int) = 1 THEN (ROW(((0)::boolean), NULL, NULL, NULL)::continuation) WHEN (($1)::int) = 2 THEN (ROW(((1)::boolean), NULL, NULL, NULL)::continuation) WHEN (($1)::int) = 3 THEN (ROW(((0)::boolean), NULL, NULL, NULL)::continuation) ELSE (ROW(((1)::boolean), 
NULL, NULL, NULL)::continuation) END +CASE WHEN ((($1::jsonb))::int) IS NULL THEN (ROW(COALESCE(((FALSE)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 0 THEN (ROW(COALESCE(((1)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 1 THEN (ROW(COALESCE(((0)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 2 THEN (ROW(COALESCE(((1)::boolean), FALSE), NULL, NULL, NULL)::continuation) WHEN ((($1::jsonb))::int) = 3 THEN (ROW(COALESCE(((0)::boolean), FALSE), NULL, NULL, NULL)::continuation) ELSE (ROW(COALESCE(((1)::boolean), FALSE), NULL, NULL, NULL)::continuation) END diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/temporal_decision_time_interval.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/temporal_decision_time_interval.snap new file mode 100644 index 00000000000..e62670cb3d0 --- /dev/null +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/temporal_decision_time_interval.snap @@ -0,0 +1,15 @@ +--- +source: libs/@local/hashql/eval/src/postgres/filter/tests.rs +expression: report.to_string() +--- +===================================== SQL ====================================== + +SELECT jsonb_build_object(($3::text), (extract(epoch from lower("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8, ($4::text), CASE WHEN upper_inf("entity_temporal_metadata_0_0_0"."decision_time") THEN NULL ELSE (extract(epoch from upper("entity_temporal_metadata_0_0_0"."decision_time")) * 1000)::int8 END) AS "decision_time" +FROM "entity_temporal_metadata" AS "entity_temporal_metadata_0_0_0" +WHERE "entity_temporal_metadata_0_0_0"."transaction_time" && ($1::tstzrange) AND "entity_temporal_metadata_0_0_0"."decision_time" && ($2::tstzrange) +================================== Parameters ================================== + +$1: TemporalAxis(Transaction) +$2: TemporalAxis(Decision) +$3: Symbol(start) +$4: Symbol(end) diff --git 
a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_bitnot.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_bitnot.snap index 908979c8712..dc01e495757 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_bitnot.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_bitnot.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((~($1))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((~(($1::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_neg.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_neg.snap index 6a5f3c81199..8aec2670c44 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_neg.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_neg.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((-($1))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((-(($1::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_not.snap b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_not.snap index a06e9a75e62..1d92276af2c 100644 --- a/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_not.snap +++ b/libs/@local/hashql/eval/tests/ui/postgres/filter/unary_not.snap @@ -4,4 +4,4 @@ expression: report.to_string() --- ==================== Island (entry: bb0, target: postgres) ===================== -(ROW(((NOT($1))::boolean), NULL, NULL, NULL)::continuation) +(ROW(COALESCE(((NOT(($1::jsonb)))::boolean), FALSE), NULL, NULL, NULL)::continuation) diff --git a/libs/@local/hashql/mir/benches/interpret.rs b/libs/@local/hashql/mir/benches/interpret.rs index cc65af23087..c1d86696304 100644 --- a/libs/@local/hashql/mir/benches/interpret.rs +++ 
b/libs/@local/hashql/mir/benches/interpret.rs @@ -10,7 +10,6 @@ use alloc::alloc::Global; use codspeed_criterion_compat::{BenchmarkId, Criterion, criterion_group, criterion_main}; use hashql_core::{ - collections::FastHashMap, heap::{ResetAllocator as _, Scratch}, r#type::environment::Environment, }; @@ -22,7 +21,7 @@ use hashql_mir::{ def::{DefId, DefIdSlice}, intern::Interner, interpret::{ - CallStack, Runtime, RuntimeConfig, + CallStack, Inputs, Runtime, RuntimeConfig, value::{Int, Value}, }, pass::{ @@ -93,17 +92,14 @@ fn fibonacci_recursive(criterion: &mut Criterion) { run_bencher(bencher, create_fibonacci_body, |_, bodies, scratch| { let scratch = &*scratch; let bodies = DefIdSlice::from_raw(bodies); + let inputs = Inputs::new_in(scratch); - let mut runtime = Runtime::new_in( - RuntimeConfig::default(), - bodies, - FastHashMap::default(), - scratch, - ); + let mut runtime = + Runtime::new_in(RuntimeConfig::default(), bodies, &inputs, scratch); let callstack = CallStack::new(&runtime, DefId::new(0), [Value::Integer(Int::from(*n))]); - let Ok(Value::Integer(int)) = runtime.run(callstack) else { + let Ok(Value::Integer(int)) = runtime.run(callstack, |_| unreachable!()) else { unreachable!() }; diff --git a/libs/@local/hashql/mir/package.json b/libs/@local/hashql/mir/package.json index c4513eecab0..d701248bc8d 100644 --- a/libs/@local/hashql/mir/package.json +++ b/libs/@local/hashql/mir/package.json @@ -9,7 +9,7 @@ "fix:clippy": "just clippy --fix", "lint:clippy": "just clippy", "test:codspeed": "cargo codspeed run -p hashql-mir", - "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests pass::execution::block_partitioned_vec::tests pass::execution::cost::tests", + "test:miri": "cargo miri nextest run -- changed_bitor interpret::locals::tests interpret::value::r#struct::tests pass::execution::block_partitioned_vec::tests pass::execution::cost::tests", "test:unit": "mise run test:unit @rust/hashql-mir" }, "dependencies": { diff --git 
a/libs/@local/hashql/mir/src/interpret/error.rs b/libs/@local/hashql/mir/src/interpret/error.rs index dd88d3f34ab..0bbd9c766db 100644 --- a/libs/@local/hashql/mir/src/interpret/error.rs +++ b/libs/@local/hashql/mir/src/interpret/error.rs @@ -28,8 +28,25 @@ use crate::body::{ /// Type alias for interpreter diagnostics. /// /// The default severity kind is [`Severity`], which allows any severity level. -pub(crate) type InterpretDiagnostic = - Diagnostic; +pub type InterpretDiagnostic = Diagnostic; + +/// Diagnostic subcategory for errors that occur while fulfilling a suspension. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct SuspensionDiagnosticCategory(pub &'static TerminalDiagnosticCategory); + +impl DiagnosticCategory for SuspensionDiagnosticCategory { + fn id(&self) -> Cow<'_, str> { + Cow::Borrowed("suspension") + } + + fn name(&self) -> Cow<'_, str> { + Cow::Borrowed("Suspension") + } + + fn subcategory(&self) -> Option<&dyn DiagnosticCategory> { + Some(self.0) + } +} // Terminal categories for ICEs const LOCAL_ACCESS: TerminalDiagnosticCategory = TerminalDiagnosticCategory { @@ -89,6 +106,8 @@ pub enum InterpretDiagnosticCategory { RuntimeLimit, /// Required input not provided. InputResolution, + /// Error from fulfilling a suspension (e.g. database query failure). + Suspension(SuspensionDiagnosticCategory), } impl DiagnosticCategory for InterpretDiagnosticCategory { @@ -109,6 +128,7 @@ impl DiagnosticCategory for InterpretDiagnosticCategory { Self::BoundsCheck => Some(&BOUNDS_CHECK), Self::RuntimeLimit => Some(&RUNTIME_LIMIT), Self::InputResolution => Some(&INPUT_RESOLUTION), + Self::Suspension(category) => Some(category), } } } @@ -130,6 +150,7 @@ impl TypeName { /// Creates a type name from a static string. /// /// Used for simple type names like "Integer", "String", etc. 
+ #[must_use] pub const fn terse(str: &'static str) -> Self { Self::Static(Cow::Borrowed(str)) } @@ -191,7 +212,7 @@ pub struct UnaryTypeMismatch<'heap, A: Allocator> { /// A few variants represent legitimate runtime errors that can occur in valid /// programs (marked in their documentation). #[derive(Debug, Clone)] -pub enum RuntimeError<'heap, A: Allocator> { +pub enum RuntimeError<'heap, E, A: Allocator> { /// Attempted to read an uninitialized local variable. /// /// This is an ICE: MIR construction should ensure locals are initialized @@ -204,30 +225,42 @@ pub enum RuntimeError<'heap, A: Allocator> { /// Index operation used an invalid type for the index. /// /// This is an ICE: type checking should ensure index types are valid. - InvalidIndexType { base: TypeName, index: TypeName }, + InvalidIndexType { + base: TypeName, + index: TypeName, + }, /// Subscript operation applied to a non-subscriptable type. /// /// This is an ICE: type checking should ensure subscript targets are /// lists or dicts. - InvalidSubscriptType { base: TypeName }, + InvalidSubscriptType { + base: TypeName, + }, /// Field projection applied to a non-projectable type. /// /// This is an ICE: type checking should ensure projection targets are /// structs or tuples. - InvalidProjectionType { base: TypeName }, + InvalidProjectionType { + base: TypeName, + }, /// Named field projection applied to a non-struct type. /// /// This is an ICE: type checking should ensure named field access is /// only used on structs. - InvalidProjectionByNameType { base: TypeName }, + InvalidProjectionByNameType { + base: TypeName, + }, /// Field index does not exist on the aggregate type. /// /// This is an ICE: type checking should validate field indices. - UnknownField { base: TypeName, field: FieldIndex }, + UnknownField { + base: TypeName, + field: FieldIndex, + }, /// Field name does not exist on the struct type. 
/// @@ -241,19 +274,26 @@ pub enum RuntimeError<'heap, A: Allocator> { /// /// This is an ICE: MIR construction should ensure aggregates have the /// correct number of values for their fields. - StructFieldLengthMismatch { values: usize, fields: usize }, + StructFieldLengthMismatch { + values: usize, + fields: usize, + }, /// Switch discriminant has a non-integer type. /// /// This is an ICE: type checking should ensure switch discriminants /// are integers. - InvalidDiscriminantType { r#type: TypeName }, + InvalidDiscriminantType { + r#type: TypeName, + }, /// Switch discriminant value has no matching branch. /// /// This is an ICE: MIR construction should ensure all possible /// discriminant values have corresponding branches. - InvalidDiscriminant { value: Int }, + InvalidDiscriminant { + value: Int, + }, /// Execution reached unreachable code. /// @@ -277,7 +317,9 @@ pub enum RuntimeError<'heap, A: Allocator> { /// /// This is an ICE: type checking should ensure only function pointers /// are called. - ApplyNonPointer { r#type: TypeName }, + ApplyNonPointer { + r#type: TypeName, + }, /// Attempted to step execution with an empty callstack. /// @@ -288,23 +330,49 @@ pub enum RuntimeError<'heap, A: Allocator> { /// /// This is currently a user-facing error but may become an ICE once /// bounds checking is implemented in program analysis. - OutOfRange { length: usize, index: Int }, + OutOfRange { + length: usize, + index: Int, + }, /// Required input was not provided to the runtime. /// /// This is currently a user-facing error but may become an ICE once /// input validation is implemented in program analysis. - InputNotFound { name: Symbol<'heap> }, + InputNotFound { + name: Symbol<'heap>, + }, /// Recursion depth exceeded the configured limit. /// /// This is a user-facing error that occurs when a program recurses /// too deeply, likely due to infinite recursion or deeply nested /// data structures. 
- RecursionLimitExceeded { limit: usize }, + RecursionLimitExceeded { + limit: usize, + }, + + /// Value has the wrong runtime type. + /// + /// This is an ICE: type checking should ensure values have the + /// correct types at all usage sites. + UnexpectedValueType { + expected: TypeName, + actual: TypeName, + }, + + /// Opaque constructor name does not match any expected constructor. + /// + /// This is an ICE: type checking should ensure opaque values carry + /// a constructor name from the expected set for the encoded sum type. + InvalidConstructor { + name: Symbol<'heap>, + }, + + Suspension(E), } -impl RuntimeError<'_, A> { +impl RuntimeError<'_, E, A> { /// Converts this runtime error into a diagnostic using the provided callstack. /// /// The callstack provides span information for error localization. The first @@ -313,11 +381,12 @@ impl RuntimeError<'_, A> { pub fn into_diagnostic( self, callstack: impl IntoIterator, + on_suspension: impl FnOnce(E) -> InterpretDiagnostic, ) -> InterpretDiagnostic { let mut spans = callstack.into_iter(); let primary_span = spans.next().unwrap_or(SpanId::SYNTHETIC); - let mut diagnostic = self.make_diagnostic(primary_span); + let mut diagnostic = self.make_diagnostic(primary_span, on_suspension); // Add callstack frames as secondary labels for span in spans { @@ -327,7 +396,11 @@ impl RuntimeError<'_, A> { diagnostic } - fn make_diagnostic(self, span: SpanId) -> InterpretDiagnostic { + fn make_diagnostic( + self, + span: SpanId, + on_suspension: impl FnOnce(E) -> InterpretDiagnostic, + ) -> InterpretDiagnostic { match self { Self::UninitializedLocal { local, decl } => uninitialized_local(span, local, decl), Self::InvalidIndexType { base, index } => invalid_index_type(span, &base, &index), @@ -351,6 +424,61 @@ impl RuntimeError<'_, A> { Self::OutOfRange { length, index } => out_of_range(span, length, index), Self::InputNotFound { name } => input_not_found(span, name), Self::RecursionLimitExceeded { limit } => 
recursion_limit_exceeded(span, limit), + Self::UnexpectedValueType { expected, actual } => { + unexpected_value_type(span, &expected, &actual) + } + Self::InvalidConstructor { name } => invalid_constructor(span, name), + Self::Suspension(suspension) => on_suspension(suspension), + } + } +} + +impl<'heap, A: Allocator> RuntimeError<'heap, !, A> { + /// Widens the suspension type from `!` to any `S`. + /// + /// Useful when composing interpreter operations (which cannot suspend) with + /// bridge operations (which can). The `Suspension` variant is uninhabited, + /// so this is a no-op at runtime. + #[must_use] + #[inline] + pub fn widen(self) -> RuntimeError<'heap, S, A> { + match self { + Self::UninitializedLocal { local, decl } => { + RuntimeError::UninitializedLocal { local, decl } + } + Self::InvalidIndexType { base, index } => { + RuntimeError::InvalidIndexType { base, index } + } + Self::InvalidSubscriptType { base } => RuntimeError::InvalidSubscriptType { base }, + Self::InvalidProjectionType { base } => RuntimeError::InvalidProjectionType { base }, + Self::InvalidProjectionByNameType { base } => { + RuntimeError::InvalidProjectionByNameType { base } + } + Self::UnknownField { base, field } => RuntimeError::UnknownField { base, field }, + Self::UnknownFieldByName { base, field } => { + RuntimeError::UnknownFieldByName { base, field } + } + Self::StructFieldLengthMismatch { values, fields } => { + RuntimeError::StructFieldLengthMismatch { values, fields } + } + Self::InvalidDiscriminantType { r#type } => { + RuntimeError::InvalidDiscriminantType { r#type } + } + Self::InvalidDiscriminant { value } => RuntimeError::InvalidDiscriminant { value }, + Self::UnreachableReached => RuntimeError::UnreachableReached, + Self::BinaryTypeMismatch(mismatch) => RuntimeError::BinaryTypeMismatch(mismatch), + Self::UnaryTypeMismatch(mismatch) => RuntimeError::UnaryTypeMismatch(mismatch), + Self::ApplyNonPointer { r#type } => RuntimeError::ApplyNonPointer { r#type }, + 
Self::CallstackEmpty => RuntimeError::CallstackEmpty, + Self::OutOfRange { length, index } => RuntimeError::OutOfRange { length, index }, + Self::InputNotFound { name } => RuntimeError::InputNotFound { name }, + Self::RecursionLimitExceeded { limit } => { + RuntimeError::RecursionLimitExceeded { limit } + } + Self::UnexpectedValueType { expected, actual } => { + RuntimeError::UnexpectedValueType { expected, actual } + } + Self::InvalidConstructor { name } => RuntimeError::InvalidConstructor { name }, } } } @@ -541,6 +669,36 @@ fn apply_non_pointer(span: SpanId, r#type: &TypeName) -> InterpretDiagnostic { diagnostic } +fn unexpected_value_type( + span: SpanId, + expected: &TypeName, + actual: &TypeName, +) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(InterpretDiagnosticCategory::TypeInvariant, Severity::Bug).primary( + Label::new(span, format!("expected `{expected}`, found `{actual}`")), + ); + + diagnostic.add_message(Message::help( + "type checking should have ensured the value has the correct type", + )); + + diagnostic +} + +fn invalid_constructor(span: SpanId, name: Symbol) -> InterpretDiagnostic { + let mut diagnostic = + Diagnostic::new(InterpretDiagnosticCategory::TypeInvariant, Severity::Bug).primary( + Label::new(span, format!("unrecognized opaque constructor `{name}`")), + ); + + diagnostic.add_message(Message::help( + "type checking should have ensured the opaque constructor is from the expected set", + )); + + diagnostic +} + // ============================================================================= // ICE: Structural Invariant // ============================================================================= diff --git a/libs/@local/hashql/mir/src/interpret/inputs.rs b/libs/@local/hashql/mir/src/interpret/inputs.rs new file mode 100644 index 00000000000..134567cee8c --- /dev/null +++ b/libs/@local/hashql/mir/src/interpret/inputs.rs @@ -0,0 +1,207 @@ +//! External input values for the interpreter. +//! +//! 
HashQL is a referentially transparent functional language — the same query with the same inputs +//! always produces the same result. [`Inputs`] is the mechanism for injecting values that vary +//! between executions, serving as the functional equivalent of environment variables. +//! +//! In J-Expr syntax, inputs are declared with `["input", "name", "Type"]` and optionally given +//! defaults via `["input", "name", "Type", {"#literal": value}]`. At runtime, the interpreter +//! resolves these declarations against the [`Inputs`] provided to the [`Runtime`]. +//! +//! [`Runtime`]: super::Runtime + +use alloc::alloc::Global; +use core::alloc::Allocator; + +use hashql_core::{ + collections::{ + fast_hash_map, fast_hash_map_in, fast_hash_map_with_capacity, + fast_hash_map_with_capacity_in, + }, + symbol::Symbol, +}; + +use super::value::Value; + +/// External input values available during interpretation. +/// +/// Maps input names (as interned [`Symbol`]s) to their runtime [`Value`]s. The interpreter +/// consults this map when evaluating [`InputOp::Load`] (retrieve a value) and +/// [`InputOp::Exists`] (test whether a value was provided). +/// +/// # Examples +/// +/// ``` +/// use hashql_core::symbol::sym; +/// use hashql_mir::interpret::{ +/// Inputs, +/// value::{Int, Value}, +/// }; +/// +/// let mut inputs = Inputs::new(); +/// inputs.insert(sym::foo, Value::Integer(Int::from(42_i64))); +/// +/// assert!(inputs.contains(sym::foo)); +/// assert_eq!( +/// inputs.get(sym::foo), +/// Some(&Value::Integer(Int::from(42_i64))) +/// ); +/// assert!(!inputs.contains(sym::bar)); +/// ``` +/// +/// [`InputOp::Load`]: hashql_hir::node::operation::InputOp::Load +/// [`InputOp::Exists`]: hashql_hir::node::operation::InputOp::Exists +pub struct Inputs<'heap, A: Allocator = Global> { + inner: hashql_core::collections::FastHashMap, Value<'heap, A>, A>, +} + +impl Inputs<'_> { + /// Creates an empty input set. 
+ /// + /// # Examples + /// + /// ``` + /// use hashql_mir::interpret::Inputs; + /// + /// let inputs = Inputs::new(); + /// assert!(inputs.is_empty()); + /// ``` + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + inner: fast_hash_map(), + } + } + + /// Creates an empty input set with the given capacity. + /// + /// # Examples + /// + /// ``` + /// use hashql_mir::interpret::Inputs; + /// + /// let inputs = Inputs::with_capacity(8); + /// assert!(inputs.is_empty()); + /// ``` + #[inline] + #[must_use] + pub fn with_capacity(capacity: usize) -> Self { + Self { + inner: fast_hash_map_with_capacity(capacity), + } + } +} + +impl Default for Inputs<'_> { + fn default() -> Self { + Self::new() + } +} + +impl<'heap, A: Allocator> Inputs<'heap, A> { + /// Creates an empty input set in the given allocator. + #[inline] + #[must_use] + pub fn new_in(alloc: A) -> Self + where + A: Clone, + { + Self { + inner: fast_hash_map_in(alloc), + } + } + + /// Creates an empty input set with the given capacity in the given allocator. + #[inline] + #[must_use] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self + where + A: Clone, + { + Self { + inner: fast_hash_map_with_capacity_in(capacity, alloc), + } + } + + /// Inserts an input value, returning the previous value if the name was already present. + /// + /// # Examples + /// + /// ``` + /// use hashql_core::symbol::sym; + /// use hashql_mir::interpret::{Inputs, value::Value}; + /// + /// let mut inputs = Inputs::new(); + /// assert!(inputs.insert(sym::foo, Value::Unit).is_none()); + /// assert!(inputs.insert(sym::foo, Value::Unit).is_some()); + /// ``` + #[inline] + pub fn insert( + &mut self, + name: Symbol<'heap>, + value: Value<'heap, A>, + ) -> Option> { + self.inner.insert(name, value) + } + + /// Returns the value for the given input name. 
+ /// + /// # Examples + /// + /// ``` + /// use hashql_core::symbol::sym; + /// use hashql_mir::interpret::{ + /// Inputs, + /// value::{Int, Value}, + /// }; + /// + /// let mut inputs = Inputs::new(); + /// inputs.insert(sym::foo, Value::Integer(Int::from(10_i64))); + /// + /// assert_eq!( + /// inputs.get(sym::foo), + /// Some(&Value::Integer(Int::from(10_i64))) + /// ); + /// assert_eq!(inputs.get(sym::bar), None); + /// ``` + #[inline] + #[must_use] + pub fn get(&self, name: Symbol<'heap>) -> Option<&Value<'heap, A>> { + self.inner.get(&name) + } + + /// Returns whether an input with the given name has been provided. + /// + /// # Examples + /// + /// ``` + /// use hashql_core::symbol::sym; + /// use hashql_mir::interpret::{Inputs, value::Value}; + /// + /// let mut inputs = Inputs::new(); + /// inputs.insert(sym::foo, Value::Unit); + /// + /// assert!(inputs.contains(sym::foo)); + /// assert!(!inputs.contains(sym::bar)); + /// ``` + #[inline] + #[must_use] + pub fn contains(&self, name: Symbol<'heap>) -> bool { + self.inner.contains_key(&name) + } + + /// Returns the number of inputs. + #[inline] + #[must_use] + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Returns whether the input set is empty. + #[inline] + #[must_use] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } +} diff --git a/libs/@local/hashql/mir/src/interpret/locals.rs b/libs/@local/hashql/mir/src/interpret/locals.rs index 13fcf958c13..f35dc1dccf4 100644 --- a/libs/@local/hashql/mir/src/interpret/locals.rs +++ b/libs/@local/hashql/mir/src/interpret/locals.rs @@ -32,7 +32,7 @@ use crate::{ /// /// Stores the values of local variables during interpretation of a function. /// Locals are indexed by [`Local`] and may be uninitialized. -pub(crate) struct Locals<'ctx, 'heap, A: Allocator> { +pub struct Locals<'ctx, 'heap, A: Allocator> { /// Allocator for creating new values. alloc: A, /// Local variable declarations (for error reporting). 
@@ -89,7 +89,7 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// Returns [`RuntimeError::UninitializedLocal`] if the local has not been /// initialized. #[inline] - pub(crate) fn local(&self, local: Local) -> Result<&Value<'heap, A>, RuntimeError<'heap, A>> { + pub fn local(&self, local: Local) -> Result<&Value<'heap, A>, RuntimeError<'heap, E, A>> { self.inner.get(local).ok_or_else(|| { let decl = self.decl[local]; RuntimeError::UninitializedLocal { local, decl } @@ -98,7 +98,7 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// Gets a mutable reference to a local variable's value. #[inline] - pub(crate) fn local_mut(&mut self, local: Local) -> &mut Value<'heap, A> { + pub fn local_mut(&mut self, local: Local) -> &mut Value<'heap, A> { self.inner.fill_until(local, || Value::Unit) } @@ -107,10 +107,10 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// Follows the chain of projections (field access, indexing) to reach /// the final value. #[inline] - pub(crate) fn place( + pub(crate) fn place( &self, Place { local, projections }: &Place<'heap>, - ) -> Result<&Value<'heap, A>, RuntimeError<'heap, A>> { + ) -> Result<&Value<'heap, A>, RuntimeError<'heap, E, A>> { let mut value = self.local(*local)?; for projection in projections { @@ -137,11 +137,11 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// the final value. Index projections are evaluated before the mutable /// borrow to avoid borrowing conflicts. 
#[inline] - pub(crate) fn place_mut( + pub(crate) fn place_mut( &mut self, place: Place<'heap>, scratch: &mut Scratch<'heap, A>, - ) -> Result<&mut Value<'heap, A>, RuntimeError<'heap, A>> + ) -> Result<&mut Value<'heap, A>, RuntimeError<'heap, E, A>> where A: Clone, { @@ -184,10 +184,15 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// /// - For place operands: evaluates the place and borrows the value - /// - For constant operands: converts the constant to a value - pub(crate) fn operand( + /// + /// # Errors + /// + /// - [`RuntimeError`] if the local is unassigned, or if the value is malformed such that indexing + /// operations fail. + pub fn operand( &self, operand: &Operand<'heap>, - ) -> Result>, RuntimeError<'heap, A>> + ) -> Result>, RuntimeError<'heap, E, A>> where A: Clone, { @@ -203,11 +208,11 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// /// The caller must ensure that `operands` and `slice` have the same length. #[expect(unsafe_code, clippy::mem_forget)] - unsafe fn write_operands( + unsafe fn write_operands( &self, slice: &mut [MaybeUninit>], operands: &[Operand<'heap>], - ) -> Result<(), RuntimeError<'heap, A>> + ) -> Result<(), RuntimeError<'heap, E, A>> where A: Clone, { @@ -256,10 +261,10 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// /// Returns [`Value::Unit`] for empty tuples. #[expect(unsafe_code, clippy::panic_in_result_fn)] - fn aggregate_tuple( + fn aggregate_tuple( &self, operands: &IdSlice>, - ) -> Result, RuntimeError<'heap, A>> + ) -> Result, RuntimeError<'heap, E, A>> where A: Clone, { @@ -290,11 +295,11 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// Returns [`RuntimeError::StructFieldLengthMismatch`] if the number of /// fields does not match the number of operands. 
#[expect(unsafe_code, clippy::panic_in_result_fn)] - fn aggregate_struct( + fn aggregate_struct( &self, fields: Interned<'heap, [Symbol<'heap>]>, operands: &IdSlice>, - ) -> Result, RuntimeError<'heap, A>> + ) -> Result, RuntimeError<'heap, E, A>> where A: Clone, { @@ -326,10 +331,10 @@ impl<'ctx, 'heap, A: Allocator> Locals<'ctx, 'heap, A> { /// Dispatches to the appropriate construction method based on the aggregate /// kind and evaluates all operands to build the result. #[expect(clippy::integer_division_remainder_used)] - pub(crate) fn aggregate( + pub(crate) fn aggregate( &self, Aggregate { kind, operands }: &Aggregate<'heap>, - ) -> Result, RuntimeError<'heap, A>> + ) -> Result, RuntimeError<'heap, E, A>> where A: Clone, { @@ -476,7 +481,7 @@ mod tests { // SAFETY: The buffer has not been written to yet and operands == buf unsafe { locals - .write_operands(&mut buf, &operands) + .write_operands::(&mut buf, &operands) .expect("write_operands should not fail"); } @@ -519,7 +524,7 @@ mod tests { ]; // SAFETY: The buffer has not been written to yet and operands == buf - let result = unsafe { locals.write_operands(&mut buf, &operands) }; + let result = unsafe { locals.write_operands::(&mut buf, &operands) }; assert_matches!(result, Err(RuntimeError::UninitializedLocal{local, ..}) if local == Local::new(1)); // IMPORTANT: Do not read from `buf` here. On error, the internal Guard has @@ -545,12 +550,12 @@ mod tests { // SAFETY: The buffer is empty, so no writes are performed. 
unsafe { locals - .write_operands(&mut buf, &operands) + .write_operands::(&mut buf, &operands) .expect("should not fail"); } let value = locals - .aggregate_tuple(IdSlice::from_raw(&[])) + .aggregate_tuple::(IdSlice::from_raw(&[])) .expect("should not fail"); assert_eq!(value, Value::Unit); } @@ -580,7 +585,7 @@ mod tests { ]; let value = locals - .aggregate_tuple(IdSlice::from_raw(&operands)) + .aggregate_tuple::(IdSlice::from_raw(&operands)) .expect("aggregate_tuple should succeed"); let Value::Tuple(tuple) = value else { @@ -626,7 +631,7 @@ mod tests { ]; let value = locals - .aggregate_struct(fields, IdSlice::from_raw(&operands)) + .aggregate_struct::(fields, IdSlice::from_raw(&operands)) .expect("aggregate_struct should succeed"); let Value::Struct(r#struct) = value else { diff --git a/libs/@local/hashql/mir/src/interpret/mod.rs b/libs/@local/hashql/mir/src/interpret/mod.rs index 84abc3de58d..789d75e0535 100644 --- a/libs/@local/hashql/mir/src/interpret/mod.rs +++ b/libs/@local/hashql/mir/src/interpret/mod.rs @@ -5,21 +5,36 @@ //! The interpreter uses a stack-based execution model: //! //! - **[`Runtime`]**: The main interpreter engine that holds configuration, function bodies, and -//! input values. It provides the [`Runtime::run`] method for execution. +//! input values. //! - **[`CallStack`]**: Manages the call frames during execution, tracking local variables, current //! block position, and statement index for each function call. //! - **[`value`]**: Runtime value representation including primitives (integers, numbers, strings), //! aggregates (structs, tuples), and collections (lists, dicts). +//! - **[`suspension`]**: Types for the suspend/resume protocol when the interpreter needs external +//! data. +//! +//! # Execution +//! +//! For simple synchronous execution, [`Runtime::run`] drives interpretation to completion, +//! handling suspensions via a closure. For async or multi-backend orchestration, use +//! 
[`Runtime::start`] and [`Runtime::resume`] to manually drive the suspend/resume loop. //! //! [`Severity::Bug`]: hashql_diagnostics::severity::Severity::Bug //! [`Severity::Error`]: hashql_diagnostics::severity::Severity::Error -mod error; +pub mod error; +mod inputs; mod locals; mod runtime; mod scratch; +pub mod suspension; #[cfg(test)] mod tests; pub mod value; -pub use runtime::{CallStack, Runtime, RuntimeConfig}; +pub use self::{ + error::{RuntimeError, TypeName}, + inputs::Inputs, + locals::Locals, + runtime::{CallStack, Runtime, RuntimeConfig, Yield}, +}; diff --git a/libs/@local/hashql/mir/src/interpret/runtime.rs b/libs/@local/hashql/mir/src/interpret/runtime.rs index aad3cb82061..933453a9e72 100644 --- a/libs/@local/hashql/mir/src/interpret/runtime.rs +++ b/libs/@local/hashql/mir/src/interpret/runtime.rs @@ -9,6 +9,7 @@ //! - [`Runtime`]: The main interpreter, holding configuration, function bodies, and inputs //! - [`RuntimeConfig`]: Configuration options like recursion limits //! - [`CallStack`]: Manages call frames during execution +//! - [`Yield`]: Returned by the interpreter, containing either a final value or a suspension //! //! # Execution Model //! @@ -19,17 +20,37 @@ //! 3. Following terminators to navigate between blocks //! 4. Pushing/popping call frames for function calls and returns //! 5. Returning the final value when the entry function returns +//! +//! # Suspension and Continuation +//! +//! When the interpreter encounters a [`GraphRead`] terminator, it cannot make +//! further progress without external data (e.g., a database query result). Rather +//! than making the interpreter async, it uses a **suspend/resume** protocol: +//! +//! 1. Call [`Runtime::start`] to begin interpretation +//! 2. If it returns [`Yield::Suspension`], inspect the [`Suspension`] to determine what data is +//! needed +//! 3. Fulfill the request and call [`Runtime::resume`] with the resulting [`Continuation`] +//! 4. 
Repeat until [`Yield::Return`] is received +//! +//! For callers that can handle suspensions synchronously, [`Runtime::run`] provides +//! a convenience wrapper that drives the loop with a closure. +//! +//! [`GraphRead`]: crate::body::terminator::GraphRead +//! [`Continuation`]: super::suspension::Continuation use alloc::{alloc::Global, borrow::Cow}; use core::{alloc::Allocator, debug_assert_matches, hint::cold_path, ops::ControlFlow}; -use hashql_core::{collections::FastHashMap, span::SpanId, symbol::Symbol}; +use hashql_core::span::SpanId; use hashql_hir::node::operation::{InputOp, UnOp}; use super::{ + Inputs, error::{BinaryTypeMismatch, InterpretDiagnostic, RuntimeError, TypeName, UnaryTypeMismatch}, locals::Locals, scratch::Scratch, + suspension::{Continuation, Suspension}, value::{Int, Value}, }; use crate::{ @@ -38,26 +59,56 @@ use crate::{ basic_block::{BasicBlock, BasicBlockId}, rvalue::{Apply, BinOp, Binary, Input, RValue, Unary}, statement::{Assign, StatementKind}, - terminator::{Goto, Return, SwitchInt, Target, TerminatorKind}, + terminator::{Goto, GraphReadHead, Return, SwitchInt, Target, TerminatorKind}, }, def::{DefId, DefIdSlice}, + interpret::suspension::{self, GraphReadSuspension}, }; +/// Creates a new call frame for the given body with the provided arguments. +fn make_frame_in<'ctx, 'heap, E, A: Allocator + Clone>( + body: &'ctx Body<'heap>, + args: impl ExactSizeIterator, E>>, + alloc: A, +) -> Result, E> { + let locals = Locals::new_in(body, args, alloc)?; + + Ok(Frame { + locals, + body, + current_block: CurrentBlock { + id: BasicBlockId::START, + block: &body.basic_blocks[BasicBlockId::START], + }, + current_statement: 0, + }) +} + +/// The current basic block being executed within a frame. +/// +/// Caches both the [`BasicBlockId`] and a direct reference to the [`BasicBlock`] +/// to avoid repeated indexing into the body's block storage during execution. 
+#[derive(Debug, Copy, Clone)] +pub(super) struct CurrentBlock<'ctx, 'heap> { + pub id: BasicBlockId, + pub block: &'ctx BasicBlock<'heap>, +} + /// A single call frame in the interpreter's call stack. /// /// Each frame represents an active function call and tracks: /// - Local variable storage /// - The function body being executed /// - Current position (block and statement index) -struct Frame<'ctx, 'heap, A: Allocator> { +pub(super) struct Frame<'ctx, 'heap, A: Allocator> { /// Local variable storage for this function call. - locals: Locals<'ctx, 'heap, A>, + pub locals: Locals<'ctx, 'heap, A>, /// The MIR body being executed. - body: &'ctx Body<'heap>, + pub body: &'ctx Body<'heap>, /// The current basic block. - current_block: &'ctx BasicBlock<'heap>, + pub current_block: CurrentBlock<'ctx, 'heap>, /// Index of the next statement to execute in the current block. - current_statement: usize, + pub current_statement: usize, } /// The call stack for the MIR interpreter. @@ -66,8 +117,12 @@ struct Frame<'ctx, 'heap, A: Allocator> { /// /// The call stack also provides [`unwind`](Self::unwind) for error reporting, /// which walks the stack to collect span information for diagnostics. +#[expect( + clippy::field_scoped_visibility_modifiers, + reason = "used when resolving the suspension" +)] pub struct CallStack<'ctx, 'heap, A: Allocator = Global> { - frames: Vec, A>, + pub(super) frames: Vec, A>, } impl<'ctx, 'heap, A: Allocator> CallStack<'ctx, 'heap, A> { @@ -91,6 +146,26 @@ impl<'ctx, 'heap, A: Allocator> CallStack<'ctx, 'heap, A> { Self { frames } } + /// Creates a new call stack with an initial call to the given body. + /// + /// # Errors + /// + /// Returns `E` if any argument in `args` is an `Err`. 
+ pub fn new_in( + body: &'ctx Body<'heap>, + args: impl IntoIterator, E>, IntoIter: ExactSizeIterator>, + alloc: A, + ) -> Result + where + A: Allocator + Clone, + { + let frame = make_frame_in(body, args.into_iter(), alloc.clone())?; + let mut frames = Vec::new_in(alloc); + frames.push(frame); + + Ok(Self { frames }) + } + /// Unwinds the call stack to produce a trace of definition IDs and spans. /// /// Returns an iterator over `(DefId, SpanId)` pairs, starting from the @@ -102,15 +177,98 @@ impl<'ctx, 'heap, A: Allocator> CallStack<'ctx, 'heap, A> { pub fn unwind(&self) -> impl Iterator { self.frames.iter().rev().map(|frame| { let body = frame.body.id; - let span = if frame.current_statement >= frame.current_block.statements.len() { - frame.current_block.terminator.span + let span = if frame.current_statement >= frame.current_block.block.statements.len() { + frame.current_block.block.terminator.span } else { - frame.current_block.statements[frame.current_statement].span + frame.current_block.block.statements[frame.current_statement].span }; (body, span) }) } + + /// Returns the local variable storage for the innermost active call. + /// + /// # Errors + /// + /// Returns [`RuntimeError::CallstackEmpty`] if there are no active calls. + pub fn locals( + &self, + ) -> Result<&Locals<'ctx, 'heap, A>, RuntimeError<'heap, E, R>> { + self.frames + .last() + .ok_or(RuntimeError::CallstackEmpty) + .map(|frame| &frame.locals) + } + + /// Returns mutable access to the local variable storage for the current call. + /// + /// # Errors + /// + /// Returns [`RuntimeError::CallstackEmpty`] if there are no active calls. + pub fn locals_mut( + &mut self, + ) -> Result<&mut Locals<'ctx, 'heap, A>, RuntimeError<'heap, !, R>> { + self.frames + .last_mut() + .ok_or(RuntimeError::CallstackEmpty) + .map(|frame| &mut frame.locals) + } + + /// Returns the [`BasicBlockId`] of the current block. 
+ /// + /// # Errors + /// + /// Returns [`RuntimeError::CallstackEmpty`] if there are no active calls. + pub fn current_block(&self) -> Result> { + self.frames + .last() + .map(|frame| frame.current_block.id) + .ok_or(RuntimeError::CallstackEmpty) + } + + /// Sets the current block and resets the statement counter to zero. + /// + /// The caller must ensure that `block_id` is a valid transition target + /// in the current execution context. The block itself is bounds-checked + /// against the body's block storage, but reachability is not verified. + /// + /// # Errors + /// + /// Returns [`RuntimeError::CallstackEmpty`] if there are no active calls. + pub fn set_current_block_unchecked( + &mut self, + block_id: BasicBlockId, + ) -> Result<(), RuntimeError<'heap, E, A>> { + let frame = self.frames.last_mut().ok_or(RuntimeError::CallstackEmpty)?; + + let block = &frame.body.basic_blocks[block_id]; + + frame.current_block = CurrentBlock { + id: block_id, + block, + }; + frame.current_statement = 0; + + Ok(()) + } +} + +/// Result of running the interpreter until it can no longer make progress. +/// +/// The interpreter either completes with a final value or suspends at a point +/// where it needs external data (such as a database query result) before it +/// can continue. +#[derive(Debug)] +pub enum Yield<'ctx, 'heap, A: Allocator> { + /// The entry function returned a value. Interpretation is complete. + Return(Value<'heap, A>), + /// The interpreter suspended and needs external data to continue. + /// + /// The caller should inspect the [`Suspension`] to determine what is needed, + /// fulfill the request, and call [`Runtime::resume`] with the resulting + /// [`Continuation`]. + Suspension(Suspension<'ctx, 'heap>), } /// Internal signal indicating whether to pop the current frame after a terminator. @@ -167,7 +325,7 @@ pub struct Runtime<'ctx, 'heap, A: Allocator = Global> { /// All available function bodies, indexed by [`DefId`]. 
bodies: &'ctx DefIdSlice>, /// Input values available for [`InputOp::Load`] operations. - inputs: FastHashMap, Value<'heap, A>>, + inputs: &'ctx Inputs<'heap, A>, scratch: Scratch<'heap, A>, } @@ -182,18 +340,21 @@ impl<'ctx, 'heap> Runtime<'ctx, 'heap> { pub fn new( config: RuntimeConfig, bodies: &'ctx DefIdSlice>, - inputs: FastHashMap, Value<'heap>>, + inputs: &'ctx Inputs<'heap>, ) -> Self { Self::new_in(config, bodies, inputs, Global) } } impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { + /// Creates a new runtime with the given configuration, bodies, inputs, and allocator. + /// + /// See [`Runtime::new`] for details on the parameters. #[must_use] pub fn new_in( config: RuntimeConfig, bodies: &'ctx DefIdSlice>, - inputs: FastHashMap, Value<'heap, A>>, + inputs: &'ctx Inputs<'heap, A>, alloc: A, ) -> Self { Self { @@ -210,26 +371,20 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { func: DefId, args: impl ExactSizeIterator, E>>, ) -> Result, E> { - let body = &self.bodies[func]; - - let locals = Locals::new_in(body, args, self.alloc.clone())?; - - Ok(Frame { - locals, - body, - current_block: &body.basic_blocks[BasicBlockId::START], - current_statement: 0, - }) + make_frame_in(&self.bodies[func], args, self.alloc.clone()) } #[inline] - fn step_terminator_goto( + fn step_terminator_goto( &mut self, frame: &mut Frame<'ctx, 'heap, A>, Target { block, args }: Target<'heap>, - ) -> Result<(), RuntimeError<'heap, A>> { + ) -> Result<(), RuntimeError<'heap, E, A>> { if args.is_empty() { - frame.current_block = &frame.body.basic_blocks[block]; + frame.current_block = CurrentBlock { + id: block, + block: &frame.body.basic_blocks[block], + }; frame.current_statement = 0; return Ok(()); } @@ -259,17 +414,20 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { frame.locals.insert(param, value); } - frame.current_block = &frame.body.basic_blocks[block]; + frame.current_block = CurrentBlock { + id: block, + block: 
&frame.body.basic_blocks[block], + }; frame.current_statement = 0; Ok(()) } - fn step_terminator( + fn step_terminator( &mut self, stack: &mut [Frame<'ctx, 'heap, A>], frame: &mut Frame<'ctx, 'heap, A>, - ) -> Result, PopFrame>, RuntimeError<'heap, A>> { - let terminator = &frame.current_block.terminator.kind; + ) -> Result, PopFrame>, RuntimeError<'heap, E, A>> { + let terminator = &frame.current_block.block.terminator.kind; match terminator { &TerminatorKind::Goto(Goto { target }) => { @@ -308,12 +466,12 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { // one that we break on. cold_path(); - return Ok(ControlFlow::Break(value)); + return Ok(ControlFlow::Break(Yield::Return(value))); }; // The caller is suspended at an `Assign` statement with an `Apply` rvalue. // We write the return value to the LHS of that assignment and resume. - let statement = &caller.current_block.statements[caller.current_statement]; + let statement = &caller.current_block.block.statements[caller.current_statement]; let StatementKind::Assign(Assign { lhs, rhs }) = &statement.kind else { unreachable!("we can only be called from an apply"); }; @@ -326,17 +484,30 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { Ok(ControlFlow::Continue(PopFrame::Yes)) } - TerminatorKind::GraphRead(_) => { - unimplemented!("GraphRead terminator not implemented") + TerminatorKind::GraphRead(read) => { + let axis = match read.head { + GraphReadHead::Entity { axis } => frame.locals.operand(&axis)?, + }; + + let axis = suspension::extract_axis(&axis)?; + + Ok(ControlFlow::Break(Yield::Suspension( + Suspension::GraphRead(GraphReadSuspension { + body: frame.body.id, + block: frame.current_block.id, + read, + axis, + }), + ))) } TerminatorKind::Unreachable => Err(RuntimeError::UnreachableReached), } } - fn eval_rvalue_binary( + fn eval_rvalue_binary( frame: &Frame<'ctx, 'heap, A>, Binary { op, left, right }: &Binary<'heap>, - ) -> Result, RuntimeError<'heap, A>> { + ) -> Result, 
RuntimeError<'heap, E, A>> { let lhs = frame.locals.operand(left)?; let rhs = frame.locals.operand(right)?; @@ -422,10 +593,10 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { } } - fn eval_rvalue_unary( + fn eval_rvalue_unary( frame: &Frame<'ctx, 'heap, A>, Unary { op, operand }: &Unary<'heap>, - ) -> Result, RuntimeError<'heap, A>> { + ) -> Result, RuntimeError<'heap, E, A>> { let operand = frame.locals.operand(operand)?; match op { @@ -501,29 +672,29 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { } } - fn eval_rvalue_input( + fn eval_rvalue_input( &self, Input { op, name }: &Input<'heap>, - ) -> Result, RuntimeError<'heap, A>> { + ) -> Result, RuntimeError<'heap, E, A>> { match op { // `required` is used only by static control-flow analysis; at runtime we always // error if the input is missing. - InputOp::Load { required: _ } => self.inputs.get(name).map_or_else( + InputOp::Load { required: _ } => self.inputs.get(*name).map_or_else( || Err(RuntimeError::InputNotFound { name: *name }), |value| Ok(value.clone()), ), - InputOp::Exists => Ok(Value::Integer(self.inputs.contains_key(name).into())), + InputOp::Exists => Ok(Value::Integer(self.inputs.contains(*name).into())), } } - fn eval_rvalue_apply( + fn eval_rvalue_apply( &self, frame: &Frame<'ctx, 'heap, A>, Apply { function, arguments, }: &Apply<'heap>, - ) -> Result, RuntimeError<'heap, A>> { + ) -> Result, RuntimeError<'heap, E, A>> { let function = frame.locals.operand(function)?; let Value::Pointer(pointer) = function.as_ref() else { return Err(RuntimeError::ApplyNonPointer { @@ -539,11 +710,12 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { ) } - fn eval_rvalue( + fn eval_rvalue( &self, frame: &Frame<'ctx, 'heap, A>, rvalue: &RValue<'heap>, - ) -> Result, Value<'heap, A>>, RuntimeError<'heap, A>> { + ) -> Result, Value<'heap, A>>, RuntimeError<'heap, E, A>> + { match rvalue { RValue::Load(operand) => frame .locals @@ -564,11 +736,11 @@ 
impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { } } - fn step_statement_assign( + fn step_statement_assign( &mut self, frame: &mut Frame<'ctx, 'heap, A>, Assign { lhs, rhs }: &Assign<'heap>, - ) -> Result>, RuntimeError<'heap, A>> { + ) -> Result>, RuntimeError<'heap, E, A>> { let value = self.eval_rvalue(frame, rhs)?; let value = match value { ControlFlow::Continue(value) => value, @@ -581,15 +753,15 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { Ok(None) } - fn step( + fn step( &mut self, callstack: &mut CallStack<'ctx, 'heap, A>, - ) -> Result>, RuntimeError<'heap, A>> { + ) -> Result>, RuntimeError<'heap, E, A>> { let Some((frame, stack)) = callstack.frames.split_last_mut() else { return Err(RuntimeError::CallstackEmpty); }; - if frame.current_statement >= frame.current_block.statements.len() { + if frame.current_statement >= frame.current_block.block.statements.len() { let next = self.step_terminator(stack, frame)?; return match next { @@ -603,7 +775,7 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { }; } - let statement = &frame.current_block.statements[frame.current_statement]; + let statement = &frame.current_block.block.statements[frame.current_statement]; let next_frame = match &statement.kind { StatementKind::Assign(assign) => self.step_statement_assign(frame, assign)?, StatementKind::Nop | StatementKind::StorageLive(_) | StatementKind::StorageDead(_) => { @@ -626,40 +798,176 @@ impl<'ctx, 'heap, A: Allocator + Clone> Runtime<'ctx, 'heap, A> { Ok(ControlFlow::Continue(())) } - /// Executes the MIR starting from the given call stack. + /// Steps the interpreter until it either returns a value or suspends. + /// + /// This is the low-level driver loop. It does **not** clear scratch state, + /// so callers must call [`reset`](Self::reset) before the first invocation. + /// Prefer [`start`](Self::start) for the initial invocation and + /// [`resume`](Self::resume) after fulfilling a suspension. 
 + /// + /// # Errors + /// + /// Returns a runtime error if interpretation fails. + pub fn run_until_suspension( + &mut self, + callstack: &mut CallStack<'ctx, 'heap, A>, + ) -> Result, RuntimeError<'heap, E, A>> { + loop { + let next = self.step(callstack)?; + if let ControlFlow::Break(value) = next { + return Ok(value); + } + } + } + + /// Runs the interpreter until it hits a backend transition point. + /// - /// Runs the interpreter until the entry function returns or an error occurs. - /// The call stack should be initialized with [`CallStack::new`] pointing to - /// the entry function. + /// The `continue` callback is invoked at each block boundary in the outermost + /// call frame. It receives the [`BasicBlockId`] just entered and returns + /// whether execution should continue on this backend. When it returns `false`, + /// the method returns [`ControlFlow::Break`] without executing any statements + /// in that block. /// - /// # Returns + /// # Return value /// - /// The value returned by the entry function. + /// - [`ControlFlow::Break(block_id)`]: transition point reached. The callstack is positioned at the + /// block where `continue` returned `false`. + /// - [`ControlFlow::Continue(Yield::Return(v))`]: interpretation completed. + /// - [`ControlFlow::Continue(Yield::Suspension(s))`]: interpreter suspended for external data. + /// Apply the continuation and call this method again. /// /// # Errors /// - /// Returns a diagnostic if any runtime error occurs. The diagnostic includes - /// the error message and a call stack trace for error localization. + /// Returns a runtime error if interpretation fails. + pub fn run_until_transition( + &mut self, + callstack: &mut CallStack<'ctx, 'heap, A>, + mut r#continue: impl FnMut(BasicBlockId) -> bool, + ) -> Result>, RuntimeError<'heap, E, A>> { + loop { + // Check if we've entered a new block in the outermost frame. 
This must happen + // *before* stepping so that block transitions from `Continuation::apply` (which + // sets `current_statement = 0` on the target block) are visible on re-entry. + // During nested calls (multiple frames) the interpreter runs freely; only + // top-level block boundaries are transition candidates. + if let [frame] = &*callstack.frames + && frame.current_statement == 0 + && !r#continue(frame.current_block.id) + { + return Ok(ControlFlow::Break(frame.current_block.id)); + } + + let next = self.step(callstack)?; + if let ControlFlow::Break(value) = next { + return Ok(ControlFlow::Continue(value)); + } + } + } + + fn try_run( + &mut self, + callstack: &mut CallStack<'ctx, 'heap, A>, + mut on_suspension: impl FnMut( + Suspension<'ctx, 'heap>, + ) + -> Result, RuntimeError<'heap, !, A>>, + ) -> Result, RuntimeError<'heap, !, A>> { + self.scratch.clear(); + + loop { + match self.run_until_suspension(callstack)? { + Yield::Return(value) => return Ok(value), + Yield::Suspension(suspension) => { + let continuation = on_suspension(suspension)?; + continuation.apply(callstack)?; + } + } + } + } + + /// Runs the interpreter to completion, handling suspensions inline. + /// + /// This is a convenience method for callers that can handle all suspensions + /// synchronously via a closure. The `on_suspension` callback receives each + /// [`Suspension`], fulfills it, and returns the corresponding [`Continuation`]. + /// + /// For async or more complex orchestration, use [`start`](Self::start) and + /// [`resume`](Self::resume) instead. + /// + /// # Errors + /// + /// Returns a diagnostic if interpretation fails or if `on_suspension` returns + /// an error. 
pub fn run( &mut self, mut callstack: CallStack<'ctx, 'heap, A>, + on_suspension: impl FnMut( + Suspension<'ctx, 'heap>, + ) + -> Result, RuntimeError<'heap, !, A>>, ) -> Result, InterpretDiagnostic> { + self.try_run(&mut callstack, on_suspension) + .map_err(|error| { + let spans = callstack.unwind(); + + error.into_diagnostic(spans.map(|(_, span)| span), |suspension| suspension) + }) + } + + /// Clears ephemeral scratch state. + /// + /// Called automatically by [`start`](Self::start). Callers using the lower-level + /// [`run_until_suspension`](Self::run_until_suspension) directly must call this + /// before the first invocation. + pub fn reset(&mut self) { self.scratch.clear(); + } - loop { - let result = self.step(&mut callstack); - let next = match result { - Ok(next) => next, - Err(error) => { - let spans = callstack.unwind(); + /// Begins interpretation from the given call stack. + /// + /// Clears scratch state and runs until the interpreter either returns + /// or suspends. This should be used for the initial invocation; use + /// [`resume`](Self::resume) to continue after a suspension. + /// + /// # Errors + /// + /// Returns a diagnostic if any runtime error occurs during interpretation. + pub fn start( + &mut self, + callstack: &mut CallStack<'ctx, 'heap, A>, + ) -> Result, InterpretDiagnostic> { + self.reset(); + self.run_until_suspension(callstack).map_err(|error| { + let spans = callstack.unwind(); - return Err(error.into_diagnostic(spans.map(|(_, span)| span))); - } - }; + error.into_diagnostic(spans.map(|(_, span)| span), |suspension| suspension) + }) + } - if let ControlFlow::Break(value) = next { - return Ok(value); - } - } + /// Continues interpretation after a suspension has been fulfilled. + /// + /// Resolves the [`Continuation`] into the call stack and resumes stepping + /// until the interpreter returns or suspends again. 
+ /// + /// # Errors + /// + /// Returns a diagnostic if the continuation is invalid or if a runtime + /// error occurs during interpretation. + pub fn resume( + &mut self, + callstack: &mut CallStack<'ctx, 'heap, A>, + continuation: Continuation<'ctx, 'heap, A>, + ) -> Result, InterpretDiagnostic> { + continuation.apply(callstack).map_err(|error| { + let spans = callstack.unwind(); + + error.into_diagnostic(spans.map(|(_, span)| span), |suspension| suspension) + })?; + + self.run_until_suspension(callstack).map_err(|error| { + let spans = callstack.unwind(); + + error.into_diagnostic(spans.map(|(_, span)| span), |suspension| suspension) + }) } } diff --git a/libs/@local/hashql/mir/src/interpret/suspension/graph_read.rs b/libs/@local/hashql/mir/src/interpret/suspension/graph_read.rs new file mode 100644 index 00000000000..316d18fcfcc --- /dev/null +++ b/libs/@local/hashql/mir/src/interpret/suspension/graph_read.rs @@ -0,0 +1,134 @@ +use core::{alloc::Allocator, ops::Bound}; + +use hashql_core::symbol::sym; + +use super::temporal::{TemporalAxesInterval, TemporalInterval, Timestamp}; +use crate::interpret::{RuntimeError, TypeName, value::Value}; + +fn extract_timestamp<'heap, E, A: Allocator>( + value: &Value<'heap, A>, +) -> Result> { + let Value::Opaque(opaque) = value else { + return Err(RuntimeError::UnexpectedValueType { + expected: TypeName::terse("Opaque"), + actual: value.type_name().into(), + }); + }; + debug_assert_eq!(opaque.name(), sym::path::Timestamp); + + let &Value::Integer(timestamp) = opaque.value() else { + return Err(RuntimeError::UnexpectedValueType { + expected: TypeName::terse("Integer"), + actual: opaque.value().type_name().into(), + }); + }; + + Ok(Timestamp::from(timestamp)) +} + +fn extract_bound<'heap, E, A: Allocator>( + value: &Value<'heap, A>, +) -> Result, RuntimeError<'heap, E, A>> { + let Value::Opaque(bound) = value else { + return Err(RuntimeError::UnexpectedValueType { + expected: TypeName::terse("Opaque"), + actual: 
value.type_name().into(),
+        });
+    };
+
+    let make_bound = match bound.name().as_constant() {
+        Some(sym::path::UnboundedTemporalBound::CONST) => return Ok(Bound::Unbounded),
+        Some(sym::path::InclusiveTemporalBound::CONST) => Bound::Included,
+        Some(sym::path::ExclusiveTemporalBound::CONST) => Bound::Excluded,
+        _ => {
+            return Err(RuntimeError::InvalidConstructor { name: bound.name() });
+        }
+    };
+
+    let value = extract_timestamp(bound.value())?;
+    Ok(make_bound(value))
+}
+
+fn extract_interval<'heap, E, A: Allocator>(
+    value: &Value<'heap, A>,
+) -> Result<(Bound<Timestamp>, Bound<Timestamp>), RuntimeError<'heap, E, A>> {
+    let Value::Opaque(opaque) = value else {
+        return Err(RuntimeError::UnexpectedValueType {
+            expected: TypeName::terse("Opaque"),
+            actual: value.type_name().into(),
+        });
+    };
+    debug_assert_eq!(opaque.name(), sym::path::Interval);
+
+    let value = opaque.value();
+
+    let start = value.project_by_name(sym::start)?;
+    let end = value.project_by_name(sym::end)?;
+
+    let start = extract_bound(start)?;
+    let end = extract_bound(end)?;
+
+    Ok((start, end))
+}
+
+pub(crate) fn extract_axis<'heap, E, A: Allocator>(
+    value: &Value<'heap, A>,
+) -> Result<TemporalAxesInterval, RuntimeError<'heap, E, A>> {
+    let Value::Opaque(opaque) = value else {
+        return Err(RuntimeError::UnexpectedValueType {
+            expected: TypeName::terse("Opaque"),
+            actual: value.type_name().into(),
+        });
+    };
+
+    // The resulting value must be a `QueryTemporalAxes`; this means it's either a
+    // `PinnedTransactionTimeTemporalAxes` or `PinnedDecisionTimeTemporalAxes`.
+ let (pinned, variable) = match opaque.name().as_constant() { + Some( + sym::path::PinnedTransactionTimeTemporalAxes::CONST + | sym::path::PinnedDecisionTimeTemporalAxes::CONST, + ) => { + let value = opaque.value(); + + let pinned = value.project_by_name(sym::pinned)?; + let variable = value.project_by_name(sym::variable)?; + + (pinned, variable) + } + _ => { + return Err(RuntimeError::InvalidConstructor { + name: opaque.name(), + }); + } + }; + + let Value::Opaque(pinned) = pinned else { + return Err(RuntimeError::UnexpectedValueType { + expected: TypeName::terse("Opaque"), + actual: pinned.type_name().into(), + }); + }; + let Value::Opaque(variable) = variable else { + return Err(RuntimeError::UnexpectedValueType { + expected: TypeName::terse("Opaque"), + actual: variable.type_name().into(), + }); + }; + + let timestamp = extract_timestamp(pinned.value())?; + let interval = extract_interval(variable.value())?; + + match pinned.name().as_constant() { + Some(sym::path::TransactionTime::CONST) => Ok(TemporalAxesInterval { + transaction_time: TemporalInterval::point(timestamp), + decision_time: TemporalInterval::interval(interval), + }), + Some(sym::path::DecisionTime::CONST) => Ok(TemporalAxesInterval { + transaction_time: TemporalInterval::interval(interval), + decision_time: TemporalInterval::point(timestamp), + }), + _ => Err(RuntimeError::InvalidConstructor { + name: pinned.name(), + }), + } +} diff --git a/libs/@local/hashql/mir/src/interpret/suspension/mod.rs b/libs/@local/hashql/mir/src/interpret/suspension/mod.rs new file mode 100644 index 00000000000..e86885d5343 --- /dev/null +++ b/libs/@local/hashql/mir/src/interpret/suspension/mod.rs @@ -0,0 +1,138 @@ +//! Suspension and continuation types for interpreter yield points. +//! +//! When the interpreter encounters an operation that requires external data +//! (such as a graph database query), it suspends execution and yields a +//! [`Suspension`] describing what it needs. 
The caller fulfills the request +//! and constructs a [`Continuation`] to resume interpretation. +//! +//! # Protocol +//! +//! 1. [`Runtime::start`] or [`Runtime::resume`] returns [`Yield::Suspension`] +//! 2. The caller inspects the [`Suspension`] variant to determine what is needed +//! 3. The caller fulfills the request and calls [`GraphReadSuspension::resolve`] to produce a +//! [`Continuation`] +//! 4. The caller passes the [`Continuation`] to [`Runtime::resume`] +//! +//! [`Runtime::start`]: super::runtime::Runtime::start +//! [`Runtime::resume`]: super::runtime::Runtime::resume +//! [`Yield::Suspension`]: super::runtime::Yield::Suspension + +mod graph_read; +mod temporal; + +use core::{alloc::Allocator, debug_assert_matches}; + +pub(crate) use self::graph_read::extract_axis; +pub use self::temporal::{TemporalAxesInterval, TemporalInterval, Timestamp}; +use super::{CallStack, RuntimeError, value::Value}; +use crate::{ + body::{basic_block::BasicBlockId, terminator::GraphRead}, + def::DefId, + interpret::runtime::CurrentBlock, +}; + +/// A request for external data that the interpreter cannot produce on its own. +/// +/// The caller must inspect the variant, fulfill the request, and pass +/// the result back via [`Runtime::resume`](super::runtime::Runtime::resume). +#[derive(Debug)] +pub enum Suspension<'ctx, 'heap> { + /// The interpreter needs the result of a graph read operation. + GraphRead(GraphReadSuspension<'ctx, 'heap>), +} + +/// Suspension state for a [`GraphRead`] terminator. +/// +/// Contains the MIR graph read definition and the evaluated temporal axis, +/// which together provide everything the caller needs to execute the query. +/// +/// Call [`resolve`](Self::resolve) with the query result to produce a +/// [`Continuation`] for resuming the interpreter. +#[derive(Debug)] +pub struct GraphReadSuspension<'ctx, 'heap> { + pub body: DefId, + pub block: BasicBlockId, + + /// The graph read terminator that caused the suspension. 
+ pub read: &'ctx GraphRead<'heap>, + /// The evaluated temporal axis for the query. + pub axis: TemporalAxesInterval, +} + +impl<'ctx, 'heap> GraphReadSuspension<'ctx, 'heap> { + /// Resolves this suspension with the query result, producing a [`Continuation`]. + pub const fn resolve( + self, + value: Value<'heap, A>, + ) -> Continuation<'ctx, 'heap, A> { + Continuation::GraphRead(GraphReadContinuation { + read: self.read, + value, + }) + } +} + +/// The fulfilled result of a [`Suspension`], ready to be fed back into the +/// interpreter via [`Runtime::resume`](super::runtime::Runtime::resume). +pub enum Continuation<'ctx, 'heap, A: Allocator> { + /// Fulfilled result of a [`GraphRead`] suspension. + GraphRead(GraphReadContinuation<'ctx, 'heap, A>), +} + +impl<'ctx, 'heap, A: Allocator> Continuation<'ctx, 'heap, A> { + /// Applies a [`Continuation`] to the suspended call stack. + /// + /// Writes the continuation's result value into the target block's parameter + /// and advances the frame to that block. + /// + /// # Errors + /// + /// Returns [`RuntimeError::CallstackEmpty`] if the call stack has no frames. 
+ pub fn apply( + self, + callstack: &mut CallStack<'ctx, 'heap, A>, + ) -> Result<(), RuntimeError<'heap, E, A>> { + match self { + Continuation::GraphRead(GraphReadContinuation { read, value }) => { + let Some(frame) = callstack.frames.last_mut() else { + return Err(RuntimeError::CallstackEmpty); + }; + + #[cfg(debug_assertions)] + { + use crate::body::terminator::TerminatorKind; + + let current_block = frame.current_block; + let current_statement = frame.current_statement; + debug_assert_eq!(current_block.block.statements.len(), current_statement); + + debug_assert_matches!( + current_block.block.terminator.kind, + TerminatorKind::GraphRead(_) + ); + } + + let next_block = &frame.body.basic_blocks[read.target]; + let params = next_block.params; + debug_assert_eq!(params.len(), 1); + + frame.locals.insert(params[0], value); + + frame.current_block = CurrentBlock { + id: read.target, + block: next_block, + }; + frame.current_statement = 0; + + Ok(()) + } + } + } +} + +/// Carries the result of a graph read query back to the interpreter. +#[expect(clippy::field_scoped_visibility_modifiers)] +pub struct GraphReadContinuation<'ctx, 'heap, A: Allocator> { + pub(crate) read: &'ctx GraphRead<'heap>, + pub(crate) value: Value<'heap, A>, +} diff --git a/libs/@local/hashql/mir/src/interpret/suspension/temporal.rs b/libs/@local/hashql/mir/src/interpret/suspension/temporal.rs new file mode 100644 index 00000000000..5a9f27e6659 --- /dev/null +++ b/libs/@local/hashql/mir/src/interpret/suspension/temporal.rs @@ -0,0 +1,63 @@ +//! Temporal types for bi-temporal graph queries. +//! +//! These types represent evaluated temporal axes that are extracted from +//! interpreter [`Value`]s during [`GraphRead`] suspension. They provide +//! a concrete, backend-agnostic representation of the temporal context +//! needed to execute a graph query. +//! +//! [`Value`]: crate::interpret::value::Value +//! 
[`GraphRead`]: crate::body::terminator::GraphRead
+
+use core::ops::Bound;
+
+use crate::interpret::value::Int;
+
+/// An evaluated timestamp value, in milliseconds since the Unix epoch.
+#[derive(Debug, Copy, Clone)]
+pub struct Timestamp(Int);
+
+impl From<Int> for Timestamp {
+    fn from(value: Int) -> Self {
+        Self(value)
+    }
+}
+
+impl From<Timestamp> for Int {
+    fn from(value: Timestamp) -> Self {
+        value.0
+    }
+}
+
+/// A half-open or closed interval over [`Timestamp`]s.
+#[derive(Debug, Clone)]
+pub struct TemporalInterval {
+    pub start: Bound<Timestamp>,
+    pub end: Bound<Timestamp>,
+}
+
+impl TemporalInterval {
+    /// Creates a point interval `[value, value]`.
+    pub(crate) const fn point(value: Timestamp) -> Self {
+        Self {
+            start: Bound::Included(value),
+            end: Bound::Included(value),
+        }
+    }
+
+    /// Creates an interval from explicit bounds.
+    pub(crate) const fn interval((start, end): (Bound<Timestamp>, Bound<Timestamp>)) -> Self {
+        Self { start, end }
+    }
+}
+
+/// The evaluated temporal axes for a bi-temporal graph query.
+///
+/// HashQL's graph store is bi-temporal: every fact is tracked along both
+/// a decision time axis (when the fact was decided) and a transaction time
+/// axis (when it was recorded). A query must specify intervals on both axes
+/// to determine which version of the data is visible.
+#[derive(Debug, Clone)] +pub struct TemporalAxesInterval { + pub decision_time: TemporalInterval, + pub transaction_time: TemporalInterval, +} diff --git a/libs/@local/hashql/mir/src/interpret/tests.rs b/libs/@local/hashql/mir/src/interpret/tests.rs index d902a107b1e..263b08ad8ed 100644 --- a/libs/@local/hashql/mir/src/interpret/tests.rs +++ b/libs/@local/hashql/mir/src/interpret/tests.rs @@ -14,18 +14,22 @@ clippy::similar_names )] +use alloc::rc::Rc; +use core::{assert_matches, ops::ControlFlow}; + use hashql_core::{ - collections::FastHashMap, - heap::{FromIteratorIn as _, Heap}, + heap::{self, FromIteratorIn as _, Heap}, id::{Id as _, IdVec}, - symbol::Symbol, - r#type::{TypeId, environment::Environment}, + symbol::sym, + r#type::{TypeBuilder, TypeId, environment::Environment}, }; use super::{ - CallStack, Runtime, RuntimeConfig, + CallStack, Inputs, Runtime, RuntimeConfig, error::InterpretDiagnostic, - value::{Int, Num, Value}, + runtime::Yield, + suspension::Suspension, + value::{Int, Num, Opaque, Struct, Value}, }; use crate::{ body::{ @@ -33,29 +37,32 @@ use crate::{ constant::Constant, operand::Operand, rvalue::{Aggregate, AggregateKind, RValue}, + terminator::{GraphRead, GraphReadHead, GraphReadTail, TerminatorKind}, }, builder::{BodyBuilder, body}, def::{DefId, DefIdSlice}, intern::Interner, interpret::error::InterpretDiagnosticCategory, + op, }; fn run_body(body: Body<'_>) -> Result, InterpretDiagnostic> { - run_body_with_inputs(body, FastHashMap::default()) + run_body_with_inputs(body, Inputs::new()) } +#[expect(clippy::needless_pass_by_value)] fn run_body_with_inputs<'heap>( body: Body<'heap>, - inputs: FastHashMap, Value<'heap>>, + inputs: Inputs<'heap>, ) -> Result, InterpretDiagnostic> { assert_eq!(body.id, DefId::new(0)); let bodies = [body]; let bodies = DefIdSlice::from_raw(&bodies); - let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, inputs); + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); let 
callstack = CallStack::new(&runtime, DefId::new(0), []); - runtime.run(callstack) + runtime.run(callstack, |_| unreachable!()) } fn run_bodies<'heap>( @@ -63,10 +70,11 @@ fn run_bodies<'heap>( entry: DefId, args: impl IntoIterator, IntoIter: ExactSizeIterator>, ) -> Result, InterpretDiagnostic> { - let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, FastHashMap::default()); + let inputs = Inputs::default(); + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); let callstack = CallStack::new(&runtime, entry, args); - runtime.run(callstack) + runtime.run(callstack, |_| unreachable!()) } // ============================================================================= @@ -164,14 +172,17 @@ fn entry_function_with_args() { let bodies = [body]; let bodies = DefIdSlice::from_raw(&bodies); - let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, FastHashMap::default()); + let inputs = Inputs::default(); + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); let args = [ Value::Integer(Int::from(10_i128)), Value::Integer(Int::from(20_i128)), ]; let callstack = CallStack::new(&runtime, DefId::new(0), args); - let result = runtime.run(callstack).expect("should succeed"); + let result = runtime + .run(callstack, |_| unreachable!()) + .expect("should succeed"); assert_eq!(result, Value::Integer(Int::from(true))); } @@ -976,6 +987,80 @@ fn struct_projection() { assert_eq!(result, Value::Integer(Int::from(200_i128))); } +#[test] +fn opaque_struct_projection_by_name() { + use hashql_core::symbol::sym; + + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Int { + decl inner: (x: Int, y: Int), wrapped: [Opaque sym::path::Entity; ?], result: Int; + @proj y_field = wrapped.y: Int; + + bb0() { + inner = struct x: 100, y: 200; + wrapped = opaque (sym::path::Entity), inner; + result = load y_field; + return result; + } + }); + 
+ let result = run_body(body).expect("should succeed"); + assert_eq!(result, Value::Integer(Int::from(200_i128))); +} + +#[test] +fn opaque_tuple_projection_by_index() { + use hashql_core::symbol::sym; + + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Int { + decl inner: (Int, Int), wrapped: [Opaque sym::path::Entity; ?], result: Int; + @proj second = wrapped.1: Int; + + bb0() { + inner = tuple 10, 20; + wrapped = opaque (sym::path::Entity), inner; + result = load second; + return result; + } + }); + + let result = run_body(body).expect("should succeed"); + assert_eq!(result, Value::Integer(Int::from(20_i128))); +} + +#[test] +fn nested_opaque_projection_by_name() { + use hashql_core::symbol::sym; + + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Int { + decl inner: (val: Int), mid: [Opaque sym::path::EntityId; ?], + outer: [Opaque sym::path::Entity; ?], result: Int; + @proj val_field = outer.val: Int; + + bb0() { + inner = struct val: 42; + mid = opaque (sym::path::EntityId), inner; + outer = opaque (sym::path::Entity), mid; + result = load val_field; + return result; + } + }); + + let result = run_body(body).expect("should succeed"); + assert_eq!(result, Value::Integer(Int::from(42_i128))); +} + // ============================================================================= // Input Operations // ============================================================================= @@ -995,7 +1080,7 @@ fn input_load_returns_value() { } }); - let mut inputs = FastHashMap::default(); + let mut inputs = Inputs::default(); inputs.insert( heap.intern_symbol("my_input"), Value::Integer(Int::from(999_i128)), @@ -1020,7 +1105,7 @@ fn input_exists_returns_true() { } }); - let mut inputs = FastHashMap::default(); + let mut inputs = Inputs::default(); inputs.insert( 
heap.intern_symbol("my_input"), Value::Integer(Int::from(1_i128)), @@ -1094,13 +1179,14 @@ fn recursion_limit_exceeded() { let bodies = [body]; let bodies = DefIdSlice::from_raw(&bodies); + let inputs = Inputs::default(); let config = RuntimeConfig { recursion_limit: 5 }; - let mut runtime = Runtime::new(config, bodies, FastHashMap::default()); + let mut runtime = Runtime::new(config, bodies, &inputs); let callstack = CallStack::new(&runtime, DefId::new(0), []); let result = runtime - .run(callstack) + .run(callstack, |_| unreachable!()) .expect_err("should fail with recursion limit"); assert_eq!(result.category, InterpretDiagnosticCategory::RuntimeLimit); } @@ -1595,3 +1681,560 @@ fn ice_struct_field_length_mismatch() { InterpretDiagnosticCategory::StructuralInvariant ); } + +// ============================================================================= +// Helpers for suspension tests +// ============================================================================= + +/// Constructs a minimal valid temporal axes value for `PinnedTransactionTimeTemporalAxes`. +/// +/// The structure mirrors the HashQL type system's temporal axes representation: +/// +/// ```text +/// Opaque(PinnedTransactionTimeTemporalAxes, +/// Struct { pinned, variable } +/// ) +/// ``` +/// +/// where `pinned` = `Opaque(TransactionTime, Opaque(Timestamp, Integer(pinned_ms)))` and +/// `variable` wraps an interval with inclusive start and unbounded end. 
+fn make_temporal_axes<'heap>( + interner: &Interner<'heap>, + pinned_ms: i128, + variable_start_ms: i128, +) -> Value<'heap> { + // Timestamp(Integer) + let pinned_timestamp = Value::Opaque(Opaque::new( + sym::path::Timestamp, + Rc::new(Value::Integer(Int::from(pinned_ms))), + )); + + // TransactionTime(Timestamp) + let pinned = Value::Opaque(Opaque::new( + sym::path::TransactionTime, + Rc::new(pinned_timestamp), + )); + + // Variable interval start: InclusiveTemporalBound(Timestamp(Integer)) + let start_timestamp = Value::Opaque(Opaque::new( + sym::path::Timestamp, + Rc::new(Value::Integer(Int::from(variable_start_ms))), + )); + let start_bound = Value::Opaque(Opaque::new( + sym::path::InclusiveTemporalBound, + Rc::new(start_timestamp), + )); + + // Variable interval end: UnboundedTemporalBound(Unit) + let end_bound = Value::Opaque(Opaque::new( + sym::path::UnboundedTemporalBound, + Rc::new(Value::Unit), + )); + + // Interval(Struct { start, end }) + let interval_fields = interner.symbols.intern_slice(&[sym::end, sym::start]); + let interval_struct = Struct::new_unchecked(interval_fields, Rc::new([end_bound, start_bound])); + let interval = Value::Opaque(Opaque::new( + sym::path::Interval, + Rc::new(Value::Struct(interval_struct)), + )); + + // DecisionTime(Interval(...)) + let variable = Value::Opaque(Opaque::new(sym::path::DecisionTime, Rc::new(interval))); + + // PinnedTransactionTimeTemporalAxes(Struct { pinned, variable }) + let axes_fields = interner.symbols.intern_slice(&[sym::pinned, sym::variable]); + let axes_struct = Struct::new_unchecked(axes_fields, Rc::new([pinned, variable])); + + Value::Opaque(Opaque::new( + sym::path::PinnedTransactionTimeTemporalAxes, + Rc::new(Value::Struct(axes_struct)), + )) +} + +/// Builds a body: `bb0` loads axis from input, `GraphRead → bb1`, `bb1` returns the result. +/// +/// Must be called with `DefId::new(0)` and an "axis" input containing a temporal axes value. 
+fn make_graph_read_body<'heap>( + heap: &'heap Heap, + interner: &Interner<'heap>, + env: &Environment<'heap>, +) -> Body<'heap> { + let int_ty = TypeBuilder::synthetic(env).integer(); + let mut builder = BodyBuilder::new(interner); + + let axis = builder.local("axis", int_ty); + let graph_result = builder.local("graph_result", int_ty); + + let bb0 = builder.reserve_block([]); + let bb1 = builder.reserve_block([graph_result.local]); + + builder + .build_block(bb0) + .assign_place(axis, |rv| { + rv.input( + hashql_hir::node::operation::InputOp::Load { required: true }, + "axis", + ) + }) + .finish_with_terminator(TerminatorKind::GraphRead(GraphRead { + head: GraphReadHead::Entity { + axis: Operand::Place(axis), + }, + body: heap::Vec::new_in(heap), + tail: GraphReadTail::Collect, + target: bb1, + })); + + builder.build_block(bb1).ret(graph_result); + + let mut body = builder.finish(0, int_ty); + body.id = DefId::new(0); + + body +} + +fn run_graph_read_body<'heap>( + heap: &'heap Heap, + interner: &Interner<'heap>, + env: &Environment<'heap>, + result_value: &Value<'heap>, +) -> Result, InterpretDiagnostic> { + let body = make_graph_read_body(heap, interner, env); + let axis_value = make_temporal_axes(interner, 1000, 500); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + + let mut inputs = Inputs::default(); + inputs.insert(heap.intern_symbol("axis"), axis_value); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let callstack = CallStack::new(&runtime, DefId::new(0), []); + + runtime.run(callstack, |suspension| { + let Suspension::GraphRead(graph_read) = suspension; + Ok(graph_read.resolve(result_value.clone())) + }) +} + +// ============================================================================= +// Suspension / Continuation Protocol +// ============================================================================= + +#[test] +fn start_suspend_resume_return() { + let heap = Heap::new(); + let interner = 
Interner::new(&heap); + let env = Environment::new(&heap); + + let body = make_graph_read_body(&heap, &interner, &env); + let axis_value = make_temporal_axes(&interner, 1000, 500); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + + let mut inputs = Inputs::default(); + inputs.insert(heap.intern_symbol("axis"), axis_value); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let mut callstack = CallStack::new(&runtime, DefId::new(0), []); + + // start → should suspend at the GraphRead + let result = runtime.start(&mut callstack).expect("start should succeed"); + let Yield::Suspension(Suspension::GraphRead(suspension)) = result else { + panic!("expected GraphRead suspension, got return"); + }; + + // Resolve with a value and resume + let continuation = suspension.resolve(Value::Integer(Int::from(42_i128))); + let result = runtime + .resume(&mut callstack, continuation) + .expect("resume should succeed"); + + let Yield::Return(value) = result else { + panic!("expected return after resume, got suspension"); + }; + assert_eq!(value, Value::Integer(Int::from(42_i128))); +} + +#[test] +fn run_with_suspension_handler() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let result = run_graph_read_body(&heap, &interner, &env, &Value::Integer(Int::from(99_i128))) + .expect("should succeed"); + assert_eq!(result, Value::Integer(Int::from(99_i128))); +} + +#[test] +fn multi_suspension_round_trip() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // Build a body with two sequential GraphReads: + // bb0: load axis, GraphRead → bb1 + // bb1: receive first result, GraphRead → bb2 + // bb2: receive second result, add first + second, return + let int_ty = TypeBuilder::synthetic(&env).integer(); + let mut builder = BodyBuilder::new(&interner); + + let axis = builder.local("axis", int_ty); + let first_result = 
builder.local("first_result", int_ty); + let second_result = builder.local("second_result", int_ty); + let sum = builder.local("sum", int_ty); + + let bb0 = builder.reserve_block([]); + let bb1 = builder.reserve_block([first_result.local]); + let bb2 = builder.reserve_block([second_result.local]); + + builder + .build_block(bb0) + .assign_place(axis, |rv| { + rv.input( + hashql_hir::node::operation::InputOp::Load { required: true }, + "axis", + ) + }) + .finish_with_terminator(TerminatorKind::GraphRead(GraphRead { + head: GraphReadHead::Entity { + axis: Operand::Place(axis), + }, + body: heap::Vec::new_in(&heap), + tail: GraphReadTail::Collect, + target: bb1, + })); + + builder + .build_block(bb1) + .finish_with_terminator(TerminatorKind::GraphRead(GraphRead { + head: GraphReadHead::Entity { + axis: Operand::Place(axis), + }, + body: heap::Vec::new_in(&heap), + tail: GraphReadTail::Collect, + target: bb2, + })); + + builder + .build_block(bb2) + .assign_place(sum, |rv| rv.binary(first_result, op![+], second_result)) + .ret(sum); + + let mut body = builder.finish(0, int_ty); + body.id = DefId::new(0); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + + let mut inputs = Inputs::default(); + inputs.insert( + heap.intern_symbol("axis"), + make_temporal_axes(&interner, 1000, 500), + ); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let mut callstack = CallStack::new(&runtime, DefId::new(0), []); + + // First suspension + let result = runtime.start(&mut callstack).expect("start should succeed"); + let Yield::Suspension(Suspension::GraphRead(suspension)) = result else { + panic!("expected first GraphRead suspension"); + }; + let continuation = suspension.resolve(Value::Integer(Int::from(10_i128))); + + // Second suspension + let result = runtime + .resume(&mut callstack, continuation) + .expect("first resume should succeed"); + let Yield::Suspension(Suspension::GraphRead(suspension)) = result else { + 
panic!("expected second GraphRead suspension"); + }; + let continuation = suspension.resolve(Value::Integer(Int::from(32_i128))); + + // Final return: 10 + 32 = 42 + let result = runtime + .resume(&mut callstack, continuation) + .expect("second resume should succeed"); + let Yield::Return(value) = result else { + panic!("expected return after second resume"); + }; + assert_eq!(value, Value::Integer(Int::from(42_i128))); +} + +// ============================================================================= +// run_until_transition +// ============================================================================= + +#[test] +fn transition_breaks_at_target_block() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // bb0 → bb1 → bb2 (return) + // Transition fires on bb1 (continue returns false). + let body = body!(interner, env; fn@0/0 -> Int { + decl x: Int; + + bb0() { + goto bb1(); + }, + bb1() { + goto bb2(42); + }, + bb2(x) { + return x; + } + }); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + let inputs = Inputs::default(); + + let bb1 = crate::body::basic_block::BasicBlockId::new(1); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let mut callstack = CallStack::new(&runtime, DefId::new(0), []); + runtime.reset(); + + let result = runtime.run_until_transition::(&mut callstack, |block| block != bb1); + assert_matches!(result, Ok(ControlFlow::Break(_))); + + // Callstack should be positioned at bb1 + let current = callstack + .current_block::<()>() + .expect("callstack should not be empty"); + assert_eq!(current, bb1); +} + +#[test] +fn transition_runs_to_completion_when_continue_always_true() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Int { + decl; + + bb0() { + goto bb1(); + }, + bb1() { + return 42; + } + }); + + let bodies = [body]; + let 
bodies = DefIdSlice::from_raw(&bodies); + let inputs = Inputs::default(); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let mut callstack = CallStack::new(&runtime, DefId::new(0), []); + runtime.reset(); + + let result = runtime.run_until_transition::(&mut callstack, |_| true); + assert_matches!(result, Ok(ControlFlow::Continue(Yield::Return(value))) if value == Value::Integer(Int::from(42_i128))); +} + +#[test] +fn transition_fires_on_reentry_after_continuation_apply() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // bb0: load axis, GraphRead → bb1 + // bb1: return result + // Transition fires on bb1 (continue returns false on bb1). + let int_ty = TypeBuilder::synthetic(&env).integer(); + let mut builder = BodyBuilder::new(&interner); + + let axis = builder.local("axis", int_ty); + let graph_result = builder.local("graph_result", int_ty); + + let bb0 = builder.reserve_block([]); + let bb1 = builder.reserve_block([graph_result.local]); + + let bb1_id = bb1; + + builder + .build_block(bb0) + .assign_place(axis, |rv| { + rv.input( + hashql_hir::node::operation::InputOp::Load { required: true }, + "axis", + ) + }) + .finish_with_terminator(TerminatorKind::GraphRead(GraphRead { + head: GraphReadHead::Entity { + axis: Operand::Place(axis), + }, + body: heap::Vec::new_in(&heap), + tail: GraphReadTail::Collect, + target: bb1, + })); + + builder.build_block(bb1).ret(graph_result); + + let mut body = builder.finish(0, int_ty); + body.id = DefId::new(0); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + + let mut inputs = Inputs::default(); + inputs.insert( + heap.intern_symbol("axis"), + make_temporal_axes(&interner, 1000, 500), + ); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let mut callstack = CallStack::new(&runtime, DefId::new(0), []); + runtime.reset(); + + // First call: should suspend at GraphRead (bb0 is 
allowed) + let result = runtime.run_until_transition::(&mut callstack, |block| block != bb1_id); + let Ok(ControlFlow::Continue(Yield::Suspension(Suspension::GraphRead(suspension)))) = result + else { + panic!("expected suspension, got {result:?}"); + }; + + // Apply continuation → sets current block to bb1 + let continuation = suspension.resolve(Value::Integer(Int::from(42_i128))); + continuation + .apply::(&mut callstack) + .expect("apply should succeed"); + + // Second call: transition should fire immediately on bb1 (before stepping) + let result = runtime.run_until_transition::(&mut callstack, |block| block != bb1_id); + assert_matches!(result, Ok(ControlFlow::Break(_))); + + let current = callstack + .current_block::<()>() + .expect("callstack should not be empty"); + assert_eq!(current, bb1_id); +} + +// ============================================================================= +// CallStack edge cases +// ============================================================================= + +#[test] +fn unwind_produces_correct_frames() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // main (DefId 0) calls inner (DefId 1), inner triggers an error. 
+ let inner_id = DefId::new(1); + + let main = body!(interner, env; fn@0/0 -> Int { + decl result: Int; + + bb0() { + result = apply inner_id; + return result; + } + }); + + let inner = body!(interner, env; fn@inner_id/0 -> Int { + decl x: Int; + + bb0() { + return x; + } + }); + + let result = run_bodies(DefIdSlice::from_raw(&[main, inner]), DefId::new(0), []); + let error = result.expect_err("should fail with uninitialized local"); + + // The error should include stack trace info (manifested as labels in the diagnostic) + assert_eq!(error.category, InterpretDiagnosticCategory::LocalAccess); + // Primary label from inner + secondary "called from here" from main = at least 2 labels + assert!( + error.labels.len() >= 2, + "expected at least 2 labels (error site + call site), got {}", + error.labels.len() + ); +} + +#[test] +fn block_param_aliasing_swap() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // goto bb1(b, a) where bb1 params are (a, b) + // Without the scratch-based staging, naive sequential assignment would clobber. + let body = body!(interner, env; fn@0/0 -> Int { + decl a: Int, b: Int, result: Int; + + bb0() { + a = load 1; + b = load 2; + goto bb1(b, a); + }, + bb1(a, b) { + result = bin.- a b; + return result; + } + }); + + let result = run_body(body).expect("should succeed"); + // After swap: a=2, b=1. result = 2 - 1 = 1. 
+ assert_eq!(result, Value::Integer(Int::from(1_i128))); +} + +// ============================================================================= +// Minor gaps +// ============================================================================= + +#[test] +fn unary_neg_number() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Num { + decl result: Num; + + bb0() { + result = un.neg 3.5; + return result; + } + }); + + let result = run_body(body).expect("should succeed"); + assert_eq!(result, Value::Number(Num::from(-3.5))); +} + +#[test] +fn callstack_new_in_runs_to_completion() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + let body = body!(interner, env; fn@0/0 -> Int { + decl; + + bb0() { + return 77; + } + }); + + let bodies = [body]; + let bodies = DefIdSlice::from_raw(&bodies); + let inputs = Inputs::default(); + + let mut runtime = Runtime::new(RuntimeConfig::default(), bodies, &inputs); + let callstack = CallStack::new_in::<()>(&bodies[DefId::new(0)], [], alloc::alloc::Global) + .expect("new_in should succeed"); + + let result = runtime + .run(callstack, |_| unreachable!()) + .expect("should succeed"); + assert_eq!(result, Value::Integer(Int::from(77_i128))); +} diff --git a/libs/@local/hashql/mir/src/interpret/value/int.rs b/libs/@local/hashql/mir/src/interpret/value/int.rs index 0e04839a950..c70e1cf26bf 100644 --- a/libs/@local/hashql/mir/src/interpret/value/int.rs +++ b/libs/@local/hashql/mir/src/interpret/value/int.rs @@ -1,8 +1,30 @@ +//! Finite-precision integer constants for the HashQL MIR. +//! +//! [`Int`] represents compile-time integer and boolean values with size tracking. +//! Values carry their bit-width: 1 bit for booleans, 128 bits for integers. +//! This allows serialization to distinguish `true`/`false` from `0`/`1` without +//! 
external type information — critical for round-tripping through formats like jsonb +//! that have distinct boolean and number representations. +//! +//! # Size Invariants +//! +//! Only two sizes are valid: +//! - **1 bit**: boolean values (`0` or `1`) +//! - **128 bits**: integer values (full [`i128`] range) +//! +//! # Arithmetic Promotion +//! +//! All arithmetic operations produce 128-bit results, even when both operands are booleans. +//! Bitwise boolean operations (`BitAnd`, `BitOr`, `BitXor`) preserve the 1-bit size when +//! both operands are booleans. + use core::{ + cmp, debug_assert_matches, error::Error, fmt::{self, Display}, + hash::{Hash, Hasher}, hint, - num::TryFromIntError, + num::{NonZero, TryFromIntError}, ops::{Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Neg, Not, Sub}, }; @@ -13,442 +35,235 @@ use crate::{ macros::{forward_ref_binop, forward_ref_op_assign, forward_ref_unop}, }; +/// Bit-width for boolean values. +const BOOL_BITS: NonZero = NonZero::new(1).unwrap(); + +/// Bit-width for integer values. +const INT_BITS: NonZero = NonZero::new(128).unwrap(); + /// A finite-precision integer constant in the MIR. /// -/// Unlike Rust, HashQL cannot differentiate between signed and unsigned integers at the type -/// level, so all values are stored as signed [`i128`]. -/// -/// # Conversion Methods -/// -/// **Range-checked conversions** (`as_i8`, `as_u8`, `as_i16`, etc.) return [`Some`] only if -/// the value fits in the target type's range. -/// -/// **Unchecked conversions** (`as_int`, `as_uint`) return the raw value without range checks. +/// Stores an [`i128`] value alongside its bit-width. The width distinguishes booleans +/// (1 bit, values `0` or `1`) from integers (128 bits, full [`i128`] range). 
/// /// # Examples /// /// ``` /// use hashql_mir::interpret::value::Int; /// -/// // Values that fit in the target range succeed -/// let small = Int::from(42_i64); -/// assert_eq!(small.as_i8(), Some(42)); -/// assert_eq!(small.as_i16(), Some(42)); -/// -/// // Values outside the target range return None -/// let large = Int::from(1000_i64); -/// assert_eq!(large.as_i8(), None); // 1000 > i8::MAX -/// assert_eq!(large.as_i16(), Some(1000)); +/// // Booleans are 1-bit integers +/// let t = Int::from(true); +/// assert_eq!(t.size(), 1); +/// assert_eq!(t.as_bool(), Some(true)); /// -/// // Unsigned conversions require non-negative values -/// let negative = Int::from(-1_i8); -/// assert_eq!(negative.as_i8(), Some(-1)); -/// assert_eq!(negative.as_u8(), None); +/// // Integers are 128-bit +/// let n = Int::from(42_i64); +/// assert_eq!(n.size(), 128); +/// assert_eq!(n.as_int(), 42); /// -/// // Raw value access always succeeds -/// assert_eq!(large.as_int(), 1000); +/// // Bool provenance is preserved: from(true) ≠ from(1) +/// assert_ne!(Int::from(true), Int::from(1_i32)); /// ``` -#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +// Uses `#[repr(packed)]` to avoid alignment padding, which would duplicate size, same as +// rust-lang's ScalarInt. +#[derive(Copy, Clone)] +#[repr(Rust, packed)] pub struct Int { + /// The raw integer value. + /// + /// For booleans (size == 1), only `0` and `1` are valid. + /// For integers (size == 128), any `i128` value is valid. value: i128, + + /// Bit-width of the value: `1` for booleans, `128` for integers. + size: NonZero, } -#[expect( - clippy::cast_possible_truncation, - clippy::cast_precision_loss, - clippy::cast_sign_loss -)] impl Int { - #[inline] - const fn from_value_unchecked(value: i128) -> Self { - Self { value } - } + /// Boolean constant `false`. + pub const FALSE: Self = Self { + value: 0, + size: BOOL_BITS, + }; + /// Integer constant `1`. 
+ pub const ONE: Self = Self { + value: 1, + size: INT_BITS, + }; + /// Boolean constant `true`. + pub const TRUE: Self = Self { + value: 1, + size: BOOL_BITS, + }; + /// Integer constant `0`. + pub const ZERO: Self = Self { + value: 0, + size: INT_BITS, + }; - /// Converts this integer to a boolean if the value is 0 or 1. - /// - /// Returns `Some(false)` for 0, `Some(true)` for 1, or [`None`] for any other value. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(true).as_bool(), Some(true)); - /// assert_eq!(Int::from(false).as_bool(), Some(false)); - /// assert_eq!(Int::from(1_i32).as_bool(), Some(true)); - /// assert_eq!(Int::from(0_i64).as_bool(), Some(false)); - /// - /// // Values other than 0 or 1 return None - /// assert_eq!(Int::from(2_i8).as_bool(), None); - /// assert_eq!(Int::from(-1_i8).as_bool(), None); - /// ``` + /// Creates a boolean `Int` from a `bool`. #[inline] - #[must_use] - pub const fn as_bool(self) -> Option { - match self.value { - 0 => Some(false), - 1 => Some(true), - _ => None, + const fn from_bool(value: bool) -> Self { + Self { + value: value as i128, + size: BOOL_BITS, } } - /// Converts this integer to [`i8`] if the value fits in the range `-128..=127`. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_i8).as_i8(), Some(42)); - /// assert_eq!(Int::from(42_i64).as_i8(), Some(42)); - /// assert_eq!(Int::from(-128_i32).as_i8(), Some(-128)); - /// assert_eq!(Int::from(127_u8).as_i8(), Some(127)); - /// - /// // Value out of i8 range returns None - /// assert_eq!(Int::from(128_i32).as_i8(), None); - /// assert_eq!(Int::from(-129_i32).as_i8(), None); - /// ``` + /// Creates a 128-bit integer `Int` from an `i128`. 
#[inline] - #[must_use] - pub const fn as_i8(self) -> Option { - if self.value >= i8::MIN as i128 && self.value <= i8::MAX as i128 { - Some(self.value as i8) - } else { - None + const fn from_i128(value: i128) -> Self { + Self { + value, + size: INT_BITS, } } - /// Converts this integer to [`u8`] if the value fits in the range `0..=255`. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_i8).as_u8(), Some(42)); - /// assert_eq!(Int::from(255_u8).as_u8(), Some(255)); - /// assert_eq!(Int::from(200_i64).as_u8(), Some(200)); + /// Validates the internal invariants in debug builds. /// - /// // Negative or too large values return None - /// assert_eq!(Int::from(-1_i8).as_u8(), None); - /// assert_eq!(Int::from(256_i32).as_u8(), None); - /// ``` - #[inline] - #[must_use] - pub const fn as_u8(self) -> Option { - if self.value >= 0 && self.value <= u8::MAX as i128 { - Some(self.value as u8) - } else { - None - } - } + /// - `size` must be 1 or 128 + /// - If `size == 1`, value must be 0 or 1 + #[expect( + clippy::inline_always, + reason = "mirrors rustc's check_data pattern — cheap assertion, always inlined" + )] + #[inline(always)] + fn check_data(self) { + let value = self.value; + let size = self.size.get(); - /// Converts this integer to [`i16`] if the value fits in the range `-32768..=32767`. 
- /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(1000_i16).as_i16(), Some(1000)); - /// assert_eq!(Int::from(1000_i64).as_i16(), Some(1000)); - /// assert_eq!(Int::from(-1000_i32).as_i16(), Some(-1000)); - /// - /// // Value out of i16 range returns None - /// assert_eq!(Int::from(40000_i64).as_i16(), None); - /// ``` - #[inline] - #[must_use] - pub const fn as_i16(self) -> Option { - if self.value >= i16::MIN as i128 && self.value <= i16::MAX as i128 { - Some(self.value as i16) - } else { - None - } + debug_assert_matches!(size, 1 | 128, "Int size must be 1 or 128, got {size}"); + debug_assert!( + size == 128 || matches!(value, 0 | 1), + "Bool Int must have value 0 or 1, got {value}" + ); } - /// Converts this integer to [`u16`] if the value fits in the range `0..=65535`. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(1000_i16).as_u16(), Some(1000)); - /// assert_eq!(Int::from(65535_u16).as_u16(), Some(65535)); - /// assert_eq!(Int::from(50000_i64).as_u16(), Some(50000)); - /// - /// // Negative or too large values return None - /// assert_eq!(Int::from(-1_i16).as_u16(), None); - /// assert_eq!(Int::from(70000_i64).as_u16(), None); - /// ``` + /// Returns the bit-width of this value: `1` for booleans, `128` for integers. #[inline] #[must_use] - pub const fn as_u16(self) -> Option { - if self.value >= 0 && self.value <= u16::MAX as i128 { - Some(self.value as u16) - } else { - None - } + pub const fn size(self) -> u8 { + self.size.get() } - /// Converts this integer to [`i32`] if the value fits in the [`i32`] range. 
- /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(100_000_i32).as_i32(), Some(100_000)); - /// assert_eq!(Int::from(100_000_i64).as_i32(), Some(100_000)); - /// assert_eq!(Int::from(-100_000_i64).as_i32(), Some(-100_000)); - /// - /// // Value out of i32 range returns None - /// assert_eq!(Int::from(3_000_000_000_i64).as_i32(), None); - /// ``` + /// Returns `true` if this value has boolean width (1 bit). #[inline] #[must_use] - pub const fn as_i32(self) -> Option { - if self.value >= i32::MIN as i128 && self.value <= i32::MAX as i128 { - Some(self.value as i32) - } else { - None - } + pub const fn is_bool(self) -> bool { + self.size.get() == 1 } - /// Converts this integer to [`u32`] if the value fits in the [`u32`] range. - /// - /// # Examples + /// Converts this value to a `bool` if it has boolean width. /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(100_000_i32).as_u32(), Some(100_000)); - /// assert_eq!(Int::from(3_000_000_000_u32).as_u32(), Some(3_000_000_000)); - /// assert_eq!(Int::from(100_000_i64).as_u32(), Some(100_000)); - /// - /// // Negative or too large values return None - /// assert_eq!(Int::from(-1_i32).as_u32(), None); - /// assert_eq!(Int::from(5_000_000_000_i64).as_u32(), None); - /// ``` - #[inline] - #[must_use] - pub const fn as_u32(self) -> Option { - if self.value >= 0 && self.value <= u32::MAX as i128 { - Some(self.value as u32) - } else { - None - } - } - - /// Converts this integer to [`i64`] if the value fits in the [`i64`] range. + /// Returns `None` for 128-bit integers, even if the value is 0 or 1. 
/// /// # Examples /// /// ``` /// use hashql_mir::interpret::value::Int; /// - /// assert_eq!(Int::from(10_000_000_000_i64).as_i64(), Some(10_000_000_000)); - /// assert_eq!( - /// Int::from(-10_000_000_000_i64).as_i64(), - /// Some(-10_000_000_000) - /// ); - /// assert_eq!(Int::from(100_i32).as_i64(), Some(100)); + /// assert_eq!(Int::from(true).as_bool(), Some(true)); + /// assert_eq!(Int::from(false).as_bool(), Some(false)); /// - /// // Value out of i64 range returns None - /// assert_eq!(Int::from(10_000_000_000_000_000_000_u64).as_i64(), None); + /// // Integer 1 is NOT a bool — different size + /// assert_eq!(Int::from(1_i32).as_bool(), None); /// ``` #[inline] #[must_use] - pub const fn as_i64(self) -> Option { - if self.value >= i64::MIN as i128 && self.value <= i64::MAX as i128 { - Some(self.value as i64) - } else { - None + pub const fn as_bool(self) -> Option { + if !self.is_bool() { + return None; } - } - /// Converts this integer to [`u64`] if the value fits in the [`u64`] range. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(10_000_000_000_i64).as_u64(), Some(10_000_000_000)); - /// assert_eq!(Int::from(100_i32).as_u64(), Some(100)); - /// - /// // Negative or too large values return None - /// assert_eq!(Int::from(-1_i64).as_u64(), None); - /// ``` - #[inline] - #[must_use] - pub const fn as_u64(self) -> Option { - if self.value >= 0 && self.value <= u64::MAX as i128 { - Some(self.value as u64) - } else { - None + match self.value { + 0 => Some(false), + 1 => Some(true), + _ => { + // The check_data invariant guarantees boolean values are 0 or 1. This branch is + // unreachable in valid programs. + unreachable!() + } } } - /// Returns the value as [`i128`]. + /// Returns the value as a signed `i128`. /// - /// This always succeeds since the internal representation is [`i128`]. + /// For booleans, returns `0` or `1`. For integers, returns the raw value. 
+ /// This always succeeds regardless of the bit-width. /// /// # Examples /// /// ``` /// use hashql_mir::interpret::value::Int; /// - /// assert_eq!(Int::from(i128::MAX).as_i128(), i128::MAX); - /// assert_eq!(Int::from(i128::MIN).as_i128(), i128::MIN); - /// assert_eq!(Int::from(42_i8).as_i128(), 42); + /// assert_eq!(Int::from(42_i64).as_int(), 42); + /// assert_eq!(Int::from(-1_i128).as_int(), -1); + /// assert_eq!(Int::from(true).as_int(), 1); /// ``` #[inline] #[must_use] - pub const fn as_i128(self) -> i128 { + pub const fn as_int(self) -> i128 { self.value } - /// Converts this integer to [`u128`] if the value is non-negative. - /// - /// # Examples + /// Returns the value reinterpreted as unsigned `u128`. /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(i128::MAX).as_u128(), Some(i128::MAX as u128)); - /// assert_eq!(Int::from(42_i8).as_u128(), Some(42)); - /// - /// // Negative values return None - /// assert_eq!(Int::from(-1_i128).as_u128(), None); - /// ``` - #[inline] - #[must_use] - pub const fn as_u128(self) -> Option { - if self.value >= 0 { - Some(self.value as u128) - } else { - None - } - } - - /// Converts this integer to [`isize`] if the value fits in the platform's [`isize`] range. + /// For booleans, returns `0` or `1`. For integers, performs a two's complement + /// bit-cast (negative values wrap to large unsigned values). 
/// /// # Examples /// /// ``` /// use hashql_mir::interpret::value::Int; /// - /// assert_eq!(Int::from(42_isize).as_isize(), Some(42)); - /// assert_eq!(Int::from(-42_i32).as_isize(), Some(-42)); - /// assert_eq!(Int::from(1000_i64).as_isize(), Some(1000)); + /// assert_eq!(Int::from(42_i64).as_uint(), 42); + /// assert_eq!(Int::from(true).as_uint(), 1); + /// assert_eq!(Int::from(-1_i128).as_uint(), u128::MAX); /// ``` #[inline] #[must_use] - pub const fn as_isize(self) -> Option { - if self.value >= isize::MIN as i128 && self.value <= isize::MAX as i128 { - Some(self.value as isize) - } else { - None - } + #[expect( + clippy::cast_sign_loss, + reason = "intentional two's complement reinterpretation" + )] + pub const fn as_uint(self) -> u128 { + self.as_int() as u128 } - /// Converts this integer to [`usize`] if the value fits in the platform's [`usize`] range. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_usize).as_usize(), Some(42)); - /// assert_eq!(Int::from(1000_i64).as_usize(), Some(1000)); + /// Checked integer addition. Returns `None` on overflow. /// - /// // Negative values return None - /// assert_eq!(Int::from(-1_isize).as_usize(), None); - /// ``` + /// Always produces a 128-bit result (arithmetic promotes booleans). #[inline] #[must_use] - pub const fn as_usize(self) -> Option { - if self.value >= 0 && self.value <= usize::MAX as i128 { - Some(self.value as usize) - } else { - None + pub const fn checked_add(self, rhs: Self) -> Option { + match self.as_int().checked_add(rhs.as_int()) { + Some(result) => Some(Self::from_i128(result)), + None => None, } } - /// Returns the raw signed value. - /// - /// This always succeeds and returns the internal [`i128`] representation directly. 
- /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_i8).as_int(), 42); - /// assert_eq!(Int::from(-1_i64).as_int(), -1); - /// assert_eq!(Int::from(i128::MAX).as_int(), i128::MAX); - /// ``` - #[inline] - #[must_use] - pub const fn as_int(self) -> i128 { - self.value - } - - /// Returns the raw value reinterpreted as unsigned. - /// - /// This performs a direct bit-cast from [`i128`] to [`u128`], preserving the - /// two's complement representation. For negative values, this produces the - /// corresponding unsigned value with the sign bit set. - /// - /// This is primarily useful for operations like [`SwitchInt`] that work with - /// unsigned discriminant values. - /// - /// [`SwitchInt`]: crate::body::terminator::SwitchInt - /// - /// # Sign Overflow Behavior - /// - /// Negative signed values wrap around to large unsigned values: - /// - `-1_i8` becomes `u128::MAX` (all bits set) - /// - `-128_i8` becomes `u128::MAX - 127` + /// Checked integer subtraction. Returns `None` on overflow. /// - /// This is intentional and matches Rust's `as` cast semantics for signed-to-unsigned - /// conversions. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// // Positive values convert directly - /// assert_eq!(Int::from(42_i8).as_uint(), 42); - /// - /// // Negative values wrap (two's complement) - /// assert_eq!(Int::from(-1_i8).as_uint(), u128::MAX); - /// assert_eq!(Int::from(-1_i128).as_uint(), u128::MAX); - /// ``` + /// Always produces a 128-bit result (arithmetic promotes booleans). #[inline] #[must_use] - pub const fn as_uint(self) -> u128 { - self.value as u128 + pub const fn checked_sub(self, rhs: Self) -> Option { + match self.as_int().checked_sub(rhs.as_int()) { + Some(result) => Some(Self::from_i128(result)), + None => None, + } } /// Converts this integer to [`f32`]. 
/// /// This may lose precision for values that cannot be exactly represented /// as a 32-bit floating point number. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_i32).as_f32(), 42.0_f32); - /// assert_eq!(Int::from(-1_i8).as_f32(), -1.0_f32); - /// ``` #[inline] #[must_use] + #[expect( + clippy::cast_precision_loss, + reason = "intentional lossy conversion to float" + )] pub const fn as_f32(self) -> f32 { self.as_int() as f32 } @@ -457,58 +272,105 @@ impl Int { /// /// This may lose precision for values that cannot be exactly represented /// as a 64-bit floating point number. - /// - /// # Examples - /// - /// ``` - /// use hashql_mir::interpret::value::Int; - /// - /// assert_eq!(Int::from(42_i64).as_f64(), 42.0_f64); - /// assert_eq!(Int::from(-1_i8).as_f64(), -1.0_f64); - /// ``` #[inline] #[must_use] + #[expect( + clippy::cast_precision_loss, + reason = "intentional lossy conversion to float" + )] pub const fn as_f64(self) -> f64 { self.as_int() as f64 } } +impl fmt::Debug for Int { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let this = *self; + this.check_data(); + + match this.as_bool() { + Some(value) => f.debug_tuple("Bool").field(&value).finish(), + None => f.debug_tuple("Int").field(&this.as_int()).finish(), + } + } +} + impl Display for Int { - fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { - Display::fmt(&self.value, fmt) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let this = *self; + this.check_data(); + + match this.as_bool() { + Some(value) => Display::fmt(&value, f), + None => Display::fmt(&this.as_int(), f), + } } } -macro_rules! 
impl_from { - ($($ty:ty),*) => { - $(impl_from!(@impl $ty);)* - }; +impl PartialEq for Int { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_int() == other.as_int() && self.size() == other.size() + } +} + +impl Eq for Int {} - (@impl $ty:ty) => { - impl const From<$ty> for Int { - #[inline] - fn from(value: $ty) -> Self { - Self::from_value_unchecked(i128::from(value)) +impl PartialOrd for Int { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Int { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + (self.size(), self.as_int()).cmp(&(other.size(), other.as_int())) + } +} + +impl Hash for Int { + #[inline] + fn hash(&self, state: &mut H) { + self.as_int().hash(state); + self.size().hash(state); + } +} + +impl const From for Int { + #[inline] + fn from(value: bool) -> Self { + Self::from_bool(value) + } +} + +macro_rules! impl_from_int { + ($($ty:ty),*) => { + $( + impl const From<$ty> for Int { + #[inline] + fn from(value: $ty) -> Self { + Self::from_i128(i128::from(value)) + } } - } + )* }; } -impl_from!(bool, u8, u16, u32, u64, i8, i16, i32, i64, i128); +impl_from_int!(u8, u16, u32, u64, i8, i16, i32, i64, i128); -// `usize` and `isize` cannot use the macro because `i128::from()` doesn't accept -// platform-dependent types. 
impl const From for Int { #[inline] fn from(value: usize) -> Self { - Self::from_value_unchecked(value as i128) + Self::from_i128(value as i128) } } impl const From for Int { #[inline] fn from(value: isize) -> Self { - Self::from_value_unchecked(value as i128) + Self::from_i128(value as i128) } } @@ -518,7 +380,7 @@ impl const TryFrom for Int { #[inline] fn try_from(value: u128) -> Result { match i128::try_from(value) { - Ok(value) => Ok(Self::from_value_unchecked(value)), + Ok(value) => Ok(Self::from_i128(value)), Err(error) => Err(error), } } @@ -545,7 +407,7 @@ impl TryFrom> for Int { fn try_from(value: Integer<'_>) -> Result { value .as_i128() - .map(From::from) + .map(Self::from_i128) .ok_or(TryFromIntegerError(())) } } @@ -602,7 +464,7 @@ impl<'heap> TryFrom> for Int { fn try_from(value: Primitive<'heap>) -> Result { match value { - Primitive::Boolean(bool) => Ok(bool.into()), + Primitive::Boolean(bool) => Ok(Self::from_bool(bool)), Primitive::Integer(integer) => { integer.try_into().map_err(|_err| TryFromPrimitiveError { kind: TryFromPrimitiveErrorKind::OutOfRange, @@ -622,9 +484,19 @@ impl<'heap> TryFrom> for Int { impl Not for Int { type Output = Self; + /// Boolean NOT for 1-bit values, bitwise NOT for 128-bit values. + /// + /// For booleans: `!true == false`, `!false == true`. + /// For integers: flips all 128 bits (two's complement: `!x == -(x + 1)`). 
#[inline] fn not(self) -> Self::Output { - Self::from_value_unchecked(!self.as_int()) + if self.is_bool() { + // Boolean NOT: flip the single bit + Self::from_bool(self.as_int() == 0) + } else { + // Bitwise NOT on full 128-bit value + Self::from_i128(!self.as_int()) + } } } @@ -633,14 +505,14 @@ impl Neg for Int { #[expect(clippy::cast_precision_loss, clippy::float_arithmetic)] fn neg(self) -> Self::Output { - let (value, overflow) = self.as_int().overflowing_neg(); + let value = self.as_int(); + let (result, overflow) = value.overflowing_neg(); - if overflow { - // There's only a single reason why this overflowed: the value was i128::MIN, in this - // case we return `i128::MAX + 1` as a Num. + if hint::unlikely(overflow) { + // Only i128::MIN overflows: return i128::MAX + 1 as float. Numeric::Num(Num::from((i128::MAX as f64) + 1.0)) } else { - Numeric::Int(Self::from_value_unchecked(value)) + Numeric::Int(Self::from_i128(result)) } } } @@ -650,12 +522,13 @@ impl Add for Int { #[expect(clippy::float_arithmetic)] fn add(self, rhs: Self) -> Self::Output { - let (value, overflow) = self.as_int().overflowing_add(rhs.as_int()); + let (lhs, rhs) = (self.as_int(), rhs.as_int()); + let (result, overflow) = lhs.overflowing_add(rhs); if hint::unlikely(overflow) { - Numeric::Num(Num::from(self.as_f64() + rhs.as_f64())) + Numeric::Num(Num::from(self.as_f64() + Self::from_i128(rhs).as_f64())) } else { - Numeric::Int(Self::from_value_unchecked(value)) + Numeric::Int(Self::from_i128(result)) } } } @@ -675,12 +548,13 @@ impl Sub for Int { #[expect(clippy::float_arithmetic)] fn sub(self, rhs: Self) -> Self::Output { - let (value, overflow) = self.as_int().overflowing_sub(rhs.as_int()); + let (lhs, rhs_val) = (self.as_int(), rhs.as_int()); + let (result, overflow) = lhs.overflowing_sub(rhs_val); if hint::unlikely(overflow) { Numeric::Num(Num::from(self.as_f64() - rhs.as_f64())) } else { - Numeric::Int(Self::from_value_unchecked(value)) + Numeric::Int(Self::from_i128(result)) } } } 
@@ -695,19 +569,32 @@ impl Sub for Int { } } +/// Returns `BOOL_BITS` if both operands are bools, `INT_BITS` otherwise. +#[inline] +const fn bitwise_result_size(lhs: Int, rhs: Int) -> NonZero { + if lhs.is_bool() && rhs.is_bool() { + BOOL_BITS + } else { + INT_BITS + } +} + impl BitOr for Int { type Output = Self; #[inline] fn bitor(self, rhs: Self) -> Self::Output { - Self::from_value_unchecked(self.as_int() | rhs.as_int()) + Self { + value: self.as_int() | rhs.as_int(), + size: bitwise_result_size(self, rhs), + } } } impl BitOrAssign for Int { #[inline] fn bitor_assign(&mut self, rhs: Self) { - self.value |= rhs.value; + *self = *self | rhs; } } @@ -716,14 +603,17 @@ impl BitAnd for Int { #[inline] fn bitand(self, rhs: Self) -> Self::Output { - Self::from_value_unchecked(self.as_int() & rhs.as_int()) + Self { + value: self.as_int() & rhs.as_int(), + size: bitwise_result_size(self, rhs), + } } } impl BitAndAssign for Int { #[inline] fn bitand_assign(&mut self, rhs: Self) { - self.value &= rhs.value; + *self = *self & rhs; } } @@ -732,14 +622,17 @@ impl BitXor for Int { #[inline] fn bitxor(self, rhs: Self) -> Self::Output { - Self::from_value_unchecked(self.as_int() ^ rhs.as_int()) + Self { + value: self.as_int() ^ rhs.as_int(), + size: bitwise_result_size(self, rhs), + } } } impl BitXorAssign for Int { #[inline] fn bitxor_assign(&mut self, rhs: Self) { - self.value ^= rhs.value; + *self = *self ^ rhs; } } @@ -766,44 +659,206 @@ mod tests { use crate::interpret::value::{Int, Numeric}; + #[test] + fn layout() { + assert_eq!(size_of::(), 17); + assert_eq!(align_of::(), 1); + } + + #[test] + fn from_bool_preserves_size() { + assert_eq!(Int::from(true).size(), 1); + assert_eq!(Int::from(false).size(), 1); + } + + #[test] + fn from_integer_preserves_size() { + assert_eq!(Int::from(0_i32).size(), 128); + assert_eq!(Int::from(1_i32).size(), 128); + assert_eq!(Int::from(42_i64).size(), 128); + assert_eq!(Int::from(i128::MAX).size(), 128); + 
assert_eq!(Int::from(i128::MIN).size(), 128); + } + + #[test] + fn bool_provenance_preserved() { + // from(true) and from(1) have the same numeric value but different sizes + assert_ne!(Int::from(true), Int::from(1_i32)); + assert_ne!(Int::from(false), Int::from(0_i32)); + } + + #[test] + fn as_bool_only_for_bools() { + assert_eq!(Int::from(true).as_bool(), Some(true)); + assert_eq!(Int::from(false).as_bool(), Some(false)); + + // Integer 1 is NOT a bool + assert_eq!(Int::from(1_i32).as_bool(), None); + assert_eq!(Int::from(0_i32).as_bool(), None); + } + + #[test] + fn as_int_works_for_all() { + assert_eq!(Int::from(42_i64).as_int(), 42); + assert_eq!(Int::from(-1_i128).as_int(), -1); + assert_eq!(Int::from(i128::MAX).as_int(), i128::MAX); + assert_eq!(Int::from(true).as_int(), 1); + assert_eq!(Int::from(false).as_int(), 0); + } + + #[test] + fn as_uint_works_for_all() { + assert_eq!(Int::from(42_i64).as_uint(), 42); + assert_eq!(Int::from(true).as_uint(), 1); + assert_eq!(Int::from(-1_i128).as_uint(), u128::MAX); + } + + #[test] + fn display_bool() { + assert_eq!(format!("{}", Int::from(true)), "true"); + assert_eq!(format!("{}", Int::from(false)), "false"); + } + + #[test] + fn display_int() { + assert_eq!(format!("{}", Int::from(42_i64)), "42"); + assert_eq!(format!("{}", Int::from(-1_i128)), "-1"); + } + + #[test] + fn equality_is_size_aware() { + assert_eq!(Int::from(true), Int::from(true)); + assert_eq!(Int::from(42_i64), Int::from(42_i64)); + assert_ne!(Int::from(true), Int::from(1_i64)); + } + + #[test] + fn ordering_groups_by_size() { + // Bools (size 1) sort before ints (size 128) + assert!(Int::from(true) < Int::from(0_i32)); + } + + #[test] + fn constants() { + assert_eq!(Int::FALSE, Int::from(false)); + assert_eq!(Int::TRUE, Int::from(true)); + assert_eq!(Int::ZERO, Int::from(0_i32)); + assert_eq!(Int::ONE, Int::from(1_i32)); + + // Constants have correct sizes + assert!(Int::FALSE.is_bool()); + assert!(Int::TRUE.is_bool()); + 
assert!(!Int::ZERO.is_bool()); + assert!(!Int::ONE.is_bool()); + } + + #[test] + fn add_ints() { + let result = Int::from(2_i64) + Int::from(3_i64); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == 5 && int.size() == 128)); + } + + #[test] + fn add_bools_promotes() { + let result = Int::from(true) + Int::from(true); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == 2 && int.size() == 128)); + } + + #[test] + fn sub_ints() { + let result = Int::from(5_i64) - Int::from(3_i64); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == 2 && int.size() == 128)); + } + #[test] fn neg_positive() { - let int = Int::from(42_i64); - let result = -int; - assert!(matches!(result, Numeric::Int(int) if int.as_i64() == Some(-42))); + let result = -Int::from(42_i64); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == -42)); } #[test] fn neg_negative() { - let int = Int::from(-100_i64); - let result = -int; - assert!(matches!(result, Numeric::Int(int) if int.as_i64() == Some(100))); + let result = -Int::from(-100_i64); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == 100)); } #[test] fn neg_zero() { - let int = Int::from(0_i64); - let result = -int; - assert!(matches!(result, Numeric::Int(int) if int.as_i64() == Some(0))); + let result = -Int::from(0_i64); + assert!(matches!(result, Numeric::Int(int) if int.as_int() == 0)); } #[test] fn neg_i128_max() { - let int = Int::from(i128::MAX); - let result = -int; + let result = -Int::from(i128::MAX); assert!(matches!(result, Numeric::Int(int) if int.as_int() == -i128::MAX)); } #[test] fn neg_i128_min_overflows_to_float() { - let int = Int::from(i128::MIN); - let result = -int; - + let result = -Int::from(i128::MIN); let Numeric::Num(num) = result else { panic!("expected Numeric::Num for -i128::MIN, got {result:?}"); }; - let expected = -(i128::MIN as f64); assert_eq!(num.as_f64(), expected); } + + #[test] + fn bitand_bools_stays_bool() { + let result = Int::from(true) & 
Int::from(false); + assert_eq!(result.size(), 1); + assert_eq!(result.as_bool(), Some(false)); + } + + #[test] + fn bitor_bools_stays_bool() { + let result = Int::from(false) | Int::from(true); + assert_eq!(result.size(), 1); + assert_eq!(result.as_bool(), Some(true)); + } + + #[test] + fn bitxor_bools_stays_bool() { + let result = Int::from(true) ^ Int::from(true); + assert_eq!(result.size(), 1); + assert_eq!(result.as_bool(), Some(false)); + } + + #[test] + fn bitand_mixed_promotes_to_int() { + let result = Int::from(true) & Int::from(1_i32); + assert_eq!(result.size(), 128); + assert_eq!(result.as_int(), 1); + } + + #[test] + fn not_bool() { + assert_eq!(!Int::from(true), Int::from(false)); + assert_eq!(!Int::from(false), Int::from(true)); + } + + #[test] + fn try_from_primitive_bool() { + use hashql_core::value::Primitive; + + let int = Int::try_from(Primitive::Boolean(true)).expect("should be able to convert bool"); + assert_eq!(int.size(), 1); + assert_eq!(int.as_bool(), Some(true)); + } + + #[test] + fn try_from_primitive_integer() { + use hashql_core::{ + heap::Heap, + value::{Integer, Primitive}, + }; + + let heap = Heap::new(); + let integer = Integer::new_unchecked(heap.intern_symbol("42")); + let int = + Int::try_from(Primitive::Integer(integer)).expect("should be able to convert integer"); + assert_eq!(int.size(), 128); + assert_eq!(int.as_int(), 42); + } } diff --git a/libs/@local/hashql/mir/src/interpret/value/list.rs b/libs/@local/hashql/mir/src/interpret/value/list.rs index 50093304881..ce457e1ad3f 100644 --- a/libs/@local/hashql/mir/src/interpret/value/list.rs +++ b/libs/@local/hashql/mir/src/interpret/value/list.rs @@ -42,7 +42,7 @@ impl<'heap, A: Allocator> List<'heap, A> { /// Returns a reference to the element at the given `index`. 
#[must_use] pub fn get(&self, index: Int) -> Option<&Value<'heap, A>> { - let index = index.as_isize()?; + let index = isize::try_from(index.as_int()).ok()?; if index.is_negative() { let abs = index.unsigned_abs(); @@ -63,7 +63,7 @@ impl<'heap, A: Allocator> List<'heap, A> { where A: Clone, { - let index = index.as_isize()?; + let index = isize::try_from(index.as_int()).ok()?; if index.is_negative() { let abs = index.unsigned_abs(); diff --git a/libs/@local/hashql/mir/src/interpret/value/mod.rs b/libs/@local/hashql/mir/src/interpret/value/mod.rs index 9b22a5fea1c..30c1ad42eb5 100644 --- a/libs/@local/hashql/mir/src/interpret/value/mod.rs +++ b/libs/@local/hashql/mir/src/interpret/value/mod.rs @@ -55,7 +55,7 @@ pub use self::{ opaque::Opaque, ptr::Ptr, str::Str, - r#struct::Struct, + r#struct::{Struct, StructBuilder}, tuple::Tuple, }; use super::error::{RuntimeError, TypeName}; @@ -125,7 +125,7 @@ pub enum Value<'heap, A: Allocator = Global> { impl<'heap, A: Allocator> Value<'heap, A> { const UNIT: Self = Self::Unit; - pub(crate) fn type_name(&self) -> ValueTypeName<'_, 'heap, A> { + pub fn type_name(&self) -> ValueTypeName<'_, 'heap, A> { ValueTypeName::from(self) } @@ -154,10 +154,10 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// Returns an error if this value is not subscriptable (not a list or dict), /// or if the index type is invalid for the collection type. #[inline] - pub fn subscript<'this, 'index>( + pub fn subscript<'this, 'index, E>( &'this self, index: &'index Self, - ) -> Result<&'this Self, RuntimeError<'heap, A>> { + ) -> Result<&'this Self, RuntimeError<'heap, E, A>> { match self { Self::List(list) if let &Self::Integer(value) = index => { Ok(list.get(value).unwrap_or(&Self::UNIT)) @@ -189,10 +189,10 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// /// Returns an error if this value is not subscriptable, if the index type /// is invalid, or if a list index is out of bounds. 
- pub fn subscript_mut<'this>( + pub fn subscript_mut<'this, E>( &'this mut self, index: &Self, - ) -> Result<&'this mut Self, RuntimeError<'heap, A>> + ) -> Result<&'this mut Self, RuntimeError<'heap, E, A>> where A: Clone, { @@ -232,11 +232,12 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// /// Returns an error if this value is not projectable or the field index is invalid. #[inline] - pub fn project<'this>( + pub fn project<'this, E>( &'this self, index: FieldIndex, - ) -> Result<&'this Self, RuntimeError<'heap, A>> { + ) -> Result<&'this Self, RuntimeError<'heap, E, A>> { match self { + Self::Opaque(opaque) => opaque.value().project(index), Self::Struct(r#struct) => { r#struct .get_by_index(index) @@ -254,7 +255,6 @@ impl<'heap, A: Allocator> Value<'heap, A> { | Self::Number(_) | Self::String(_) | Self::Pointer(_) - | Self::Opaque(_) | Self::List(_) | Self::Dict(_) => Err(RuntimeError::InvalidProjectionType { base: self.type_name().into(), @@ -269,10 +269,10 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// # Errors /// /// Returns an error if this value is not projectable or the field index is invalid. - pub fn project_mut<'this>( + pub fn project_mut<'this, E>( &'this mut self, index: FieldIndex, - ) -> Result<&'this mut Self, RuntimeError<'heap, A>> + ) -> Result<&'this mut Self, RuntimeError<'heap, E, A>> where A: Clone, { @@ -293,12 +293,12 @@ impl<'heap, A: Allocator> Value<'heap, A> { base: TypeName::terse(terse_name), field: index, }), + Self::Opaque(opaque) => opaque.value_mut().project_mut(index), Self::Unit | Self::Integer(_) | Self::Number(_) | Self::String(_) | Self::Pointer(_) - | Self::Opaque(_) | Self::List(_) | Self::Dict(_) => Err(RuntimeError::InvalidProjectionType { base: self.type_name().into(), @@ -313,22 +313,31 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// # Errors /// /// Returns an error if this value is not a struct or the field name is not found. 
- pub fn project_by_name<'this>( + pub fn project_by_name<'this, E>( &'this self, index: Symbol<'heap>, - ) -> Result<&'this Self, RuntimeError<'heap, A>> { - let Self::Struct(r#struct) = self else { - return Err(RuntimeError::InvalidProjectionByNameType { - base: self.type_name().into(), - }); - }; - - r#struct - .get_by_name(index) - .ok_or_else(|| RuntimeError::UnknownFieldByName { + ) -> Result<&'this Self, RuntimeError<'heap, E, A>> { + match self { + Value::Opaque(opaque) => opaque.value().project_by_name(index), + Value::Struct(r#struct) => { + r#struct + .get_by_name(index) + .ok_or_else(|| RuntimeError::UnknownFieldByName { + base: self.type_name().into(), + field: index, + }) + } + Value::Unit + | Value::Integer(_) + | Value::Number(_) + | Value::String(_) + | Value::Pointer(_) + | Value::Tuple(_) + | Value::List(_) + | Value::Dict(_) => Err(RuntimeError::InvalidProjectionByNameType { base: self.type_name().into(), - field: index, - }) + }), + } } /// Mutably projects a field from this value by name. @@ -338,28 +347,37 @@ impl<'heap, A: Allocator> Value<'heap, A> { /// # Errors /// /// Returns an error if this value is not a struct or the field name is not found. 
- pub fn project_by_name_mut<'this>( + pub fn project_by_name_mut<'this, E>( &'this mut self, index: Symbol<'heap>, - ) -> Result<&'this mut Self, RuntimeError<'heap, A>> + ) -> Result<&'this mut Self, RuntimeError<'heap, E, A>> where A: Clone, { let terse_name = self.type_name_terse(); - let Self::Struct(r#struct) = self else { - return Err(RuntimeError::InvalidProjectionByNameType { - base: self.type_name().into(), - }); - }; + match self { + Value::Opaque(opaque) => opaque.value_mut().project_by_name_mut(index), + Value::Struct(r#struct) => { + if let Some(value) = r#struct.get_by_name_mut(index) { + return Ok(value); + } - if let Some(value) = r#struct.get_by_name_mut(index) { - return Ok(value); + Err(RuntimeError::UnknownFieldByName { + base: TypeName::terse(terse_name), + field: index, + }) + } + Value::Unit + | Value::Integer(_) + | Value::Number(_) + | Value::String(_) + | Value::Pointer(_) + | Value::Tuple(_) + | Value::List(_) + | Value::Dict(_) => Err(RuntimeError::InvalidProjectionByNameType { + base: self.type_name().into(), + }), } - - Err(RuntimeError::UnknownFieldByName { - base: TypeName::terse(terse_name), - field: index, - }) } } diff --git a/libs/@local/hashql/mir/src/interpret/value/opaque.rs b/libs/@local/hashql/mir/src/interpret/value/opaque.rs index 1b746ac10e6..dfbae62c0af 100644 --- a/libs/@local/hashql/mir/src/interpret/value/opaque.rs +++ b/libs/@local/hashql/mir/src/interpret/value/opaque.rs @@ -44,6 +44,14 @@ impl<'heap, A: Allocator> Opaque<'heap, A> { &self.value } + #[must_use] + pub fn value_mut(&mut self) -> &mut Value<'heap, A> + where + A: Clone, + { + Rc::make_mut(&mut self.value) + } + /// Returns a displayable representation of this opaque type's name. 
pub fn type_name(&self) -> impl Display { fmt::from_fn(|fmt| { diff --git a/libs/@local/hashql/mir/src/interpret/value/str.rs b/libs/@local/hashql/mir/src/interpret/value/str.rs index 9dcd6357e67..ee211df869d 100644 --- a/libs/@local/hashql/mir/src/interpret/value/str.rs +++ b/libs/@local/hashql/mir/src/interpret/value/str.rs @@ -1,12 +1,12 @@ //! String representation for the MIR interpreter. use alloc::{alloc::Global, rc::Rc}; -use core::{alloc::Allocator, cmp}; +use core::{alloc::Allocator, cmp, fmt}; use hashql_core::{symbol::Symbol, value::String}; /// Internal storage for string values. -#[derive(Debug, Clone)] +#[derive(Clone)] enum StrInner<'heap, A: Allocator> { Owned(Rc), Interned(Symbol<'heap>), @@ -46,7 +46,7 @@ impl Ord for StrInner<'_, A> { /// Supports both owned strings (via [`Rc`]) and borrowed interned /// symbols. This dual representation allows efficient handling of both /// dynamically created strings and compile-time literals. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct Str<'heap, A: Allocator = Global> { inner: StrInner<'heap, A>, } @@ -89,6 +89,20 @@ impl<'heap, A: Allocator> From<&String<'heap>> for Str<'heap, A> { } } +impl From> for Str<'_, A> { + fn from(value: Rc) -> Self { + Self { + inner: StrInner::Owned(value), + } + } +} + +impl core::fmt::Debug for Str<'_, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Str").field(&self.as_str()).finish() + } +} + impl PartialEq for Str<'_, A> { fn eq(&self, other: &Self) -> bool { let Self { inner } = self; diff --git a/libs/@local/hashql/mir/src/interpret/value/struct.rs b/libs/@local/hashql/mir/src/interpret/value/struct.rs index 6475652ef4b..4e265b00983 100644 --- a/libs/@local/hashql/mir/src/interpret/value/struct.rs +++ b/libs/@local/hashql/mir/src/interpret/value/struct.rs @@ -5,12 +5,14 @@ use core::{ alloc::Allocator, cmp, fmt::{self, Display}, + mem::MaybeUninit, + ptr, }; -use hashql_core::{id::Id as _, intern::Interned, symbol::Symbol}; 
+use hashql_core::{algorithms::co_sort, id::Id as _, intern::Interned, symbol::Symbol}; use super::Value; -use crate::body::place::FieldIndex; +use crate::{body::place::FieldIndex, intern::Interner}; /// A named-field struct value. /// @@ -36,6 +38,7 @@ impl<'heap, A: Allocator> Struct<'heap, A> { values: Rc<[Value<'heap, A>], A>, ) -> Self { debug_assert_eq!(fields.len(), values.len()); + debug_assert!(fields.is_sorted()); Self { fields, values } } @@ -205,3 +208,357 @@ impl DoubleEndedIterator for StructIter<'_, '_, A> { } impl ExactSizeIterator for StructIter<'_, '_, A> {} + +/// A builder for [`Struct`] values with capacity for `N` fields. +pub struct StructBuilder<'heap, A: Allocator, const N: usize> { + /// Number of initialized field-value pairs. Only elements in + /// `[..initialized]` are considered live for dropping. + initialized: usize, + + fields: [MaybeUninit>; N], + values: [MaybeUninit>; N], +} + +#[expect(unsafe_code)] +impl<'heap, A: Allocator, const N: usize> StructBuilder<'heap, A, N> { + /// Creates an empty builder with capacity for `N` fields. + #[must_use] + pub const fn new() -> Self { + Self { + initialized: 0, + fields: MaybeUninit::uninit().transpose(), + values: MaybeUninit::uninit().transpose(), + } + } + + /// Returns the field names pushed so far. + #[must_use] + pub fn fields(&self) -> &[Symbol<'heap>] { + // SAFETY: `fields[..initialized]` is fully initialized by invariant. + unsafe { self.fields[..self.initialized].assume_init_ref() } + } + + /// Returns the field values pushed so far. + #[must_use] + pub fn values(&self) -> &[Value<'heap, A>] { + // SAFETY: `values[..initialized]` is fully initialized by invariant. + unsafe { self.values[..self.initialized].assume_init_ref() } + } + + /// Returns the number of fields pushed so far. + #[must_use] + pub const fn len(&self) -> usize { + self.initialized + } + + /// Returns `true` if no fields have been pushed. 
+ #[must_use] + pub const fn is_empty(&self) -> bool { + self.initialized == 0 + } + + /// Pushes a field-value pair without checking capacity or uniqueness. + /// + /// # Safety + /// + /// The caller must ensure that `self.initialized < N` (the builder is not full), + /// and that `field` has not already been pushed. + pub const unsafe fn push_unchecked(&mut self, field: Symbol<'heap>, value: Value<'heap, A>) { + // Both `MaybeUninit::write` calls complete without panicking, so + // incrementing `initialized` afterwards preserves the invariant. + self.fields[self.initialized].write(field); + self.values[self.initialized].write(value); + + self.initialized += 1; + } + + /// Pushes a field-value pair. + /// + /// # Panics + /// + /// - If the builder is full (`initialized == N`) + /// - If `field` has already been pushed + pub fn push(&mut self, field: Symbol<'heap>, value: Value<'heap, A>) { + assert_ne!(self.initialized, N, "struct is full"); + assert!(!self.fields().contains(&field), "field already exists"); + + // SAFETY: we just asserted `initialized < N`. + unsafe { + self.push_unchecked(field, value); + } + } + + /// Consumes the builder and produces a [`Struct`]. + pub fn finish(mut self, interner: &Interner<'heap>, alloc: A) -> Struct<'heap, A> { + // SAFETY: `fields[..initialized]` is fully initialized by invariant. + let fields_mut = unsafe { self.fields[..self.initialized].assume_init_mut() }; + // SAFETY: `values[..initialized]` is fully initialized by invariant. + let values_mut = unsafe { self.values[..self.initialized].assume_init_mut() }; + + // The `Struct` expects that fields are sorted by their symbol. + // `co_sort` only swaps elements in-place and never leaves holes, so the + // initialization invariant is preserved even if it were to unwind. + co_sort(fields_mut, values_mut); + + let fields = interner.symbols.intern_slice(self.fields()); + + // Allocate an uninitialized Rc slice for the values. 
+ // + // No drop guard is needed here because: + // - Any panic before `copy_nonoverlapping` leaves ownership with `self`, and + // `self.initialized` is unchanged, so `Drop` frees everything. + // - There is no panicking operation between `copy_nonoverlapping` and `self.initialized = + // 0`. + let mut values = Rc::new_uninit_slice_in(self.initialized, alloc); + + // SAFETY: `values` was just created so the refcount is 1 and no other references exist. + let destination = unsafe { Rc::get_mut_unchecked(&mut values) }; + + // SAFETY: we copy exactly `self.initialized` initialized elements from + // the builder's stack array into the Rc allocation. The source and + // destination do not overlap (stack vs heap). + unsafe { + ptr::copy_nonoverlapping( + self.values.as_ptr(), + destination.as_mut_ptr(), + self.initialized, + ); + }; + + // Ownership of the values has been moved into the Rc via bitwise copy. + // We must clear the drop frontier so `Drop` does not double-free them. + self.initialized = 0; + + // SAFETY: all elements in the Rc slice were initialized by the + // `copy_nonoverlapping` above. + let values = unsafe { values.assume_init() }; + + Struct { fields, values } + } +} + +impl Default for StructBuilder<'_, A, N> { + fn default() -> Self { + Self::new() + } +} + +#[expect(unsafe_code)] +impl Drop for StructBuilder<'_, A, N> { + fn drop(&mut self) { + // SAFETY: by invariant, `[..initialized]` is fully initialized. + // After `finish()` sets `initialized = 0`, this is a no-op. 
+ unsafe { + self.fields[..self.initialized].assume_init_drop(); + self.values[..self.initialized].assume_init_drop(); + } + } +} + +#[cfg(test)] +mod tests { + use alloc::alloc::Global; + + use hashql_core::heap::Heap; + + use super::*; + use crate::interpret::value::{Int, Str, Value}; + + fn int(value: i128) -> Value<'static> { + Value::Integer(Int::from(value)) + } + + fn string(value: &str) -> Value<'static> { + Value::String(Str::from(Rc::::from(value))) + } + + #[test] + fn finish_produces_sorted_fields() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + let sym_b = heap.intern_symbol("b"); + let sym_a = heap.intern_symbol("a"); + + // Push in reverse order; finish must sort by symbol. + let mut builder = StructBuilder::<'_, Global, 2>::new(); + builder.push(sym_b, int(2)); + builder.push(sym_a, int(1)); + + let result = builder.finish(&interner, Global); + + // Fields should be sorted: a before b. + assert_eq!(result.fields().len(), 2); + assert_eq!(result.get_by_name(sym_a), Some(&int(1))); + assert_eq!(result.get_by_name(sym_b), Some(&int(2))); + } + + #[test] + fn finish_empty_builder() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + let builder = StructBuilder::<'_, Global, 0>::new(); + let result = builder.finish(&interner, Global); + + assert!(result.is_empty()); + assert_eq!(result.len(), 0); + } + + #[test] + fn finish_single_field() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + let sym = heap.intern_symbol("only"); + + let mut builder = StructBuilder::<'_, Global, 1>::new(); + builder.push(sym, int(42)); + + let result = builder.finish(&interner, Global); + + assert_eq!(result.len(), 1); + assert_eq!(result.get_by_name(sym), Some(&int(42))); + } + + #[test] + fn drop_partial_builder_no_double_free() { + let heap = Heap::new(); + + let sym_x = heap.intern_symbol("x"); + let sym_y = heap.intern_symbol("y"); + + // Push values with Drop (String contains Rc), then drop the + // builder 
without finishing. Miri detects double-free or leak. + let mut builder = StructBuilder::<'_, Global, 3>::new(); + builder.push(sym_x, string("hello")); + builder.push(sym_y, string("world")); + // Capacity is 3 but only 2 are filled; drop must handle this. + drop(builder); + } + + #[test] + fn drop_empty_builder() { + // Zero initialized elements; Drop should be a no-op. + let _builder = StructBuilder::<'_, Global, 4>::new(); + } + + #[test] + fn drop_full_builder_without_finish() { + let heap = Heap::new(); + + let sym_a = heap.intern_symbol("a"); + let sym_b = heap.intern_symbol("b"); + + let mut builder = StructBuilder::<'_, Global, 2>::new(); + builder.push(sym_a, string("val_a")); + builder.push(sym_b, string("val_b")); + // Full but never finished; Drop must free both. + drop(builder); + } + + #[test] + fn finish_with_drop_values_no_double_free() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + let sym_a = heap.intern_symbol("a"); + let sym_b = heap.intern_symbol("b"); + + let mut builder = StructBuilder::<'_, Global, 2>::new(); + builder.push(sym_a, string("alpha")); + builder.push(sym_b, string("beta")); + + // finish moves values into Rc; builder Drop must not re-drop them. + let result = builder.finish(&interner, Global); + + assert_eq!(result.len(), 2); + // Verify values survived the move. + let Value::String(ref value) = *result.get_by_name(sym_a).expect("field should exist") + else { + panic!("expected String"); + }; + assert_eq!(value.as_str(), "alpha"); + } + + #[test] + fn finish_sorts_drop_values_correctly() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + + // Create symbols that sort in a known order. + let sym_c = heap.intern_symbol("c"); + let sym_a = heap.intern_symbol("a"); + let sym_b = heap.intern_symbol("b"); + + // Push in c, a, b order. 
+ let mut builder = StructBuilder::<'_, Global, 3>::new(); + builder.push(sym_c, string("charlie")); + builder.push(sym_a, string("alpha")); + builder.push(sym_b, string("bravo")); + + let result = builder.finish(&interner, Global); + + // After sorting: a, b, c. + let pairs: Vec<_> = result.iter().collect(); + assert_eq!(pairs.len(), 3); + assert_eq!(pairs[0].0, sym_a); + assert_eq!(pairs[1].0, sym_b); + assert_eq!(pairs[2].0, sym_c); + + // Values must follow their fields. + let Value::String(ref value) = *pairs[0].1 else { + panic!("expected String"); + }; + assert_eq!(value.as_str(), "alpha"); + } + + #[test] + fn fields_and_values_reflect_push_count() { + let heap = Heap::new(); + + let sym_a = heap.intern_symbol("a"); + let sym_b = heap.intern_symbol("b"); + + let mut builder = StructBuilder::<'_, Global, 3>::new(); + assert!(builder.is_empty()); + assert_eq!(builder.len(), 0); + assert!(builder.fields().is_empty()); + assert!(builder.values().is_empty()); + + builder.push(sym_a, int(1)); + assert_eq!(builder.len(), 1); + assert_eq!(builder.fields(), &[sym_a]); + assert_eq!(builder.values(), &[int(1)]); + + builder.push(sym_b, int(2)); + assert_eq!(builder.len(), 2); + } + + #[test] + #[should_panic(expected = "struct is full")] + fn push_panics_when_full() { + let heap = Heap::new(); + + let sym_a = heap.intern_symbol("a"); + let sym_b = heap.intern_symbol("b"); + + let mut builder = StructBuilder::<'_, Global, 1>::new(); + builder.push(sym_a, int(1)); + // This must panic, and the builder's Drop must still free sym_a's value. + builder.push(sym_b, int(2)); + } + + #[test] + #[should_panic(expected = "field already exists")] + fn push_panics_on_duplicate_field() { + let heap = Heap::new(); + + let sym = heap.intern_symbol("dup"); + + let mut builder = StructBuilder::<'_, Global, 2>::new(); + builder.push(sym, string("first")); + // This must panic. "first" must still be freed by Drop. 
+ builder.push(sym, string("second")); + } +} diff --git a/libs/@local/hashql/mir/src/lib.rs b/libs/@local/hashql/mir/src/lib.rs index 744d289dd47..dcbb5b2831a 100644 --- a/libs/@local/hashql/mir/src/lib.rs +++ b/libs/@local/hashql/mir/src/lib.rs @@ -28,6 +28,7 @@ temporary_niche_types, try_trait_v2, variant_count, + maybe_uninit_uninit_array_transpose )] #![cfg_attr(test, feature( // Library Features diff --git a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs index e3e661d377b..c3c4f6a54dc 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/graph/mod.rs @@ -279,6 +279,12 @@ impl IslandGraph { .filter(move |node| node.data.target == target) .map(|node| (IslandId::new(node.id().as_u32()), &node.data)) } + + pub fn lookup(&self, block: BasicBlockId) -> (IslandId, &IslandNode) { + let id = self.lookup[block]; + + (id, &self[id]) + } } impl DirectedGraph for IslandGraph { diff --git a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs index 26099f9396d..143842e4498 100644 --- a/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/island/schedule/mod.rs @@ -84,8 +84,9 @@ impl IslandGraph { /// strictly lower levels. Islands at the same level have no direct dependencies and /// can execute concurrently. 
#[expect(clippy::cast_possible_truncation)] - pub fn schedule_in(&self, scratch: S, alloc: A) -> IslandSchedule + pub fn schedule_in(&self, scratch: S, alloc: B) -> IslandSchedule where + B: Allocator, S: Allocator + Clone, { let node_count = self.node_count(); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs index 2e9b2f77646..05078445d0b 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/entity.rs @@ -6,6 +6,7 @@ use hashql_core::{ bit_vec::{BitRelations as _, FiniteBitSet}, }, symbol::{ConstantSymbol, Symbol, sym}, + r#type::{TypeBuilder, TypeId, environment::Environment}, }; use super::{ @@ -229,6 +230,127 @@ impl EntityPath { } } + /// The sequence of struct field names from the entity root to this path's position + /// in the type hierarchy. + /// + /// Each element corresponds to a field name in a nested struct. For example, + /// [`WebId`](Self::WebId) returns `[metadata, record_id, entity_id, web_id]`, + /// meaning the resolution walks: `Entity` → `metadata` field → `EntityMetadata` → + /// `record_id` field → `EntityRecordId` → `entity_id` field → `EntityId` → `web_id` field. + /// + /// Used by [`Self::resolve_type`] to navigate the entity type structure. 
+ #[must_use] + pub const fn field_path(self) -> &'static [Symbol<'static>] { + match self { + Self::Properties => &[sym::properties], + Self::Vectors => &[sym::encodings, sym::vectors], + Self::RecordId => &[sym::metadata, sym::record_id], + Self::EntityId => &[sym::metadata, sym::record_id, sym::entity_id], + Self::WebId => &[sym::metadata, sym::record_id, sym::entity_id, sym::web_id], + Self::EntityUuid => &[ + sym::metadata, + sym::record_id, + sym::entity_id, + sym::entity_uuid, + ], + Self::DraftId => &[sym::metadata, sym::record_id, sym::entity_id, sym::draft_id], + Self::EditionId => &[sym::metadata, sym::record_id, sym::edition_id], + Self::TemporalVersioning => &[sym::metadata, sym::temporal_versioning], + Self::DecisionTime => &[sym::metadata, sym::temporal_versioning, sym::decision_time], + Self::TransactionTime => &[ + sym::metadata, + sym::temporal_versioning, + sym::transaction_time, + ], + Self::EntityTypeIds => &[sym::metadata, sym::entity_type_ids], + Self::Archived => &[sym::metadata, sym::archived], + Self::Confidence => &[sym::metadata, sym::confidence], + Self::ProvenanceInferred => &[sym::metadata, sym::provenance, sym::inferred], + Self::ProvenanceEdition => &[sym::metadata, sym::provenance, sym::edition], + Self::PropertyMetadata => &[sym::metadata, sym::properties], + Self::LeftEntityWebId => &[sym::link_data, sym::left_entity_id, sym::web_id], + Self::LeftEntityUuid => &[sym::link_data, sym::left_entity_id, sym::entity_uuid], + Self::RightEntityWebId => &[sym::link_data, sym::right_entity_id, sym::web_id], + Self::RightEntityUuid => &[sym::link_data, sym::right_entity_id, sym::entity_uuid], + Self::LeftEntityConfidence => &[sym::link_data, sym::left_entity_confidence], + Self::RightEntityConfidence => &[sym::link_data, sym::right_entity_confidence], + Self::LeftEntityProvenance => &[sym::link_data, sym::left_entity_provenance], + Self::RightEntityProvenance => &[sym::link_data, sym::right_entity_provenance], + } + } + + /// Returns the 
type of this path. + /// + /// Every path except [`Properties`](Self::Properties) has a fixed type that does not + /// depend on the entity being queried. This method is a convenience for callers that + /// know the path is not `Properties`. + /// + /// # Panics + /// + /// Panics if called on [`Properties`](Self::Properties), which has no fixed type. + pub fn expect_type(self, env: &Environment<'_>) -> TypeId { + self.resolve_type(env) + .expect("called `expect_type` on `Properties`, which has no fixed type") + } + + /// Returns the type of this path, or `None` for [`Properties`](Self::Properties). + /// + /// Every path except `Properties` has a fixed type determined by the entity schema — + /// it doesn't depend on which `Entity` is being queried. `Properties` returns `None` + /// because its type is the generic `T` parameter, which varies per entity type. + /// + /// Types are constructed from the canonical factory functions in the standard library, + /// ensuring they match the definitions registered by the module system. 
+ pub fn resolve_type(self, env: &Environment<'_>) -> Option { + use hashql_core::module::std_lib::{ + core::option::types as option, + graph::{ + temporal::types as temporal, + types::{ + knowledge::entity::types as entity, ontology::types as ontology, + principal::actor_group::web::types as web, + }, + }, + }; + + let ty = TypeBuilder::synthetic(env); + + let r#type = match self { + Self::Properties => return None, + Self::Vectors => ty.unknown(), + Self::RecordId => entity::record_id(&ty, None), + Self::EntityId => entity::entity_id(&ty, None), + Self::WebId | Self::LeftEntityWebId | Self::RightEntityWebId => web::web_id(&ty, None), + Self::EntityUuid | Self::LeftEntityUuid | Self::RightEntityUuid => { + entity::entity_uuid(&ty, None) + } + Self::DraftId => entity::draft_id(&ty, None), + Self::EditionId => entity::entity_edition_id(&ty, None), + Self::TemporalVersioning => entity::temporal_metadata(&ty, None), + Self::DecisionTime => { + let interval = temporal::left_closed_temporal_interval(&ty); + temporal::decision_time(&ty, interval) + } + Self::TransactionTime => { + let interval = temporal::left_closed_temporal_interval(&ty); + temporal::transaction_time(&ty, interval) + } + Self::EntityTypeIds => ty.list(ontology::versioned_url(&ty, None)), + Self::Archived => ty.boolean(), + Self::Confidence | Self::LeftEntityConfidence | Self::RightEntityConfidence => { + option::option(&ty, entity::confidence(&ty)) + } + Self::ProvenanceInferred => entity::inferred_entity_provenance(&ty), + Self::ProvenanceEdition => entity::entity_edition_provenance(&ty), + Self::PropertyMetadata => entity::property_object_metadata(&ty), + Self::LeftEntityProvenance | Self::RightEntityProvenance => { + entity::property_provenance(&ty) + } + }; + + Some(r#type) + } + /// Returns the set of execution targets that natively serve this path. 
pub(crate) const fn origin(self) -> TargetBitSet { let mut set = TargetBitSet::new_empty(TargetId::VARIANT_COUNT_U32); diff --git a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs index f72b02e0e75..37b5f21404f 100644 --- a/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs +++ b/libs/@local/hashql/mir/src/pass/execution/traversal/mod.rs @@ -19,7 +19,11 @@ mod analysis; mod tests; pub(crate) use analysis::{TraversalAnalysisVisitor, TraversalResult}; -use hashql_core::{id::IdArray, symbol::Symbol}; +use hashql_core::{ + id::IdArray, + symbol::Symbol, + r#type::{TypeId, environment::Environment}, +}; pub use self::entity::{EntityPath, EntityPathBitSet}; pub(crate) use self::{access::Access, entity::TransferCostConfig}; @@ -241,6 +245,19 @@ impl TraversalPath { } } + /// Returns the type of this path. + /// + /// Most paths have a fixed type determined by the graph schema. Paths whose type + /// depends on the specific vertex being queried (e.g. entity properties) return + /// [`None`]. + #[inline] + #[must_use] + pub fn resolve_type(self, env: &Environment<'_>) -> Option { + match self { + Self::Entity(path) => path.resolve_type(env), + } + } + /// Returns the set of execution targets that natively serve this path. #[inline] #[must_use] diff --git a/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs b/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs index cb18f27e390..9081ad56b10 100644 --- a/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/inst_simplify/mod.rs @@ -252,20 +252,19 @@ impl<'heap, A: Allocator> InstSimplifyVisitor<'_, 'heap, A> { /// Evaluates a binary operation on two constant integers. 
fn eval_bin_op(lhs: Int, op: BinOp, rhs: Int) -> Option { - let lhs = lhs.as_int(); - let rhs = rhs.as_int(); - let result = match op { - BinOp::Add => return lhs.checked_add(rhs).map(Int::from), - BinOp::Sub => return lhs.checked_sub(rhs).map(Int::from), - BinOp::BitAnd => lhs & rhs, - BinOp::BitOr => lhs | rhs, - BinOp::Eq => i128::from(lhs == rhs), - BinOp::Ne => i128::from(lhs != rhs), - BinOp::Lt => i128::from(lhs < rhs), - BinOp::Lte => i128::from(lhs <= rhs), - BinOp::Gt => i128::from(lhs > rhs), - BinOp::Gte => i128::from(lhs >= rhs), + BinOp::Add => return lhs.checked_add(rhs), + BinOp::Sub => return lhs.checked_sub(rhs), + // Bitwise ops preserve bool provenance via the Int operators + BinOp::BitAnd => return Some(lhs & rhs), + BinOp::BitOr => return Some(lhs | rhs), + // Comparisons produce booleans + BinOp::Eq => lhs.as_int() == rhs.as_int(), + BinOp::Ne => lhs.as_int() != rhs.as_int(), + BinOp::Lt => lhs.as_int() < rhs.as_int(), + BinOp::Lte => lhs.as_int() <= rhs.as_int(), + BinOp::Gt => lhs.as_int() > rhs.as_int(), + BinOp::Gte => lhs.as_int() >= rhs.as_int(), }; Some(Int::from(result)) @@ -273,21 +272,12 @@ impl<'heap, A: Allocator> InstSimplifyVisitor<'_, 'heap, A> { /// Evaluates a unary operation on a constant integer. fn eval_un_op(op: UnOp, operand: Int) -> Int { - let value = operand.as_int(); - - let result = match op { - UnOp::Not => { - let Some(value) = operand.as_bool() else { - unreachable!("only boolean values can be negated"); - }; - - i128::from(!value) - } - UnOp::Neg => -value, - UnOp::BitNot => !value, - }; - - Int::from(result) + match op { + // Both Not and BitNot use the `!` operator, which dispatches on size: + // booleans get logical NOT, integers get bitwise NOT. 
+ UnOp::Not | UnOp::BitNot => !operand, + UnOp::Neg => Int::from(-operand.as_int()), + } } /// Attempts to simplify a binary operation with a constant left operand and place right diff --git a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs index 91c91265a55..46eabd3cbee 100644 --- a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs +++ b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/mod.rs @@ -547,10 +547,13 @@ impl<'heap> VisitorMut<'heap> for RewireBody<'_, 'heap> { location: Location, params: &mut Interned<'heap, [Local]>, ) -> Self::Result<()> { - // We don't walk the params here, we handle the `Def` site differently in `visit_local`, so - // don't need to set `self.last_def`. + // Block parameters are definitions at the block header. They must be renamed and placed + // on the reaching-definition chain before any uses in the block are visited. This is + // independent of `block_top`: a block can have an existing param for the repaired local + // without being in the IDF (e.g. terminal blocks where the IDF is empty). + Ok(()) = visit::r#mut::walk_params(self, location, params); + let Some(&def) = self.block_top.lookup(location.block) else { - // No `FindDefFromTop` result is required in the body return Ok(()); }; @@ -753,6 +756,19 @@ impl<'heap> Visitor<'heap> for UseBeforeDef { visit::r#ref::walk_statement(self, location, statement) } + fn visit_terminator( + &mut self, + location: Location, + terminator: &crate::body::terminator::Terminator<'heap>, + ) -> Self::Result { + // Same thing applies as in `visit_statement`. 
+ if location.statement_index >= self.def_statement_index { + return ControlFlow::Continue(()); + } + + visit::r#ref::walk_terminator(self, location, terminator) + } + fn visit_statement_assign( &mut self, location: Location, diff --git a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs index 58f938054d6..c9110755da4 100644 --- a/libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs +++ b/libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs @@ -616,3 +616,83 @@ fn reassign_rodeo() { }, ); } + +/// Regression test for a bug where SSA repair panicked when a local was both +/// a block parameter in a terminal block and assigned in a sibling block. +/// +/// `UseBeforeDef` incorrectly reported a use-before-def in the block-param +/// block (the terminator use was not guarded), and `RewireBody` failed to +/// recognize existing block-param definitions without a `block_top` entry. +#[test] +fn block_param_def_with_sibling_assignment() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // bb0 branches to bb1 or bb2. bb1 assigns x and returns it directly. + // bb2 receives x as a block parameter and returns it. Both blocks are + // terminal, so the IDF of {bb1, bb2} is empty. + let body = body!(interner, env; fn@0/0 -> Int { + decl x: Int, cond: Bool; + + bb0() { + cond = load true; + if cond then bb2() else bb1(0); + }, + bb1(x) { + return x; + }, + bb2() { + x = load 1; + return x; + } + }); + + assert_ssa_pass( + "block_param_def_with_sibling_assignment", + body, + MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + ); +} + +#[test] +fn block_param_def_with_sibling_assignment2() { + let heap = Heap::new(); + let interner = Interner::new(&heap); + let env = Environment::new(&heap); + + // bb0 branches to bb1 or bb2. bb1 assigns x and returns it directly. 
+ // bb2 receives x as a block parameter and returns it. Both blocks are + // terminal, so the IDF of {bb1, bb2} is empty. + let body = body!(interner, env; fn@0/0 -> Int { + decl x: Int, cond: Bool; + + bb0() { + cond = load true; + if cond then bb1() else bb2(0); + }, + bb1() { + x = load 1; + return x; + }, + bb2(x) { + return x; + } + }); + + assert_ssa_pass( + "block_param_def_with_sibling_assignment2", + body, + MirContext { + heap: &heap, + env: &env, + interner: &interner, + diagnostics: DiagnosticIssues::new(), + }, + ); +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/cfg_simplify/noop_block_multiple_predecessors.snap b/libs/@local/hashql/mir/tests/ui/pass/cfg_simplify/noop_block_multiple_predecessors.snap index 0574d53f42a..3e01c57eb61 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/cfg_simplify/noop_block_multiple_predecessors.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/cfg_simplify/noop_block_multiple_predecessors.snap @@ -6,7 +6,7 @@ fn {closure@4294967040}() -> Null { let %0: Boolean bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb2(), 1: bb1()] } @@ -34,7 +34,7 @@ fn {closure@4294967040}() -> Null { let %0: Boolean bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy.snap b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy.snap index 59e943e3318..88d0220260a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy.snap @@ -9,7 +9,7 @@ fn {closure@4294967040}() -> Boolean { let %3: Boolean bb0(): { - %1 = 1 + %1 = true switchInt(%1) -> [0: bb2(), 1: bb1()] } @@ -38,9 +38,9 @@ fn {closure@4294967040}() -> Boolean { let %3: Boolean bb0(): { - %1 = 1 + %1 = true - switchInt(1) -> [0: bb2(), 1: bb1()] + switchInt(true) -> [0: bb2(), 1: bb1()] } bb1(): { diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy_disagreement.snap b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy_disagreement.snap index 860c8f29c6d..1a1c694a316 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy_disagreement.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_copy_disagreement.snap @@ -10,7 +10,7 @@ fn {closure@4294967040}() -> Boolean { let %4: Boolean bb0(): { - %2 = 1 + %2 = true switchInt(%2) -> [0: bb2(), 1: bb1()] } @@ -40,9 +40,9 @@ fn {closure@4294967040}() -> Boolean { let %4: Boolean bb0(): { - %2 = 1 + %2 = true - switchInt(1) -> [0: bb2(), 1: bb1()] + switchInt(true) -> [0: bb2(), 1: bb1()] } bb1(): { diff --git a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_disagreement.snap b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_disagreement.snap index 10ceceb2c04..4a4dfa65892 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_disagreement.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_disagreement.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/transform/cp/tests.rs +source: libs/@local/hashql/mir/src/pass/transform/copy_propagation/tests.rs expression: value --- fn {closure@4294967040}() -> Boolean { @@ -8,7 +8,7 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb2(), 1: bb1()] } @@ -36,9 +36,9 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %0 = 1 + %0 = true - switchInt(1) -> [0: bb2(), 1: bb1()] + switchInt(true) -> [0: bb2(), 1: bb1()] } bb1(): { diff --git a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_unanimous.snap b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_unanimous.snap index 56584f9df79..08fd9cdcc8c 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_unanimous.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/block_param_unanimous.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/transform/cp/tests.rs +source: libs/@local/hashql/mir/src/pass/transform/copy_propagation/tests.rs expression: value --- fn {closure@4294967040}() -> Boolean { @@ -8,7 +8,7 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb2(), 1: bb1()] } @@ -36,9 +36,9 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %0 = 1 + %0 = true - switchInt(1) -> [0: bb2(), 1: bb1()] + switchInt(true) -> [0: bb2(), 1: bb1()] } bb1(): { diff --git a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/loop_back_edge.snap b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/loop_back_edge.snap index 2fe78eb4bbf..be7571e9dc3 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/loop_back_edge.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/copy_propagation/loop_back_edge.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/transform/cp/tests.rs +source: libs/@local/hashql/mir/src/pass/transform/copy_propagation/tests.rs expression: value --- fn {closure@4294967040}() -> Boolean { @@ -8,7 +8,7 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %2 = 1 + %2 = true goto -> bb1(1) } @@ -32,7 +32,7 @@ fn {closure@4294967040}() -> Boolean { let %2: Boolean bb0(): { - %2 = 1 + %2 = true goto -> bb1(1) } @@ -40,7 +40,7 @@ fn {closure@4294967040}() -> Boolean { bb1(%0): { %1 = %0 == %0 - switchInt(1) -> [0: bb2(), 1: bb1(2)] + switchInt(true) -> [0: bb2(), 1: bb1(2)] } bb2(): { diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap index 2192f800648..7d9e9926001 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/entity_uuid_equality.snap @@ -2,7 +2,7 @@ source: libs/@local/hashql/mir/src/pass/execution/tests.rs expression: output --- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %2: Boolean let %3: Uuid let %4: EntityUuid diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_backward_chain.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_backward_chain.snap index 49532f6b102..e24ae2e5856 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_backward_chain.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_backward_chain.snap @@ -8,7 +8,7 @@ fn {closure@4294967040}() -> Integer { let %2: Integer bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_diamond_non_monotonic_rpo.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_diamond_non_monotonic_rpo.snap index 0665de1d9fa..a11ef434516 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_diamond_non_monotonic_rpo.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_diamond_non_monotonic_rpo.snap @@ -11,7 +11,7 @@ fn {closure@4294967040}() -> Integer { let %5: Integer bb0(): { - %0 = 1 + %0 = true switchInt(%0) -> [0: bb1(), 1: bb2()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_does_not_fuse_join_points.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_does_not_fuse_join_points.snap index b29f7c070c7..736dae66f33 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_does_not_fuse_join_points.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_does_not_fuse_join_points.snap @@ -9,7 +9,7 @@ fn {closure@4294967040}() -> 
Integer { let %3: Boolean bb0(): { - %3 = 1 + %3 = true switchInt(%3) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_updates_branch_references.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_updates_branch_references.snap index 0236a965f84..a192b9defd8 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_updates_branch_references.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/fusion/fuse_updates_branch_references.snap @@ -10,7 +10,7 @@ fn {closure@4294967040}() -> Integer { bb0(): { %0 = 1 - %3 = 1 + %3 = true switchInt(%3) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/splitting/split_block_references_updated.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/splitting/split_block_references_updated.snap index 86f2ee0a201..64d1ff3c3f9 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/splitting/split_block_references_updated.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/splitting/split_block_references_updated.snap @@ -1,5 +1,5 @@ --- -source: libs/@local/hashql/mir/src/pass/analysis/execution/splitting/tests.rs +source: libs/@local/hashql/mir/src/pass/execution/splitting/tests.rs expression: output --- fn {closure@4294967040}() -> Integer { @@ -20,7 +20,7 @@ fn {closure@4294967040}() -> Integer { } bb2(): { - %2 = 1 // IP + %2 = true // IP switchInt(%2) -> [0: bb4(), 1: bb3()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap index bcfb665f330..fb609ea3efe 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/embedding/other_operations_rejected.snap @@ -24,7 +24,7 @@ fn 
{graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { %8 = %0 %9 = closure(({def@123} as FnPtr), %8) %10 = apply %9 1 - %11 = 1 + %11 = true return %11 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap index 49428694348..b9de54f4bb3 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/all_statements_supported.snap @@ -26,7 +26,7 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { %9 = closure(({def@42} as FnPtr), %8) // cost: 8 %10 = apply %9 5 // cost: 8 %11 = input LOAD param // cost: 8 - %12 = 1 // cost: 8 + %12 = true // cost: 8 return %12 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap index 1e9e2dd7f7a..7ff2c1f6db5 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/interpret/eq_opaque_entity_uuid.snap @@ -1,8 +1,9 @@ --- source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs +assertion_line: 92 expression: output --- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %2: Boolean let %3: Uuid let %4: EntityUuid diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap index 
2f9c56244ef..1f22a24cd1b 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_closure_rejected.snap @@ -10,7 +10,7 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { bb0(): { %2 = %0.0 // cost: 4 %3 = closure(({def@42} as FnPtr), %2) - %4 = 1 // cost: 4 + %4 = true // cost: 4 return %4 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap index 5fbf3d9d5c2..ca2fefd4973 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/aggregate_tuple_supported.snap @@ -10,7 +10,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { bb0(): { %2 = (1, 2) // cost: 4 %3 = (a: 10, b: 20) // cost: 4 - %4 = 1 // cost: 4 + %4 = true // cost: 4 return %4 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap index 4d5a3e85593..8db2da9a59a 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/diamond_must_analysis.snap @@ -11,7 +11,7 @@ fn {graph::read::filter@4294967040}(%0: (Integer,), %1: Entity) -> Boolean { let %7: Boolean bb0(): { - %2 = 1 // cost: 4 + %2 = true // cost: 4 %3 = %0.0 // cost: 4 %4 = (%3) // cost: 4 %5 = closure(({def@77} as FnPtr), %4) diff --git 
a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap index 7d7b5573690..bcceac667eb 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_non_string_key_rejected.snap @@ -8,7 +8,7 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) - bb0(): { %2 = %0.0 - %3 = 1 // cost: 4 + %3 = true // cost: 4 return %3 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap index e43510fd97a..2344681e1dd 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_opaque_string_key_accepted.snap @@ -8,7 +8,7 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> bb0(): { %2 = %0.0 // cost: 4 - %3 = 1 // cost: 4 + %3 = true // cost: 4 return %3 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap index 21b8df95642..7f68af1af40 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/env_dict_string_key_accepted.snap @@ -8,7 +8,7 @@ fn {graph::read::filter@4294967040}(%0: (Dict,), %1: Entity) -> bb0(): { %2 = %0.0 // cost: 4 - %3 = 1 // cost: 4 + 
%3 = true // cost: 4 return %3 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap index 87cdcc5db7c..6ed6afca2eb 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/eq_opaque_entity_uuid.snap @@ -2,7 +2,7 @@ source: libs/@local/hashql/mir/src/pass/execution/statement_placement/tests.rs expression: output --- -fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { +fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { let %2: Boolean let %3: Uuid let %4: EntityUuid diff --git a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap index ca5104a18ab..2d8920c74a4 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/execution/statement_placement/postgres/fnptr_constant_rejected.snap @@ -8,7 +8,7 @@ fn {graph::read::filter@4294967040}(%0: (), %1: Entity) -> Boolean { bb0(): { %2 = ({def@99} as FnPtr) - %3 = 1 // cost: 4 + %3 = true // cost: 4 return %3 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_predecessors_agree.snap b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_predecessors_agree.snap index 4076cb923c7..44cbdd05195 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_predecessors_agree.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_predecessors_agree.snap @@ -44,7 +44,7 @@ fn {closure@4294967040}(%0: Integer) -> Boolean { } bb3(%1): { - %2 = 1 + %2 = true return %2 
} diff --git a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_single_predecessor.snap b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_single_predecessor.snap index ca811c89d69..53ecb4e7b4e 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_single_predecessor.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/block_param_single_predecessor.snap @@ -28,7 +28,7 @@ fn {closure@4294967040}() -> Boolean { } bb1(%0): { - %1 = 1 + %1 = true return %1 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/const_fold_unary_not.snap b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/const_fold_unary_not.snap index 5d9c5c55aac..69cf3ba4f19 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/const_fold_unary_not.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/const_fold_unary_not.snap @@ -6,7 +6,7 @@ fn {closure@4294967040}() -> Boolean { let %0: Boolean bb0(): { - %0 = !1 + %0 = !true return %0 } @@ -18,7 +18,7 @@ fn {closure@4294967040}() -> Boolean { let %0: Boolean bb0(): { - %0 = 0 + %0 = false return %0 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/idempotent_to_const_forwarding.snap b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/idempotent_to_const_forwarding.snap index 7ec35a3e5b1..69766cf5317 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/idempotent_to_const_forwarding.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/inst_simplify/idempotent_to_const_forwarding.snap @@ -26,7 +26,7 @@ fn {closure@4294967040}() -> Boolean { bb0(): { %0 = 42 %1 = 42 - %2 = 1 + %2 = true return %2 } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment.snap new file mode 100644 index 00000000000..1346588c343 --- /dev/null +++ 
b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment.snap @@ -0,0 +1,48 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs +expression: value +--- +fn {closure@4294967040}() -> Integer { + let %0: Integer + let %1: Boolean + + bb0(): { + %1 = true + + switchInt(%1) -> [0: bb1(0), 1: bb2()] + } + + bb1(%0): { + return %0 + } + + bb2(): { + %0 = 1 + + return %0 + } +} + +================== Changed: Yes ================== + +fn {closure@4294967040}() -> Integer { + let %0: Integer + let %1: Boolean + let %2: Integer + + bb0(): { + %1 = true + + switchInt(%1) -> [0: bb1(0), 1: bb2()] + } + + bb1(%2): { + return %2 + } + + bb2(): { + %0 = 1 + + return %0 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment2.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment2.snap new file mode 100644 index 00000000000..a0bd400e2fb --- /dev/null +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/block_param_def_with_sibling_assignment2.snap @@ -0,0 +1,48 @@ +--- +source: libs/@local/hashql/mir/src/pass/transform/ssa_repair/tests.rs +expression: value +--- +fn {closure@4294967040}() -> Integer { + let %0: Integer + let %1: Boolean + + bb0(): { + %1 = true + + switchInt(%1) -> [0: bb2(0), 1: bb1()] + } + + bb1(): { + %0 = 1 + + return %0 + } + + bb2(%0): { + return %0 + } +} + +================== Changed: Yes ================== + +fn {closure@4294967040}() -> Integer { + let %0: Integer + let %1: Boolean + let %2: Integer + + bb0(): { + %1 = true + + switchInt(%1) -> [0: bb2(0), 1: bb1()] + } + + bb1(): { + %2 = 1 + + return %2 + } + + bb2(%0): { + return %0 + } +} diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_both_branches_define.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_both_branches_define.snap index f7e49fa775e..c9e1d33abec 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_both_branches_define.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_both_branches_define.snap @@ -7,7 +7,7 @@ fn {closure@4294967040}() -> Null { let %1: Boolean bb0(): { - %1 = 1 + %1 = true switchInt(%1) -> [0: bb2(), 1: bb1()] } @@ -41,7 +41,7 @@ fn {closure@4294967040}() -> Null { let %4: Boolean bb0(): { - %4 = 1 + %4 = true switchInt(%4) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_one_branch_redefines.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_one_branch_redefines.snap index 70faaa5c3fb..78fed6cad3f 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_one_branch_redefines.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/diamond_one_branch_redefines.snap @@ -7,7 +7,7 @@ fn {closure@4294967040}() -> Null { let %1: Boolean bb0(): { - %1 = 1 + %1 = true %0 = 1 switchInt(%1) -> [0: bb2(), 1: bb1()] @@ -40,7 +40,7 @@ fn {closure@4294967040}() -> Null { let %4: Boolean bb0(): { - %4 = 1 + %4 = true %2 = 1 switchInt(%4) -> [0: bb2(), 1: bb1()] diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/irreducible.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/irreducible.snap index 7e93e1ddf1b..d066eebf180 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/irreducible.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/irreducible.snap @@ -10,8 +10,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %0 = 0 - %2 = 1 - %3 = 1 + %2 = true + %3 = true switchInt(%2) -> [0: bb2(), 1: bb1()] } @@ -53,8 +53,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %4 = 0 - %2 = 1 - %3 = 1 + %2 = true + %3 = true switchInt(%2) -> [0: bb2(%4), 1: bb1(%4)] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/loop_with_conditional_def.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/loop_with_conditional_def.snap index a0cd46b2f06..06591200ed4 100644 --- 
a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/loop_with_conditional_def.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/loop_with_conditional_def.snap @@ -10,8 +10,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %0 = 0 - %2 = 1 - %3 = 1 + %2 = true + %3 = true goto -> bb1() } @@ -54,8 +54,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %4 = 0 - %2 = 1 - %3 = 1 + %2 = true + %3 = true goto -> bb1(%4) } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/multiple_variables_violated.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/multiple_variables_violated.snap index a045cd4613c..b2ec7d60aa1 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/multiple_variables_violated.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/multiple_variables_violated.snap @@ -9,7 +9,7 @@ fn {closure@4294967040}() -> Null { let %3: Boolean bb0(): { - %3 = 1 + %3 = true switchInt(%3) -> [0: bb2(), 1: bb1()] } @@ -48,7 +48,7 @@ fn {closure@4294967040}() -> Null { let %7: Integer bb0(): { - %3 = 1 + %3 = true switchInt(%3) -> [0: bb2(), 1: bb1()] } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/nested_loop.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/nested_loop.snap index d5bf4d89060..a11cc0f6c76 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/nested_loop.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/nested_loop.snap @@ -9,8 +9,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %0 = 0 - %1 = 1 - %2 = 1 + %1 = true + %2 = true goto -> bb1() } @@ -48,8 +48,8 @@ fn {closure@4294967040}() -> Null { bb0(): { %3 = 0 - %6 = 1 - %2 = 1 + %6 = true + %2 = true goto -> bb1(%3) } diff --git a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/simple_loop.snap b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/simple_loop.snap index 8d899c55a1c..cbf7269ff6c 100644 --- a/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/simple_loop.snap +++ b/libs/@local/hashql/mir/tests/ui/pass/ssa_repair/simple_loop.snap @@ 
-8,7 +8,7 @@ fn {closure@4294967040}() -> Null { bb0(): { %0 = 0 - %1 = 1 + %1 = true goto -> bb1() } @@ -36,7 +36,7 @@ fn {closure@4294967040}() -> Null { bb0(): { %2 = 0 - %4 = 1 + %4 = true goto -> bb1(%2) }