From 92a93d367c24f163a482f2d54d74346a20a49f29 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 30 May 2026 15:37:47 -0600 Subject: [PATCH 1/2] feat(client): add max_decompressed_size to guard against decompression bombs Closes #1178. A new `max_decompressed_size` request keyword caps how many bytes an auto-decompressed (gzip/deflate) response body may produce; reading past it throws `DecompressionLimitError` before the bomb inflates. `0` (default) keeps the current unbounded behavior. The limiter wraps the decompressor stream, so it fires after at most one extra read chunk (memory stays bounded near the limit) and applies to both the materialized `resp.body` and caller-owned `response_stream` sinks. Co-Authored-By: Claude Opus 4.8 --- src/http_client.jl | 86 +++++++++++++++++++++++++++++++++++---- test/http_client_tests.jl | 43 ++++++++++++++++++++ 2 files changed, 120 insertions(+), 9 deletions(-) diff --git a/src/http_client.jl b/src/http_client.jl index 833641e79..c9bded116 100644 --- a/src/http_client.jl +++ b/src/http_client.jl @@ -1332,19 +1332,84 @@ function Base.close(io::_BodyIO) return nothing end -function _response_body_reader(incoming::_IncomingResponse, decompress::Union{Nothing,Bool})::Tuple{IO,Union{Nothing,Task}} +""" + DecompressionLimitError(limit) + +Thrown when an automatically decompressed response body exceeds the +`max_decompressed_size` byte limit passed to the request. Guards against +decompression bombs — small compressed payloads that inflate to exhaust memory. +""" +struct DecompressionLimitError <: Exception + limit::Int +end + +function Base.showerror(io::IO, err::DecompressionLimitError) + print(io, "DecompressionLimitError: decompressed response body exceeded max_decompressed_size = ", err.limit, " bytes") + return nothing +end + +# IO wrapper that caps how many decompressed bytes may be read from `inner`, +# throwing `DecompressionLimitError` once the running total exceeds `limit`. 
+# Response bodies are read via chunked `readbytes!`, so the limit fires after at +# most one extra chunk and memory stays bounded near `limit`. +mutable struct _DecompressLimitReader{S<:IO} <: IO + inner::S + limit::Int + seen::Int +end +_DecompressLimitReader(inner::IO, limit::Integer) = _DecompressLimitReader{typeof(inner)}(inner, Int(limit), 0) + +@inline function _count_decompressed!(r::_DecompressLimitReader, n::Integer)::Nothing + r.seen += Int(n) + r.seen > r.limit && throw(DecompressionLimitError(r.limit)) + return nothing +end + +function Base.readbytes!(r::_DecompressLimitReader, dst::AbstractVector{UInt8}, nb::Integer=length(dst))::Int + n = readbytes!(r.inner, dst, nb) + _count_decompressed!(r, n) + return n +end + +function Base.readavailable(r::_DecompressLimitReader)::Vector{UInt8} + chunk = readavailable(r.inner) + _count_decompressed!(r, length(chunk)) + return chunk +end + +function Base.read(r::_DecompressLimitReader)::Vector{UInt8} + out = UInt8[] + buf = Vector{UInt8}(undef, 65536) + while true + n = readbytes!(r.inner, buf, length(buf)) + n == 0 && break + _count_decompressed!(r, n) + append!(out, @view(buf[1:n])) + end + return out +end + +Base.eof(r::_DecompressLimitReader)::Bool = eof(r.inner) +Base.bytesavailable(r::_DecompressLimitReader)::Int = bytesavailable(r.inner) +Base.isopen(r::_DecompressLimitReader)::Bool = isopen(r.inner) +Base.close(r::_DecompressLimitReader) = close(r.inner) + +function _response_body_reader(incoming::_IncomingResponse, decompress::Union{Nothing,Bool}, max_decompressed_size::Int=0)::Tuple{IO,Union{Nothing,Task}} raw_stream = _BodyIO(incoming.rawbody) encoding = _response_content_encoding(incoming.head.headers, decompress) - if encoding == :gzip - return CodecZlib.GzipDecompressorStream(raw_stream), nothing + decompressor = if encoding == :gzip + CodecZlib.GzipDecompressorStream(raw_stream) elseif encoding == :deflate - return CodecZlib.ZlibDecompressorStream(raw_stream), nothing + 
CodecZlib.ZlibDecompressorStream(raw_stream) + else + return raw_stream, nothing end - return raw_stream, nothing + reader = max_decompressed_size > 0 ? _DecompressLimitReader(decompressor, max_decompressed_size) : decompressor + return reader, nothing end -function _with_response_reader(f::F, incoming::_IncomingResponse, decompress::Union{Nothing,Bool}) where {F} - reader, _ = _response_body_reader(incoming, decompress) +function _with_response_reader(f::F, incoming::_IncomingResponse, decompress::Union{Nothing,Bool}, max_decompressed_size::Int=0) where {F} + reader, _ = _response_body_reader(incoming, decompress, max_decompressed_size) try return f(reader) finally @@ -1365,6 +1430,7 @@ function _consume_incoming_response!( incoming::_IncomingResponse, sink, decompress::Union{Nothing,Bool}, + max_decompressed_size::Int=0, )::Tuple{Any,Int64} if _incoming_response_has_no_body(incoming) || !_should_decompress_response(incoming.head.headers, decompress) try @@ -1388,7 +1454,7 @@ function _consume_incoming_response!( rethrow() end end - return _with_response_reader(incoming, decompress) do reader + return _with_response_reader(incoming, decompress, max_decompressed_size) do reader if sink === nothing body = _read_all_response_bytes(reader) return body, Int64(length(body)) @@ -1706,6 +1772,7 @@ function request( query=nothing, response_stream=nothing, decompress::Union{Nothing,Bool}=nothing, + max_decompressed_size::Integer=0, sse_callback=nothing, client::Union{Nothing,Client}=nothing, context::Union{Nothing,RequestContext}=nothing, @@ -1831,7 +1898,7 @@ function request( return sse_response end end - final_body, final_length = _consume_incoming_response!(incoming, sink, decompress) + final_body, final_length = _consume_incoming_response!(incoming, sink, decompress, Int(max_decompressed_size)) response = _finalize_request_response(incoming, final_body, final_length, resolved_request, parsed.url) final_response = response status_exception && _status_throws(response) && 
throw(StatusError(response)) @@ -1900,6 +1967,7 @@ Keyword arguments: - `query`: optional query string or key/value collection appended to the URL - `response_stream`: optional sink `IO` or byte buffer written with the final response body - `decompress`: `nothing`/`true` auto-decompress gzip and deflate responses, `false` leaves wire bytes untouched +- `max_decompressed_size`: cap, in bytes, on an auto-decompressed (gzip/deflate) response body; reading past it throws `DecompressionLimitError`, guarding against decompression bombs. `0` (default; any non-positive value behaves the same) disables the limit. Responses that are not auto-decompressed are never subject to this cap - `sse_callback`: callback receiving `(event)` or `(stream, event)` for successful SSE responses - `trace`: optional callback receiving request lifecycle events diff --git a/test/http_client_tests.jl b/test/http_client_tests.jl index b3b9b4d29..5faddddbc 100644 --- a/test/http_client_tests.jl +++ b/test/http_client_tests.jl @@ -2337,3 +2337,46 @@ end HT.forceclose(slow_server) end end + +@testset "max_decompressed_size guards against decompression bombs" begin + # ~4 MB of zeros compresses to a few KB of gzip — a small "bomb". + big = zeros(UInt8, 4_000_000) + gz = transcode(HTTP.CodecZlib.GzipCompressor, big) + @test length(gz) < 100_000 # confirm the payload really is small on the wire + server = HT.serve!("127.0.0.1", 0; listenany = true) do req + return HT.Response(200; headers = ["Content-Encoding" => "gzip"], body = gz) + end + try + base = "http://127.0.0.1:$(HT.port(server))/" + + # No limit (default): the full body decompresses. + r = HT.get(base) + @test length(r.body) == length(big) + + # Limit below the decompressed size: rejected before the bomb inflates. + err = try + HT.get(base; max_decompressed_size = 1024) + nothing + catch e + e + end + @test err isa HTTP.DecompressionLimitError + err isa HTTP.DecompressionLimitError && @test err.limit == 1024 + + # Limit at/above the decompressed size: succeeds. 
+ r2 = HT.get(base; max_decompressed_size = length(big)) + @test length(r2.body) == length(big) + + # The limit also applies to a caller-owned IO sink. + sink = IOBuffer() + err2 = try + HT.get(base; response_stream = sink, max_decompressed_size = 1024) + nothing + catch e + e + end + @test err2 isa HTTP.DecompressionLimitError + finally + HT.forceclose(server) + end +end From 6cf6415d55ea1a1dacdc930e8e076b121cac6a99 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 30 May 2026 17:54:49 -0600 Subject: [PATCH 2/2] fix(client): apply decompression limit in the read loop, not a wrapper type The _DecompressLimitReader wrapper widened _response_body_reader's return into an abstractly-parameterized union (Union{_BodyIO, _DecompressLimitReader, TranscodingStream...}), which broke --trim static compilation (trim_compile_tests verifier errors on macOS/Linux). Apply max_decompressed_size inside the chunked read loops (_read_all_response_bytes / _copy_response_bytes!) instead, leaving the reader's type set unchanged. Same DecompressionLimitError and behavior; trim compile passes (63/63 locally). 
Co-Authored-By: Claude Opus 4.8 --- src/http_client.jl | 82 +++++++++++----------------------------------- 1 file changed, 19 insertions(+), 63 deletions(-) diff --git a/src/http_client.jl b/src/http_client.jl index c9bded116..a82db7ceb 100644 --- a/src/http_client.jl +++ b/src/http_client.jl @@ -1066,12 +1066,15 @@ function _status_throws(resp::Response)::Bool return resp.status >= 300 && !_is_redirect_status(resp.status) end -function _read_all_response_bytes(io::IO)::Vector{UInt8} +function _read_all_response_bytes(io::IO, limit::Int=0)::Vector{UInt8} out = UInt8[] buf = Vector{UInt8}(undef, 8192) + total = 0 while true n = readbytes!(io, buf, length(buf)) n == 0 && return out + total += n + limit > 0 && total > limit && throw(DecompressionLimitError(limit)) append!(out, @view(buf[1:n])) end end @@ -1095,18 +1098,19 @@ function _read_all_response_bytes(body::AbstractBody, content_length_hint::Int64 end end -function _copy_response_bytes!(dest::IO, io::IO)::Int64 +function _copy_response_bytes!(dest::IO, io::IO, limit::Int=0)::Int64 buf = Vector{UInt8}(undef, 8192) total = Int64(0) while true n = readbytes!(io, buf, length(buf)) n == 0 && return total total += n + limit > 0 && total > limit && throw(DecompressionLimitError(limit)) write(dest, view(buf, 1:n)) end end -function _copy_response_bytes!(dest::AbstractVector{UInt8}, io::IO)::Int64 +function _copy_response_bytes!(dest::AbstractVector{UInt8}, io::IO, limit::Int=0)::Int64 buf = Vector{UInt8}(undef, 8192) total = 0 capacity = length(dest) @@ -1114,6 +1118,7 @@ function _copy_response_bytes!(dest::AbstractVector{UInt8}, io::IO)::Int64 n = readbytes!(io, buf, length(buf)) n == 0 && break needed = total + n + limit > 0 && needed > limit && throw(DecompressionLimitError(limit)) needed <= capacity || throw(ArgumentError("Unable to grow response stream IOBuffer $(capacity) large enough for response body size: $(needed)")) copyto!(dest, total + 1, buf, 1, n) total = needed @@ -1348,68 +1353,19 @@ function 
Base.showerror(io::IO, err::DecompressionLimitError) return nothing end -# IO wrapper that caps how many decompressed bytes may be read from `inner`, -# throwing `DecompressionLimitError` once the running total exceeds `limit`. -# Response bodies are read via chunked `readbytes!`, so the limit fires after at -# most one extra chunk and memory stays bounded near `limit`. -mutable struct _DecompressLimitReader{S<:IO} <: IO - inner::S - limit::Int - seen::Int -end -_DecompressLimitReader(inner::IO, limit::Integer) = _DecompressLimitReader{typeof(inner)}(inner, Int(limit), 0) - -@inline function _count_decompressed!(r::_DecompressLimitReader, n::Integer)::Nothing - r.seen += Int(n) - r.seen > r.limit && throw(DecompressionLimitError(r.limit)) - return nothing -end - -function Base.readbytes!(r::_DecompressLimitReader, dst::AbstractVector{UInt8}, nb::Integer=length(dst))::Int - n = readbytes!(r.inner, dst, nb) - _count_decompressed!(r, n) - return n -end - -function Base.readavailable(r::_DecompressLimitReader)::Vector{UInt8} - chunk = readavailable(r.inner) - _count_decompressed!(r, length(chunk)) - return chunk -end - -function Base.read(r::_DecompressLimitReader)::Vector{UInt8} - out = UInt8[] - buf = Vector{UInt8}(undef, 65536) - while true - n = readbytes!(r.inner, buf, length(buf)) - n == 0 && break - _count_decompressed!(r, n) - append!(out, @view(buf[1:n])) - end - return out -end - -Base.eof(r::_DecompressLimitReader)::Bool = eof(r.inner) -Base.bytesavailable(r::_DecompressLimitReader)::Int = bytesavailable(r.inner) -Base.isopen(r::_DecompressLimitReader)::Bool = isopen(r.inner) -Base.close(r::_DecompressLimitReader) = close(r.inner) - -function _response_body_reader(incoming::_IncomingResponse, decompress::Union{Nothing,Bool}, max_decompressed_size::Int=0)::Tuple{IO,Union{Nothing,Task}} +function _response_body_reader(incoming::_IncomingResponse, decompress::Union{Nothing,Bool})::Tuple{IO,Union{Nothing,Task}} raw_stream = _BodyIO(incoming.rawbody) encoding = 
_response_content_encoding(incoming.head.headers, decompress) - decompressor = if encoding == :gzip - CodecZlib.GzipDecompressorStream(raw_stream) + if encoding == :gzip + return CodecZlib.GzipDecompressorStream(raw_stream), nothing elseif encoding == :deflate - CodecZlib.ZlibDecompressorStream(raw_stream) - else - return raw_stream, nothing + return CodecZlib.ZlibDecompressorStream(raw_stream), nothing end - reader = max_decompressed_size > 0 ? _DecompressLimitReader(decompressor, max_decompressed_size) : decompressor - return reader, nothing + return raw_stream, nothing end -function _with_response_reader(f::F, incoming::_IncomingResponse, decompress::Union{Nothing,Bool}, max_decompressed_size::Int=0) where {F} - reader, _ = _response_body_reader(incoming, decompress, max_decompressed_size) +function _with_response_reader(f::F, incoming::_IncomingResponse, decompress::Union{Nothing,Bool}) where {F} + reader, _ = _response_body_reader(incoming, decompress) try return f(reader) finally @@ -1454,16 +1410,16 @@ function _consume_incoming_response!( rethrow() end end - return _with_response_reader(incoming, decompress, max_decompressed_size) do reader + return _with_response_reader(incoming, decompress) do reader if sink === nothing - body = _read_all_response_bytes(reader) + body = _read_all_response_bytes(reader, max_decompressed_size) return body, Int64(length(body)) end if sink isa IO - n = _copy_response_bytes!(sink::IO, reader) + n = _copy_response_bytes!(sink::IO, reader, max_decompressed_size) return nothing, n end - n = _copy_response_bytes!(sink::AbstractVector{UInt8}, reader) + n = _copy_response_bytes!(sink::AbstractVector{UInt8}, reader, max_decompressed_size) if sink isa Vector{UInt8} return sink::Vector{UInt8}, n end