diff --git a/.gitignore b/.gitignore index f119890ee..615bac335 100644 --- a/.gitignore +++ b/.gitignore @@ -12,4 +12,4 @@ compiled.spv *.pyc */.vscode/* */__main__.py -/tmp/rtSamples.bin +tmp diff --git a/30_ComputeShaderPathTracer/CMakeLists.txt b/30_ComputeShaderPathTracer/CMakeLists.txt deleted file mode 100644 index 1a0b0e9bd..000000000 --- a/30_ComputeShaderPathTracer/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -if(NBL_BUILD_IMGUI) - set(NBL_INCLUDE_SERACH_DIRECTORIES - "${CMAKE_CURRENT_SOURCE_DIR}/include" - ) - - list(APPEND NBL_LIBRARIES - imtestengine - "${NBL_EXT_IMGUI_UI_LIB}" - Nabla::ext::FullScreenTriangle - ) - - nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - - if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) - endif() -endif() - - diff --git a/30_ComputeShaderPathTracer/app_resources/common.glsl 
b/30_ComputeShaderPathTracer/app_resources/common.glsl deleted file mode 100644 index 65ed0609e..000000000 --- a/30_ComputeShaderPathTracer/app_resources/common.glsl +++ /dev/null @@ -1,822 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -// firefly and variance reduction techniques -//#define KILL_DIFFUSE_SPECULAR_PATHS -//#define VISUALIZE_HIGH_VARIANCE - -// debug -//#define NEE_ONLY - -layout(set = 2, binding = 0) uniform sampler2D envMap; -layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; -layout(set = 2, binding = 2) uniform usampler2D scramblebuf; - -layout(set=0, binding=0, rgba16f) uniform image2D outImage; - -#ifndef _NBL_GLSL_WORKGROUP_SIZE_ -#define _NBL_GLSL_WORKGROUP_SIZE_ 32 -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=_NBL_GLSL_WORKGROUP_SIZE_, local_size_z=1) in; -#endif - -ivec2 getCoordinates() { - return ivec2(gl_GlobalInvocationID.xy); -} - -vec2 getTexCoords() { - ivec2 imageSize = imageSize(outImage); - ivec2 iCoords = getCoordinates(); - return vec2(float(iCoords.x) / imageSize.x, 1.0 - float(iCoords.y) / imageSize.y); -} - - -#include -#include -#include - -#include - -layout(push_constant, row_major) uniform constants -{ - mat4 invMVP; - int sampleCount; - int depth; -} PTPushConstant; - -#define INVALID_ID_16BIT 0xffffu -struct Sphere -{ - vec3 position; - float radius2; - uint bsdfLightIDs; -}; - -Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) -{ - Sphere sphere; - sphere.position = position; - sphere.radius2 = radius*radius; - sphere.bsdfLightIDs = bitfieldInsert(bsdfID,lightID,16,16); - return sphere; -} - -// return intersection distance if found, nbl_glsl_FLT_NAN otherwise -float Sphere_intersect(in Sphere sphere, in vec3 origin, in vec3 direction) -{ - vec3 relOrigin = origin-sphere.position; - float 
relOriginLen2 = dot(relOrigin,relOrigin); - const float radius2 = sphere.radius2; - - float dirDotRelOrigin = dot(direction,relOrigin); - float det = radius2-relOriginLen2+dirDotRelOrigin*dirDotRelOrigin; - - // do some speculative math here - float detsqrt = sqrt(det); - return -dirDotRelOrigin+(relOriginLen2>radius2 ? (-detsqrt):detsqrt); -} - -vec3 Sphere_getNormal(in Sphere sphere, in vec3 position) -{ - const float radiusRcp = inversesqrt(sphere.radius2); - return (position-sphere.position)*radiusRcp; -} - -float Sphere_getSolidAngle_impl(in float cosThetaMax) -{ - return 2.0*nbl_glsl_PI*(1.0-cosThetaMax); -} -float Sphere_getSolidAngle(in Sphere sphere, in vec3 origin) -{ - float cosThetaMax = sqrt(1.0-sphere.radius2/nbl_glsl_lengthSq(sphere.position-origin)); - return Sphere_getSolidAngle_impl(cosThetaMax); -} - - -Sphere spheres[SPHERE_COUNT] = { - Sphere_Sphere(vec3(0.0,-100.5,-1.0),100.0,0u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(2.0,0.0,-1.0),0.5,1u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(0.0,0.0,-1.0),0.5,2u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(-2.0,0.0,-1.0),0.5,3u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(2.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(0.0,0.0,1.0),0.5,4u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(-2.0,0.0,1.0),0.5,5u,INVALID_ID_16BIT), - Sphere_Sphere(vec3(0.5,1.0,0.5),0.5,6u,INVALID_ID_16BIT) -#if SPHERE_COUNT>8 - ,Sphere_Sphere(vec3(-1.5,1.5,0.0),0.3,INVALID_ID_16BIT,0u) -#endif -}; - - -struct Triangle -{ - vec3 vertex0; - uint bsdfLightIDs; - vec3 vertex1; - uint padding0; - vec3 vertex2; - uint padding1; -}; - -Triangle Triangle_Triangle(in mat3 vertices, in uint bsdfID, in uint lightID) -{ - Triangle tri; - tri.vertex0 = vertices[0]; - tri.vertex1 = vertices[1]; - tri.vertex2 = vertices[2]; - // - tri.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); - return tri; -} - -// return intersection distance if found, nbl_glsl_FLT_NAN otherwise -float Triangle_intersect(in Triangle tri, in vec3 origin, in vec3 
direction) -{ - const vec3 edges[2] = vec3[2](tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0); - - const vec3 h = cross(direction,edges[1]); - const float a = dot(edges[0],h); - - const vec3 relOrigin = origin-tri.vertex0; - - const float u = dot(relOrigin,h)/a; - - const vec3 q = cross(relOrigin,edges[0]); - const float v = dot(direction,q)/a; - - const float t = dot(edges[1],q)/a; - - return t>0.f&&u>=0.f&&v>=0.f&&(u+v)<=1.f ? t:nbl_glsl_FLT_NAN; -} - -vec3 Triangle_getNormalTimesArea_impl(in mat2x3 edges) -{ - return cross(edges[0],edges[1])*0.5; -} -vec3 Triangle_getNormalTimesArea(in Triangle tri) -{ - return Triangle_getNormalTimesArea_impl(mat2x3(tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0)); -} - - - -struct Rectangle -{ - vec3 offset; - uint bsdfLightIDs; - vec3 edge0; - uint padding0; - vec3 edge1; - uint padding1; -}; - -Rectangle Rectangle_Rectangle(in vec3 offset, in vec3 edge0, in vec3 edge1, in uint bsdfID, in uint lightID) -{ - Rectangle rect; - rect.offset = offset; - rect.edge0 = edge0; - rect.edge1 = edge1; - // - rect.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); - return rect; -} - -void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extents) -{ - extents = vec2(length(rect.edge0), length(rect.edge1)); - basis[0] = rect.edge0/extents[0]; - basis[1] = rect.edge1/extents[1]; - basis[2] = normalize(cross(basis[0],basis[1])); -} - -// return intersection distance if found, nbl_glsl_FLT_NAN otherwise -float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) -{ - const vec3 h = cross(direction,rect.edge1); - const float a = dot(rect.edge0,h); - - const vec3 relOrigin = origin-rect.offset; - - const float u = dot(relOrigin,h)/a; - - const vec3 q = cross(relOrigin,rect.edge0); - const float v = dot(direction,q)/a; - - const float t = dot(rect.edge1,q)/a; - - const bool intersection = t>0.f&&u>=0.f&&v>=0.f&&u<=1.f&&v<=1.f; - return intersection ? 
t:nbl_glsl_FLT_NAN; -} - -vec3 Rectangle_getNormalTimesArea(in Rectangle rect) -{ - return cross(rect.edge0,rect.edge1); -} - - - -#define DIFFUSE_OP 0u -#define CONDUCTOR_OP 1u -#define DIELECTRIC_OP 2u -#define OP_BITS_OFFSET 0 -#define OP_BITS_SIZE 2 -struct BSDFNode -{ - uvec4 data[2]; -}; - -uint BSDFNode_getType(in BSDFNode node) -{ - return bitfieldExtract(node.data[0].w,OP_BITS_OFFSET,OP_BITS_SIZE); -} -bool BSDFNode_isBSDF(in BSDFNode node) -{ - return BSDFNode_getType(node)==DIELECTRIC_OP; -} -bool BSDFNode_isNotDiffuse(in BSDFNode node) -{ - return BSDFNode_getType(node)!=DIFFUSE_OP; -} -float BSDFNode_getRoughness(in BSDFNode node) -{ - return uintBitsToFloat(node.data[1].w); -} -vec3 BSDFNode_getRealEta(in BSDFNode node) -{ - return uintBitsToFloat(node.data[0].rgb); -} -vec3 BSDFNode_getImaginaryEta(in BSDFNode node) -{ - return uintBitsToFloat(node.data[1].rgb); -} -mat2x3 BSDFNode_getEta(in BSDFNode node) -{ - return mat2x3(BSDFNode_getRealEta(node),BSDFNode_getImaginaryEta(node)); -} -#include -vec3 BSDFNode_getReflectance(in BSDFNode node, in float VdotH) -{ - const vec3 albedoOrRealIoR = uintBitsToFloat(node.data[0].rgb); - if (BSDFNode_isNotDiffuse(node)) - return nbl_glsl_fresnel_conductor(albedoOrRealIoR, BSDFNode_getImaginaryEta(node), VdotH); - else - return albedoOrRealIoR; -} - -float BSDFNode_getNEEProb(in BSDFNode bsdf) -{ - const float alpha = BSDFNode_isNotDiffuse(bsdf) ? 
BSDFNode_getRoughness(bsdf):1.0; - return min(8.0*alpha,1.0); -} - -#include -#include -float getLuma(in vec3 col) -{ - return dot(transpose(nbl_glsl_scRGBtoXYZ)[1],col); -} - -#define BSDF_COUNT 7 -BSDFNode bsdfs[BSDF_COUNT] = { - {{uvec4(floatBitsToUint(vec3(0.8,0.8,0.8)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(0.8,0.4,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(0.4,0.8,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.02,1.3)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,1.0,2.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.15))}}, - {{uvec4(floatBitsToUint(vec3(1.4,1.45,1.5)),DIELECTRIC_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0625))}} -}; - - -struct Light -{ - vec3 radiance; - uint objectID; -}; - -vec3 Light_getRadiance(in Light light) -{ - return light.radiance; -} -uint Light_getObjectID(in Light light) -{ - return light.objectID; -} - - -#define LIGHT_COUNT 1 -float scene_getLightChoicePdf(in Light light) -{ - return 1.0/float(LIGHT_COUNT); -} - - -#define LIGHT_COUNT 1 -Light lights[LIGHT_COUNT] = -{ - { - vec3(30.0,25.0,15.0), -#ifdef POLYGON_METHOD - 0u -#else - 8u -#endif - } -}; - - - -#define ANY_HIT_FLAG (-2147483648) -#define DEPTH_BITS_COUNT 8 -#define DEPTH_BITS_OFFSET (31-DEPTH_BITS_COUNT) -struct ImmutableRay_t -{ - vec3 origin; - vec3 direction; -#if POLYGON_METHOD==2 - vec3 normalAtOrigin; - bool wasBSDFAtOrigin; -#endif -}; -struct MutableRay_t -{ - float intersectionT; - uint objectID; - /* irrelevant here - uint triangleID; - vec2 barycentrics; - */ -}; -struct Payload_t -{ - vec3 accumulation; - float otherTechniqueHeuristic; - vec3 throughput; -#ifdef KILL_DIFFUSE_SPECULAR_PATHS - bool hasDiffuse; -#endif -}; - -struct Ray_t -{ - ImmutableRay_t 
_immutable; - MutableRay_t _mutable; - Payload_t _payload; -}; - - -#define INTERSECTION_ERROR_BOUND_LOG2 (-8.0) -float getTolerance_common(in uint depth) -{ - float depthRcp = 1.0/float(depth); - return INTERSECTION_ERROR_BOUND_LOG2;// *depthRcp*depthRcp; -} -float getStartTolerance(in uint depth) -{ - return exp2(getTolerance_common(depth)); -} -float getEndTolerance(in uint depth) -{ - return 1.0-exp2(getTolerance_common(depth)+1.0); -} - - -vec2 SampleSphericalMap(vec3 v) -{ - vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); - uv *= nbl_glsl_RECIPROCAL_PI*0.5; - uv += 0.5; - return uv; -} - -void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) -{ - vec3 finalContribution = _payload.throughput; - // #define USE_ENVMAP -#ifdef USE_ENVMAP - vec2 uv = SampleSphericalMap(_immutable.direction); - finalContribution *= textureLod(envMap, uv, 0.0).rgb; -#else - const vec3 kConstantEnvLightRadiance = vec3(0.15, 0.21, 0.3); - finalContribution *= kConstantEnvLightRadiance; - _payload.accumulation += finalContribution; -#endif -} - -#include -#include -#include -#include -#include -#include -#include -nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in vec3 u, in BSDFNode bsdf, in float monochromeEta, out nbl_glsl_AnisotropicMicrofacetCache _cache) -{ - const float a = BSDFNode_getRoughness(bsdf); - const mat2x3 ior = BSDFNode_getEta(bsdf); - - // fresnel stuff for dielectrics - float orientedEta, rcpOrientedEta; - const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); - - nbl_glsl_LightSample smpl; - nbl_glsl_AnisotropicMicrofacetCache dummy; - switch (BSDFNode_getType(bsdf)) - { - case DIFFUSE_OP: - smpl = nbl_glsl_oren_nayar_cos_generate(interaction,u.xy,a*a); - break; - case CONDUCTOR_OP: - smpl = nbl_glsl_ggx_cos_generate(interaction,u.xy,a,a,_cache); - break; - default: - smpl = 
nbl_glsl_ggx_dielectric_cos_generate(interaction,u,a,a,monochromeEta,_cache); - break; - } - return smpl; -} - -vec3 nbl_glsl_bsdf_cos_remainder_and_pdf(out float pdf, in nbl_glsl_LightSample _sample, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in BSDFNode bsdf, in float monochromeEta, in nbl_glsl_AnisotropicMicrofacetCache _cache) -{ - // are V and L on opposite sides of the surface? - const bool transmitted = nbl_glsl_isTransmissionPath(interaction.isotropic.NdotV,_sample.NdotL); - - // is the BSDF or BRDF, if it is then we make the dot products `abs` before `max(,0.0)` - const bool transmissive = BSDFNode_isBSDF(bsdf); - const float clampedNdotL = nbl_glsl_conditionalAbsOrMax(transmissive,_sample.NdotL,0.0); - const float clampedNdotV = nbl_glsl_conditionalAbsOrMax(transmissive,interaction.isotropic.NdotV,0.0); - - vec3 remainder; - - const float minimumProjVectorLen = 0.00000001; - if (clampedNdotV>minimumProjVectorLen && clampedNdotL>minimumProjVectorLen) - { - // fresnel stuff for conductors (but reflectance also doubles as albedo) - const mat2x3 ior = BSDFNode_getEta(bsdf); - const vec3 reflectance = BSDFNode_getReflectance(bsdf,_cache.isotropic.VdotH); - - // fresnel stuff for dielectrics - float orientedEta, rcpOrientedEta; - const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); - - // - const float VdotL = dot(interaction.isotropic.V.dir,_sample.L); - - // - const float a = max(BSDFNode_getRoughness(bsdf),0.0001); // TODO: @Crisspl 0-roughness still doesn't work! Also Beckmann has a weird dark rim instead as fresnel!? 
- const float a2 = a*a; - - // TODO: refactor into Material Compiler-esque thing - switch (BSDFNode_getType(bsdf)) - { - case DIFFUSE_OP: - remainder = reflectance*nbl_glsl_oren_nayar_cos_remainder_and_pdf_wo_clamps(pdf,a*a,VdotL,clampedNdotL,clampedNdotV); - break; - case CONDUCTOR_OP: - remainder = nbl_glsl_ggx_cos_remainder_and_pdf_wo_clamps(pdf,nbl_glsl_ggx_trowbridge_reitz(a2,_cache.isotropic.NdotH2),clampedNdotL,_sample.NdotL2,clampedNdotV,interaction.isotropic.NdotV_squared,reflectance,a2); - break; - default: - remainder = vec3(nbl_glsl_ggx_dielectric_cos_remainder_and_pdf(pdf, _sample, interaction.isotropic, _cache.isotropic, monochromeEta, a*a)); - break; - } - } - else - remainder = vec3(0.0); - return remainder; -} - -layout (constant_id = 0) const int MAX_DEPTH_LOG2 = 4; -layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; - - -#include - -// TODO: use PCG hash + XOROSHIRO and don't read any textures -mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) -{ - mat2x3 retval; - uint address = bitfieldInsert(protoDimension,_sample,MAX_DEPTH_LOG2,MAX_SAMPLES_LOG2); - for (int i=0; i<2u; i++) - { - uvec3 seqVal = texelFetch(sampleSequence,int(address)+i).xyz; - seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); - retval[i] = vec3(seqVal)*uintBitsToFloat(0x2f800004u); - } - return retval; -} - - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction); -int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) -{ - const bool anyHit = intersectionT!=nbl_glsl_FLT_MAX; - - int objectID = -1; - for (int i=0; i0.0 && tnbl_glsl_FLT_MIN; - // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself - nbl_glsl_AnisotropicMicrofacetCache _cache; - validPath = validPath && 
nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, monochromeEta); - // infinite PDF would mean a point light or a thin line, but our lights have finite radiance per steradian (area lights) - if (lightPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) - ray._payload.accumulation += neeContrib; - } - } -#ifndef NEE_ONLY - } - - // sample BSDF - float bsdfPdf; vec3 bsdfSampleL; - { - nbl_glsl_AnisotropicMicrofacetCache _cache; - nbl_glsl_LightSample bsdf_sample = nbl_glsl_bsdf_cos_generate(interaction,epsilon[1],bsdf,monochromeEta,_cache); - // the value of the bsdf divided by the probability of the sample being generated - throughput *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,bsdf_sample,interaction,bsdf,monochromeEta,_cache); - // - bsdfSampleL = bsdf_sample.L; - } - - // additional threshold - const float lumaThroughputThreshold = lumaContributionThreshold; - if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) - { - ray._payload.throughput = throughput; - ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch - ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; - - // trace new ray - ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); - ray._immutable.direction = bsdfSampleL; - #if POLYGON_METHOD==2 - ray._immutable.normalAtOrigin = interaction.isotropic.N; - ray._immutable.wasBSDFAtOrigin = isBSDF; - #endif - return true; - } -#endif - } - return false; -} - -void main() -{ - const ivec2 imageExtents = imageSize(outImage); - const ivec2 coords = getCoordinates(); - vec2 texCoord = vec2(coords) / vec2(imageExtents); - texCoord.y = 1.0 - texCoord.y; - - if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) { - return; - } - - if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || 
((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0) - { - vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); - imageStore(outImage, coords, pixelCol); - return; - } - - nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; - const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); - - - const mat4 invMVP = PTPushConstant.invMVP; - - vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); - vec3 camPos; - { - vec4 tmp = invMVP*NDC; - camPos = tmp.xyz/tmp.w; - NDC.z = 1.0; - } - - vec3 color = vec3(0.0); - float meanLumaSquared = 0.0; - // TODO: if we collapse the nested for loop, then all GPUs will get `PTPushConstant.depth` factor speedup, not just NV with separate PC - for (int i=0; i5.0) - color = vec3(1.0,0.0,0.0); - #endif - - vec4 pixelCol = vec4(color, 1.0); - imageStore(outImage, coords, pixelCol); -} -/** TODO: Improving Rendering - -Now: -- Always MIS (path correlated reuse) -- Test MIS alpha (roughness) scheme - -Many Lights: -- Path Guiding -- Light Importance Lists/Classification -- Spatio-Temporal Reservoir Sampling - -Indirect Light: -- Bidirectional Path Tracing -- Uniform Path Sampling / Vertex Connection and Merging / Path Space Regularization - -Animations: -- A-SVGF / BMFR -**/ \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/app_resources/litByRectangle.comp b/30_ComputeShaderPathTracer/app_resources/litByRectangle.comp deleted file mode 100644 index 300cef559..000000000 --- a/30_ComputeShaderPathTracer/app_resources/litByRectangle.comp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 430 core -#extension GL_GOOGLE_include_directive : require - -#define SPHERE_COUNT 8 -#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling -#include "common.glsl" - -#define RECTANGLE_COUNT 1 -const vec3 edge0 = normalize(vec3(2,0,-1)); -const vec3 edge1 = normalize(vec3(2,-5,4)); -Rectangle rectangles[RECTANGLE_COUNT] = { - Rectangle_Rectangle(vec3(-3.8,0.35,1.3),edge0*7.0,edge1*0.1,INVALID_ID_16BIT,0u) -}; - - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) -{ - for (int i=0; i0.0 && t -#include -#include - -float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) -{ - const Rectangle rect = rectangles[Light_getObjectID(light)]; - - const ImmutableRay_t _immutable = ray._immutable; - const vec3 L = _immutable.direction; -#if POLYGON_METHOD==0 - const float dist = ray._mutable.intersectionT; - return dist*dist/abs(dot(Rectangle_getNormalTimesArea(rect),L)); -#else - #ifdef TRIANGLE_REFERENCE - const mat3 sphericalVertices[2] = - { - nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),_immutable.origin), - nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),_immutable.origin) - }; - float solidAngle[2]; - vec3 cos_vertices[2],sin_vertices[2]; - float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; - for (uint i=0u; i<2u; i++) - solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); - const float rectSolidAngle = solidAngle[0]+solidAngle[1]; - #if POLYGON_METHOD==1 - return 1.f/rectSolidAngle; - #elif POLYGON_METHOD==2 - // TODO: figure out what breaks for a directly visible light under MIS - if (rectSolidAngle > nbl_glsl_FLT_MIN) - { - const vec2 bary = 
nbl_glsl_barycentric_reconstructBarycentrics(L*ray._mutable.intersectionT+_immutable.origin-rect.offset,mat2x3(rect.edge0,rect.edge1)); - const uint i = bary.x>=0.f&&bary.y>=0.f&&(bary.x+bary.y)<=1.f ? 0u:1u; - - float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); - pdf *= solidAngle[i]/rectSolidAngle; - return pdf; - } - else - return nbl_glsl_FLT_INF; - #endif - #else - float pdf; - mat3 rectNormalBasis; - vec2 rectExtents; - Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); - vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(_immutable.origin, rect.offset, rectNormalBasis); - float solidAngle = nbl_glsl_shapes_SolidAngleOfRectangle(sphR0, rectExtents); - if (solidAngle > nbl_glsl_FLT_MIN) - { - #if POLYGON_METHOD==1 - pdf = 1.f/solidAngle; - #else - #error - #endif - } - else - pdf = nbl_glsl_FLT_INF; - return pdf; - #endif -#endif -} - -vec3 nbl_glsl_light_generate_and_pdf(out float pdf, out float newRayMaxT, in vec3 origin, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in bool isBSDF, in vec3 xi, in uint objectID) -{ - const Rectangle rect = rectangles[objectID]; - const vec3 N = Rectangle_getNormalTimesArea(rect); - - const vec3 origin2origin = rect.offset-origin; -#if POLYGON_METHOD==0 - vec3 L = origin2origin+rect.edge0*xi.x+rect.edge1*xi.y; // TODO: refactor - - const float distanceSq = dot(L,L); - const float rcpDistance = inversesqrt(distanceSq); - L *= rcpDistance; - - pdf = distanceSq/abs(dot(N,L)); - newRayMaxT = 1.0/rcpDistance; - return L; -#else - #ifdef TRIANGLE_REFERENCE - const mat3 sphericalVertices[2] = - { - nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset,rect.offset+rect.edge0,rect.offset+rect.edge1),origin), - 
nbl_glsl_shapes_getSphericalTriangle(mat3(rect.offset+rect.edge1,rect.offset+rect.edge0,rect.offset+rect.edge0+rect.edge1),origin) - }; - float solidAngle[2]; - vec3 cos_vertices[2],sin_vertices[2]; - float cos_a[2],cos_c[2],csc_b[2],csc_c[2]; - for (uint i=0u; i<2u; i++) - solidAngle[i] = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i]); - vec3 L = vec3(0.f,0.f,0.f); - const float rectangleSolidAngle = solidAngle[0]+solidAngle[1]; - if (rectangleSolidAngle > nbl_glsl_FLT_MIN) - { - float rcpTriangleChoiceProb; - const uint i = nbl_glsl_partitionRandVariable(solidAngle[0]/rectangleSolidAngle,xi.z,rcpTriangleChoiceProb) ? 1u:0u; - #if POLYGON_METHOD==1 - L = nbl_glsl_sampling_generateSphericalTriangleSample(solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],xi.xy); - pdf = 1.f/rectangleSolidAngle; - #elif POLYGON_METHOD==2 - float rcpPdf; - L = nbl_glsl_sampling_generateProjectedSphericalTriangleSample(rcpPdf,solidAngle[i],cos_vertices[i],sin_vertices[i],cos_a[i],cos_c[i],csc_b[i],csc_c[i],sphericalVertices[i],interaction.isotropic.N,isBSDF,xi.xy); - pdf = 1.f/(rcpPdf*rcpTriangleChoiceProb); - #endif - } - else - pdf = nbl_glsl_FLT_INF; - #else - mat3 rectNormalBasis; - vec2 rectExtents; - Rectangle_getNormalBasis(rect, rectNormalBasis, rectExtents); - vec3 sphR0 = nbl_glsl_shapes_getSphericalRectangle(origin, rect.offset, rectNormalBasis); - vec3 L = vec3(0.f,0.f,0.f); - float solidAngle; - vec2 sphUv = nbl_glsl_sampling_generateSphericalRectangleSample(sphR0, rectExtents, xi.xy, solidAngle); - if (solidAngle > nbl_glsl_FLT_MIN) - { - #if POLYGON_METHOD==1 - vec3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; - L = normalize(sph_sample - origin); - pdf = 1.f/solidAngle; - #else - #error - #endif - } - else - pdf = nbl_glsl_FLT_INF; - #endif - newRayMaxT = dot(N,origin2origin)/dot(N,L); - return L; -#endif -} - 
- -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - if (objectID0.0) - { - const float rcpDistance = inversesqrt(distanceSQ); - Z *= rcpDistance; - - const float cosThetaMax = sqrt(cosThetaMax2); - const float cosTheta = mix(1.0,cosThetaMax,xi.x); - - vec3 L = Z*cosTheta; - - const float cosTheta2 = cosTheta*cosTheta; - const float sinTheta = sqrt(1.0-cosTheta2); - float sinPhi,cosPhi; - nbl_glsl_sincos(2.0*nbl_glsl_PI*xi.y-nbl_glsl_PI,sinPhi,cosPhi); - mat2x3 XY = nbl_glsl_frisvad(Z); - - L += (XY[0]*cosPhi+XY[1]*sinPhi)*sinTheta; - - newRayMaxT = (cosTheta-sqrt(cosTheta2-cosThetaMax2))/rcpDistance; - pdf = 1.0/Sphere_getSolidAngle_impl(cosThetaMax); - return L; - } - pdf = 0.0; - return vec3(0.0,0.0,0.0); -} - -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - Sphere sphere = spheres[objectID]; - normal = Sphere_getNormal(sphere,intersection); - return sphere.bsdfLightIDs; -} \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/app_resources/litByTriangle.comp b/30_ComputeShaderPathTracer/app_resources/litByTriangle.comp deleted file mode 100644 index ba23c82e5..000000000 --- a/30_ComputeShaderPathTracer/app_resources/litByTriangle.comp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 430 core -#extension GL_GOOGLE_include_directive : require - -#define SPHERE_COUNT 8 -#define POLYGON_METHOD 1 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling -#include "common.glsl" - -#define TRIANGLE_COUNT 1 -Triangle triangles[TRIANGLE_COUNT] = { - Triangle_Triangle(mat3(vec3(-1.8,0.35,0.3),vec3(-1.2,0.35,0.0),vec3(-1.5,0.8,-0.3))*10.0,INVALID_ID_16BIT,0u) -}; - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) -{ - for (int i=0; i0.0 && t -float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) -{ - const Triangle tri = triangles[Light_getObjectID(light)]; - - const vec3 L = ray._immutable.direction; -#if POLYGON_METHOD==0 - const float dist = ray._mutable.intersectionT; - return dist*dist/abs(dot(Triangle_getNormalTimesArea(tri),L)); -#else - const ImmutableRay_t _immutable = ray._immutable; - const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); - #if POLYGON_METHOD==1 - const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); - // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 - return rcpProb>nbl_glsl_FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; - #elif POLYGON_METHOD==2 - const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); - // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small - return pdfnbl_glsl_FLT_MIN ? 
(1.0/rcpPdf):0.0; - - const vec3 N = Triangle_getNormalTimesArea(tri); - newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); - return L; -#endif -} - - -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - if (objectID -using namespace nbl::hlsl; -using namespace ext::FullScreenTriangle; - -// binding 0 set 0 -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; - -[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 -{ - return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); -} \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/config.json.template b/30_ComputeShaderPathTracer/config.json.template deleted file mode 100644 index 24adf54fb..000000000 --- a/30_ComputeShaderPathTracer/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} diff --git a/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp b/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp deleted file mode 100644 index 3745ca512..000000000 --- a/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ - -#include "nbl/examples/examples.hpp" - -// example's own headers -#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ? 
-#include "nbl/ext/ImGui/ImGui.h" -#include "imgui/imgui_internal.h" - -#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp deleted file mode 100644 index 1ba8c53ef..000000000 --- a/30_ComputeShaderPathTracer/main.cpp +++ /dev/null @@ -1,1304 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - - -#include "nbl/examples/examples.hpp" - -#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" -#include "nbl/builtin/hlsl/surface_transform.h" -#include - -#include "nbl/this_example/common.hpp" - - -using namespace nbl; -using namespace nbl::core; -using namespace nbl::hlsl; -using namespace nbl::system; -using namespace nbl::asset; -using namespace nbl::ui; -using namespace nbl::video; -using namespace nbl::examples; - -// TODO: share push constants -struct PTPushConstant { - hlsl::float32_t4x4 invMVP; - int sampleCount; - int depth; -}; - -// TODO: Add a QueryPool for timestamping once its ready (actually add IMGUI mspf plotter) -// TODO: Do buffer creation using assConv -class ComputeShaderPathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication -{ - using device_base_t = SimpleWindowedApplication; - using asset_base_t = BuiltinResourcesApplication; - using clock_t = std::chrono::steady_clock; - - enum E_LIGHT_GEOMETRY : uint8_t - { - ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE, - ELG_COUNT - }; - - constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; - constexpr static inline uint32_t MaxFramesInFlight = 5; - constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); - constexpr static inline uint32_t DefaultWorkGroupSize = 16u; - constexpr static inline uint32_t MaxDescriptorCount = 256u; - constexpr static inline uint32_t 
MaxDepthLog2 = 4u; // 5 - constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 - constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; - constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; - constexpr static inline uint8_t MaxUITextureCount = 1u; - static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; - static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::array PTShaderPaths = { "app_resources/litBySphere.comp", "app_resources/litByTriangle.comp", "app_resources/litByRectangle.comp" }; - static inline std::string PresentShaderPath = "app_resources/present.frag.hlsl"; - - const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { - "ELG_SPHERE", - "ELG_TRIANGLE", - "ELG_RECTANGLE" - }; - - public: - inline ComputeShaderPathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - inline bool isComputeOnly() const override { return false; } - - virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override - { - auto retval = device_base_t::getPreferredDeviceFeatures(); - retval.pipelineExecutableInfo = true; - return retval; - } - - inline core::vector getSurfaces() const override - { - if (!m_surface) - { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WindowDimensions.x; - params.height = WindowDimensions.y; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "ComputeShaderPathtracer"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } - - auto 
surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); - } - - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; - - return {}; - } - - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - // Init systems - { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - - // Remember to call the base class initialization! - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!asset_base_t::onAppInitialized(std::move(system))) - return false; - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - - if (!m_semaphore) - return logFail("Failed to create semaphore!"); - } - - // Create renderpass and init surface - nbl::video::IGPURenderpass* renderpass; - { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); - - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = - { - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - auto scResources = 
std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - renderpass = scResources->getRenderpass(); - - if (!renderpass) - return logFail("Failed to create Renderpass!"); - - auto gQueue = getGraphicsQueue(); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - } - - // image upload utils - { - m_scratchSemaphore = m_device->createSemaphore(0); - if (!m_scratchSemaphore) - return logFail("Could not create Scratch Semaphore"); - m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); - // we don't want to overcomplicate the example with multi-queue - m_intendedSubmit.queue = getGraphicsQueue(); - // wait for nothing before upload - m_intendedSubmit.waitSemaphores = {}; - m_intendedSubmit.waitSemaphores = {}; - // fill later - m_intendedSubmit.scratchCommandBuffers = {}; - m_intendedSubmit.scratchSemaphore = { - .semaphore = m_scratchSemaphore.get(), - .value = 0, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - } - - // Create command pool and buffers - { - auto gQueue = getGraphicsQueue(); - m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdPool) - return logFail("Couldn't create Command Pool!"); - - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) - return logFail("Couldn't create Command Buffer!"); - } - - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 - }; - auto defaultSampler = m_device->createSampler(samplerParams); - - // Create descriptors and pipeline for the pathtracer - { - auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = 
m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuLayout.get(),1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuLayout = reservation.getGPUObjects().front().value; - if (!gpuLayout) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); - std::exit(-1); - } - - return gpuLayout; - }; - auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuDS.get(), 1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuDS = reservation.getGPUObjects().front().value; - if (!gpuDS) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); - std::exit(-1); - } - - return gpuDS; - }; - - std::array descriptorSet0Bindings = {}; - std::array descriptorSet3Bindings = {}; - std::array presentDescriptorSetBindings; - - descriptorSet0Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - 
.createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[1] = { - .binding = 1u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[2] = { - .binding = 2u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - presentDescriptorSetBindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - }; - - auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); - auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); - - auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); - auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); - - auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); - auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); - - m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); - m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); - - smart_refctd_ptr presentDSPool; - { - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 
1u }; - presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - } - m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - - // Create Shaders - auto loadAndCompileShader = [&](std::string pathToShader) - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.workingDirectory = localInputCWD; - auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - { - m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - auto source = IAsset::castDown(assets[0]); - // The down-cast should not fail! - assert(source); - - auto shader = m_device->compileShader({ .source = source.get(), .stage = ESS_COMPUTE }); - if (!shader) - { - m_logger->log("Shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - return shader; - }; - - // Create compute pipelines - { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { - auto ptShader = loadAndCompileShader(PTShaderPaths[index]); - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(PTPushConstant) - }; - auto ptPipelineLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout0), - nullptr, - core::smart_refctd_ptr(gpuDescriptorSetLayout2), - nullptr - ); - if (!ptPipelineLayout) { - return logFail("Failed to create Pathtracing pipeline layout"); - } - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.cached.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - params.flags |= 
IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; - } - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { - return logFail("Failed to create compute pipeline!\n"); - } - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTPipelines[index]->getExecutableInfo()); - m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); - } - } - } - - // Create graphics pipeline - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - // Load Fragment Shader - auto fragmentShader = loadAndCompileShader(PresentShaderPath); - if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), - .entryPoint = "main", - }; - - auto presentLayout = m_device->createPipelineLayout( - {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); - - } - } - - // load CPUImages and convert to GPUImages - smart_refctd_ptr envMap, scrambleMap; - { - auto convertImgCPU2GPU = [&](std::span cpuImgs) { - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - std::array commandBufferInfo = { cmdbuf }; - core::smart_refctd_ptr imgFillSemaphore = 
m_device->createSemaphore(0); - imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); - - auto converter = CAssetConverter::create({ .device = m_device.get() }); - // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. - struct SInputs final : CAssetConverter::SInputs - { - // we also need to override this to have concurrent sharing - inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override - { - if (familyIndices.size() > 1) - return familyIndices; - return {}; - } - - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return image->getCreationParameters().mipLevels; - } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; - } - - std::vector familyIndices; - } inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - { - const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; - inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; - } - // scratch command buffers for asset converter transfer commands - SIntendedSubmitInfo transfer = { - .queue = queue, - .waitSemaphores = {}, - .prevCommandBuffers = {}, - .scratchCommandBuffers = commandBufferInfo, - .scratchSemaphore = { - .semaphore = imgFillSemaphore.get(), - .value = 0, - // because of layout transitions - .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS - } - }; - // as per the `SIntendedSubmitInfo` one commandbuffer must be begun - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the - // compute queue becomes the owner of the buffers and 
images post-transfer, but in this example we use concurrent sharing - CAssetConverter::SConvertParams params = {}; - params.transfer = &transfer; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = cpuImgs; - // assert that we don't need to provide patches - assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuImgs = reservation.getGPUObjects(); - for (auto& gpuImg : gpuImgs) { - if (!gpuImg) { - m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); - std::exit(-1); - } - } - - // and launch the conversions - m_api->startCapture(); - auto result = reservation.convert(params); - m_api->endCapture(); - if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { - m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); - std::exit(-1); - } - - envMap = gpuImgs[0].value; - scrambleMap = gpuImgs[1].value; - }; - - smart_refctd_ptr envMapCPU, scrambleMapCPU; - { - IAssetLoader::SAssetLoadParams lp; - lp.workingDirectory = this->sharedInputCWD; - SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); - if (bundle.getContents().empty()) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); - std::exit(-1); - } - - envMapCPU = IAsset::castDown(bundle.getContents()[0]); - if (!envMapCPU) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); - std::exit(-1); - } - } - { - asset::ICPUImage::SCreationParams info; - info.format = asset::E_FORMAT::EF_R32G32_UINT; - info.type = asset::ICPUImage::ET_2D; - auto extent = envMapCPU->getCreationParameters().extent; - info.extent.width = extent.width; - info.extent.height = extent.height; - info.extent.depth = 1u; - info.mipLevels = 1u; - info.arrayLayers = 1u; - info.samples = 
asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - info.flags = static_cast(0u); - info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; - - scrambleMapCPU = ICPUImage::create(std::move(info)); - const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); - const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); - auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); - - core::RandomSampler rng(0xbadc0ffeu); - auto out = reinterpret_cast(texelBuffer->getPointer()); - for (auto index = 0u; index < texelBufferSize / 4; index++) { - out[index] = rng.nextSample(); - } - - auto regions = core::make_refctd_dynamic_array>(1u); - ICPUImage::SBufferCopy& region = regions->front(); - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = 0u; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - region.bufferOffset = 0u; - region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); - region.bufferImageHeight = 0u; - region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent = scrambleMapCPU->getCreationParameters().extent; - - scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); - - // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) - scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); - } - - std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; - convertImgCPU2GPU(cpuImgs); - } - - // create views for textures - { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { - IGPUImage::SCreationParams imgInfo; - imgInfo.format = colorFormat; - imgInfo.type = IGPUImage::ET_2D; - imgInfo.extent.width = width; - imgInfo.extent.height = height; - 
imgInfo.extent.depth = 1u; - imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; - imgInfo.samples = IGPUImage::ESCF_1_BIT; - imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; - - auto image = m_device->createImage(std::move(imgInfo)); - auto imageMemReqs = image->getMemoryReqs(); - imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - m_device->allocate(imageMemReqs, image.get()); - - return image; - }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr - { - auto format = img->getCreationParameters().format; - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(img); - imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - return m_device->createImageView(std::move(imgViewInfo)); - }; - - auto params = envMap->getCreationParameters(); - auto extent = params.extent; - envMap->setObjectDebugName("Env Map"); - m_envMapView = createHDRIImageView(envMap); - m_envMapView->setObjectDebugName("Env Map View"); - scrambleMap->setObjectDebugName("Scramble Map"); - m_scrambleView = createHDRIImageView(scrambleMap); - m_scrambleView->setObjectDebugName("Scramble Map View"); - auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); - outImg->setObjectDebugName("Output Image"); - m_outImgView = createHDRIImageView(outImg); - m_outImgView->setObjectDebugName("Output Image View"); - } - - // create sequence buffer view - { - // TODO: do this better use asset manager to get the ICPUBuffer from 
`.bin` - auto createBufferFromCacheFile = [this]( - system::path filename, - size_t bufferSize, - void *data, - smart_refctd_ptr& buffer - ) -> std::pair, bool> - { - ISystem::future_t> owenSamplerFileFuture; - ISystem::future_t owenSamplerFileReadFuture; - size_t owenSamplerFileBytesRead; - - m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); - smart_refctd_ptr owenSamplerFile; - - if (owenSamplerFileFuture.wait()) - { - owenSamplerFileFuture.acquire().move_into(owenSamplerFile); - if (!owenSamplerFile) - return { nullptr, false }; - - owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); - if (owenSamplerFileReadFuture.wait()) - { - owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); - - if (owenSamplerFileBytesRead < bufferSize) - { - buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); - return { owenSamplerFile, false }; - } - - buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); - } - } - - return { owenSamplerFile, true }; - }; - auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) - { - ISystem::future_t owenSamplerFileWriteFuture; - size_t owenSamplerFileBytesWritten; - - file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); - if (owenSamplerFileWriteFuture.wait()) - owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); - }; - - constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; - std::array data = {}; - smart_refctd_ptr sampleSeq; - - auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) - { - core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); - - ICPUBuffer::SCreationParams params = {}; - params.size = MaxBufferDimensions*MaxBufferSamples*sizeof(uint32_t); - sampleSeq = ICPUBuffer::create(std::move(params)); - - auto out = 
reinterpret_cast(sampleSeq->getPointer()); - for (auto dim = 0u; dim < MaxBufferDimensions; dim++) - for (uint32_t i = 0; i < MaxBufferSamples; i++) - { - out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); - } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); - } - - IGPUBuffer::SCreationParams params = {}; - params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; - params.size = sampleSeq->getSize(); - - // we don't want to overcomplicate the example with multi-queue - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; - m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; - - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - m_api->startCapture(); - auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( - m_intendedSubmit, - std::move(params), - sampleSeq->getPointer() - ); - m_api->endCapture(); - bufferFuture.wait(); - auto buffer = bufferFuture.get(); - - m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); - m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); - } - - // Update Descriptors - { - ISampler::SParams samplerParams0 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_FLOAT_OPAQUE_BLACK, - ISampler::ETF_LINEAR, - ISampler::ETF_LINEAR, - ISampler::ESMM_LINEAR, - 0u, - false, - ECO_ALWAYS - }; - auto sampler0 = m_device->createSampler(samplerParams0); - ISampler::SParams samplerParams1 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_INT_OPAQUE_BLACK, - 
ISampler::ETF_NEAREST, - ISampler::ETF_NEAREST, - ISampler::ESMM_NEAREST, - 0u, - false, - ECO_ALWAYS - }; - auto sampler1 = m_device->createSampler(samplerParams1); - - std::array writeDSInfos = {}; - writeDSInfos[0].desc = m_outImgView; - writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writeDescriptorSets = {}; - writeDescriptorSets[0] = { - .dstSet = m_descriptorSet0.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[0] - }; - writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[1] - }; - writeDescriptorSets[2] = { - .dstSet = m_descriptorSet2.get(), - .binding = 1, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[2] - }; - writeDescriptorSets[3] = { - .dstSet = m_descriptorSet2.get(), - .binding = 2, - .arrayElement = 0u, - .count = 1u, 
- .info = &writeDSInfos[3] - }; - writeDescriptorSets[4] = { - .dstSet = m_presentDescriptorSet.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[4] - }; - - m_device->updateDescriptorSets(writeDescriptorSets, {}); - } - - // Create ui descriptors - { - using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; - { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - - m_ui.samplers.gui = m_device->createSampler(params); - m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); - } - - std::array, 69u> immutableSamplers; - for (auto& it : immutableSamplers) - it = smart_refctd_ptr(m_ui.samplers.scene); - - immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); - - nbl::ext::imgui::UI::SCreationParameters params; - - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; - params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; - params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; - params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); - params.renderpass = smart_refctd_ptr(renderpass); - params.streamingBuffer = nullptr; - params.subpassIx = 0u; - params.transfer = getTransferUpQueue(); - params.utilities = m_utils; - { - m_ui.manager = ext::imgui::UI::create(std::move(params)); - - // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - const auto& params = m_ui.manager->getCreationParameters(); 
- - IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; - descriptorPoolInfo.maxSets = 1u; - descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; - - m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); - assert(m_guiDescriptorSetPool); - - m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); - assert(m_ui.descriptorSet); - } - } - m_ui.manager->registerListener( - [this]() -> void { - ImGuiIO& io = ImGui::GetIO(); - - m_camera.setProjectionMatrix([&]() - { - static hlsl::float32_t4x4 projection; - - projection = hlsl::math::thin_lens::rhPerspectiveFovMatrix(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); - - return projection; - }()); - - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); - - ImGui::SameLine(); - - ImGui::Text("Camera"); - - ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); - ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); - ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 6); - - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - - 
ImGui::End(); - } - ); - - // Set Camera - { - core::vectorSIMDf cameraPosition(0, 5, -10); - hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( - core::radians(60.0f), - float(WindowDimensions.x / WindowDimensions.y), - 0.01f, - 500.0f - ); - m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - } - - m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - m_oracle.reportBeginFrameRecord(); - m_camera.mapKeysToWASD(); - - return true; - } - - bool updateGUIDescriptorSet() - { - // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout - static std::array descriptorInfo; - static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; - - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); - - for (uint32_t i = 0; i < descriptorInfo.size(); ++i) - { - writes[i].dstSet = m_ui.descriptorSet.get(); - writes[i].binding = 0u; - writes[i].arrayElement = i; - writes[i].count = 1u; - } - writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; - - return m_device->updateDescriptorSets(writes, {}); - } - - inline void workLoopBody() override - { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! 
[MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = - { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - - m_api->startCapture(); - - // CPU events - update(); - - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[resourceIx].get(); - - if (!keepRunning()) - return; - - // render whole scene to offline frame buffer & submit - { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; - pc.invMVP = hlsl::inverse(viewProjectionMatrix); - pc.sampleCount = spp; - pc.depth = depth; - - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - 
cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // cube envmap handle - { - auto pipeline = m_PTPipelines[PTPipline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - cmdbuf->dispatch(1 + (WindowDimensions.x - 1) / DefaultWorkGroupSize, 1 + (WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u); - } - - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // TODO: tone mapping and stuff - } - - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WindowDimensions.x; - viewport.height = WindowDimensions.y; - } - cmdbuf->setViewport(0u, 1u, &viewport); - - - VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; - cmdbuf->setScissor(defaultScisors); - 
- const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - auto scRes = static_cast(m_surface->getSwapchainResources()); - - // Upload m_outImg to swapchain + UI - { - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearColor, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; - - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); - - const auto uiParams = m_ui.manager->getCreationParameters(); - auto* uiPipeline = m_ui.manager->getPipeline(); - cmdbuf->bindGraphicsPipeline(uiPipeline); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); - m_ui.manager->render(cmdbuf, waitInfo); - - cmdbuf->endRenderPass(); - } - - cmdbuf->end(); - { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = - { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT - } - }; - { - { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cmdbuf } - }; - - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; - - updateGUIDescriptorSet(); - - if 
(queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; - } - } - - m_window->setCaption("[Nabla Engine] Computer Path Tracer"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); - } - m_api->endCapture(); - } - - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; - - return true; - } - - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } - - inline void update() - { - m_camera.setMoveSpeed(moveSpeed); - m_camera.setRotateSpeed(rotateSpeed); - - static std::chrono::microseconds previousEventTimestamp{}; - - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - - auto updatePresentationTimestamp = [&]() - { - m_currentImageAcquire = m_surface->acquireNextImage(); - - m_oracle.reportEndFrameRecord(); - const auto timestamp = m_oracle.getNextPresentationTimeStamp(); - m_oracle.reportBeginFrameRecord(); - - return timestamp; - }; - - const auto nextPresentationTimestamp = updatePresentationTimestamp(); - - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; - - m_camera.beginInputProcessing(nextPresentationTimestamp); - { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void - { - m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl - - for (const auto& e : events) // here capture - { - if (e.timeStamp < previousEventTimestamp) - continue; - - previousEventTimestamp = e.timeStamp; - capturedEvents.mouse.emplace_back(e); - - if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) - gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(ELG_COUNT - (uint8_t)1u)); - } - }, m_logger.get()); - - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void - { - m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl - - 
for (const auto& e : events) // here capture - { - if (e.timeStamp < previousEventTimestamp) - continue; - - previousEventTimestamp = e.timeStamp; - capturedEvents.keyboard.emplace_back(e); - } - }, m_logger.get()); - } - m_camera.endInputProcessing(nextPresentationTimestamp); - - const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); - const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); - - const ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = mousePosition, - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = mouseEvents, - .keyboardEvents = keyboardEvents - }; - - m_ui.manager->update(params); - } - - private: - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; - - // gpu resources - smart_refctd_ptr m_cmdPool; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTPipelines; - smart_refctd_ptr m_presentPipeline; - uint64_t m_realFrameIx = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; - - core::smart_refctd_ptr m_guiDescriptorSetPool; - - // system resources - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; - - // pathtracer resources - smart_refctd_ptr m_envMapView, m_scrambleView; - smart_refctd_ptr m_sequenceBufferView; - smart_refctd_ptr m_outImgView; - - // sync - smart_refctd_ptr m_semaphore; - - // image upload resources - smart_refctd_ptr m_scratchSemaphore; - SIntendedSubmitInfo m_intendedSubmit; - - struct C_UI - { - nbl::core::smart_refctd_ptr manager; - - 
struct - { - core::smart_refctd_ptr gui, scene; - } samplers; - - core::smart_refctd_ptr descriptorSet; - } m_ui; - - Camera m_camera; - - video::CDumbPresentationOracle m_oracle; - - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - - float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - int PTPipline = E_LIGHT_GEOMETRY::ELG_SPHERE; - int spp = 32; - int depth = 3; - - bool m_firstFrame = true; - IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; -}; - -NBL_MAIN_FUNC(ComputeShaderPathtracer) \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/pipeline.groovy b/30_ComputeShaderPathTracer/pipeline.groovy deleted file mode 100644 index eb20d0c5a..000000000 --- a/30_ComputeShaderPathTracer/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CComputeShaderPathTracerBuilder extends IBuilder -{ - public CComputeShaderPathTracerBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public 
boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CComputeShaderPathTracerBuilder(_agent, _info) -} - -return this diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt index 08f2b8778..88f126f56 100644 --- a/31_HLSLPathTracer/CMakeLists.txt +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -44,6 +44,7 @@ if(NBL_BUILD_IMGUI) set(SM 6_8) set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}/../common/include" -I "${CMAKE_CURRENT_SOURCE_DIR}/include" -I "${CMAKE_CURRENT_SOURCE_DIR}/app_resources/hlsl" -isystem "${NBL_ROOT_PATH}/include" # workaround, the same thing like in IES I will address this issue later diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl index be055bd83..bc8af3c38 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl @@ -16,15 +16,20 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +// #include "nbl/builtin/hlsl/random/pcg.hlsl" #include "nbl/builtin/hlsl/random/xoroshiro.hlsl" -#if PATH_TRACER_ENABLE_PERSISTENT +// #include "nbl/builtin/hlsl/morton.hlsl" -#endif +// #include "nbl/builtin/hlsl/bxdf/reflection.hlsl" #include "nbl/builtin/hlsl/bxdf/transmission.hlsl" +// #include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" #include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" + +#include "nbl/examples/common/KeyedQuantizedSequence.hlsl" + #include "render_common.hlsl" #if PATH_TRACER_USE_RWMC @@ -53,7 +58,6 @@ #endif #include "example_common.hlsl" -#include "rand_gen.hlsl" #include "intersector.hlsl" #include "material_system.hlsl" #include "next_event_estimator.hlsl" diff --git 
a/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl index c5589d800..28e5ada2b 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl @@ -24,7 +24,7 @@ using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; using payload_type = Payload; -using randgen_type = RandomUniformND; +using randgen_type = examples::KeyedQuantizedSequence; using material_system_type = MaterialSystem; #if PATH_TRACER_USE_RWMC @@ -87,7 +87,7 @@ void tracePixel(int32_t2 coords) } scene_type scene; - scene.updateLight(renderPushConstants.generalPurposeLightMatrix); + scene.updateLight(renderPushConstants.lightMatrix()); typename variant_types::raygen_type rayGen; rayGen.pixOffsetParam = pixOffsetParam; @@ -96,7 +96,9 @@ void tracePixel(int32_t2 coords) rayGen.invMVP = renderPushConstants.invMVP; pathtracer.scene = scene; - pathtracer.randGen = randgen_type::create(::scramblebuf[coords].rg, renderPushConstants.pSampleSequence); + pathtracer.randGen.pSampleBuffer = renderPushConstants.pSampleSequence; + pathtracer.randGen.rng = Xoroshiro64Star::construct(scramblebuf[coords].rg); + pathtracer.randGen.sequenceSamplesLog2 = renderPushConstants.sequenceSampleCountLog2; pathtracer.nee.lights = lights; pathtracer.materialSystem.bxdfs = bxdfs; pathtracer.bxdfPdfThreshold = 0.0001; @@ -109,7 +111,7 @@ void tracePixel(int32_t2 coords) accumulator_type accumulator = accumulator_type::create(); #endif - for (int i = 0; i < renderPushConstants.sampleCount; ++i) + for (uint32_t i = 0u; i < renderPushConstants.sampleCount; ++i) { const float32_t3 uvw = pathtracer.randGen(0u, i); typename variant_types::ray_type ray = rayGen.generate(uvw); diff --git a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl deleted file mode 100644 index ece23374d..000000000 --- 
a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _PATHTRACER_EXAMPLE_RANDGEN_INCLUDED_ -#define _PATHTRACER_EXAMPLE_RANDGEN_INCLUDED_ - -#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" -#include "nbl/builtin/hlsl/random/dim_adaptor_recursive.hlsl" - -#include "render_common.hlsl" - -using namespace nbl; -using namespace hlsl; - -template -struct RandomUniformND -{ - using rng_type = RNG; - using return_type = vector; - - static RandomUniformND create(uint32_t2 seed, uint64_t pSampleSequence) - { - RandomUniformND retval; - retval.rng = rng_type::construct(seed); - retval.pSampleBuffer = pSampleSequence; - return retval; - } - - // baseDimension: offset index of the sequence - // sampleIndex: iteration number of current pixel (samples per pixel) - return_type operator()(uint32_t baseDimension, uint32_t sampleIndex) - { - using sequence_type = sampling::QuantizedSequence; - uint32_t address = glsl::bitfieldInsert(baseDimension, sampleIndex, MaxDepthLog2, MaxSamplesLog2); - sequence_type tmpSeq = vk::RawBufferLoad(pSampleBuffer + address * sizeof(sequence_type)); - return tmpSeq.template decode(random::DimAdaptorRecursive::__call(rng)); - } - - rng_type rng; - uint64_t pSampleBuffer; -}; - -#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index f69496c48..d4e48d4bf 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -5,22 +5,42 @@ using namespace nbl; using namespace hlsl; +#define MAX_DEPTH_LOG2 4 +#define MAX_SAMPLES_LOG2 10 +NBL_CONSTEXPR uint32_t MaxDepthLog2 = MAX_DEPTH_LOG2; +NBL_CONSTEXPR uint32_t MaxSamplesLog2 = MAX_SAMPLES_LOG2; + struct RenderPushConstants { - float32_t4x4 invMVP; - float32_t3x4 generalPurposeLightMatrix; - int sampleCount; - int depth; + float32_t3x4 lightMatrix() + { + float32_t4x3 retval; + retval[0] = lightX; + 
retval[1] = lightY; + retval[2] = cross(lightX,lightY)*lightZscale; + retval[3] = lightPos; + return hlsl::transpose(retval); + } + uint64_t pSampleSequence; + float32_t4x4 invMVP; + float32_t3 lightX; + float32_t3 lightY; + float32_t lightZscale; + float32_t3 lightPos; + // TODO: compact a bit and refactor + uint32_t sampleCount : MAX_SAMPLES_LOG2; + uint32_t depth : MAX_DEPTH_LOG2; + uint32_t sequenceSampleCountLog2 : 5; + uint32_t unused : 13; + uint32_t unused1; }; +#undef MAX_SAMPLES_LOG2 +#undef MAX_DEPTH_LOG2 NBL_CONSTEXPR float32_t3 LightEminence = float32_t3(30.0f, 25.0f, 15.0f); NBL_CONSTEXPR uint32_t RenderWorkgroupSizeSqrt = 8u; NBL_CONSTEXPR uint32_t RenderWorkgroupSize = RenderWorkgroupSizeSqrt*RenderWorkgroupSizeSqrt; -NBL_CONSTEXPR uint32_t MaxDepthLog2 = 4u; -NBL_CONSTEXPR uint32_t MaxSamplesLog2 = 10u; -NBL_CONSTEXPR uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; -NBL_CONSTEXPR uint32_t MaxSamplesBuffer = 1u << MaxSamplesLog2; NBL_CONSTEXPR uint32_t MaxDescriptorCount = 256u; NBL_CONSTEXPR uint16_t MaxUITextureCount = 1u; diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 7fb5ae381..b7281ea22 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -2,6 +2,10 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/examples/examples.hpp" +#include "nbl/examples/common/CCachedOwenScrambledSequence.hpp" + #include "argparse/argparse.hpp" #include "nbl/examples/examples.hpp" #include "nbl/this_example/path_tracer_pipeline_state.hpp" @@ -10,14 +14,16 @@ #include "nbl/this_example/transform.hpp" #include "nbl/this_example/render_variant_strings.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + #include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" + #include "nbl/this_example/common.hpp" #include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" #include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" #include "nbl/asset/utils/ISPIRVEntryPointTrimmer.h" #include "nbl/system/ModuleLookupUtils.h" -#include "nbl/examples/common/ScrambleSequence.hpp" +#include "nbl/examples/common/CCachedOwenScrambledSequence.hpp" #include "app_resources/hlsl/render_common.hlsl" #include "app_resources/hlsl/render_rwmc_common.hlsl" #include "app_resources/hlsl/resolve_common.hlsl" @@ -34,6 +40,7 @@ #include "nlohmann/json.hpp" + using namespace nbl; using namespace core; using namespace hlsl; @@ -59,7 +66,6 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui static constexpr std::string_view BuildConfigName = PATH_TRACER_BUILD_CONFIG_NAME; static constexpr std::string_view RuntimeConfigFilename = "path_tracer.runtime.json"; static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; - static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; public: inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) @@ -126,6 +132,16 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_semaphore) return logFail("Failed to create semaphore!"); } + + auto 
sequenceFuture = std::async(std::launch::async,[this]()->auto + { + return CCachedOwenScrambledSequence::create({ + .cachePath = (sharedOutputCWD/CCachedOwenScrambledSequence::SCreationParams::DefaultFilename).string(), + .assMan = m_assetMgr.get(), + .header = {.maxSamplesLog2 = MaxSamplesLog2,.maxDimensions = 0x6u<setObjectDebugName("Cascade View"); } - // create sequence buffer - { - ScrambleSequence::SCreationParams params = { - .queue = getGraphicsQueue(), - .utilities = smart_refctd_ptr(m_utils), - .system = smart_refctd_ptr(m_system), - .localOutputCWD = localOutputCWD, - .sharedOutputCWD = sharedOutputCWD, - .owenSamplerCachePath = OwenSamplerFilePath, - .MaxBufferDimensions = MaxBufferDimensions, - .MaxSamplesBuffer = MaxSamplesBuffer, - }; - m_scrambleSequence = ScrambleSequence::create(params); - } - // Update Descriptors { ISampler::SParams samplerParams0 = { @@ -867,8 +868,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui { "method", &guiControlled.polygonMethod, polygonMethodNames.data(), static_cast(polygonMethodNames.size()) }, }; const this_example::pt_ui::SIntSliderRow renderIntRows[] = { - { "spp", &guiControlled.spp, 1, MaxSamplesBuffer }, - { "depth", &guiControlled.depth, 1, MaxBufferDimensions / 4 }, + { "spp", &guiControlled.spp, 1, (0x1u<getHeader().maxSamplesLog2; + auto* const seqBufferCPU = sequence->getBuffer(); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=getGraphicsQueue()},IGPUBuffer::SCreationParams{seqBufferCPU->getCreationParams()},seqBufferCPU->getPointer()).move_into(m_sequenceBuffer); + m_sequenceBuffer->setObjectDebugName("Low Discrepancy Sequence"); + } + return true; } @@ -1207,22 +1218,25 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui const float32_t4x4 modelViewProjectionMatrix = nbl::hlsl::math::linalg::promoted_mul(viewProjectionMatrix, modelMatrix); const float32_t4x4 invMVP = hlsl::inverse(modelViewProjectionMatrix); 
- if (guiControlled.useRWMC) + pc.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + pc.invMVP = invMVP; { - rwmcPushConstants.renderPushConstants.invMVP = invMVP; - rwmcPushConstants.renderPushConstants.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); - rwmcPushConstants.renderPushConstants.depth = guiControlled.depth; - rwmcPushConstants.renderPushConstants.sampleCount = guiControlled.rwmcParams.sampleCount = guiControlled.spp; - rwmcPushConstants.renderPushConstants.pSampleSequence = m_scrambleSequence->buffer->getDeviceAddress(); - rwmcPushConstants.splattingParameters = rwmc::SPackedSplattingParameters::create(guiControlled.rwmcParams.base, guiControlled.rwmcParams.start, CascadeCount); + const auto matT = hlsl::float32_t4x3(m_lightModelMatrix); + pc.lightX = matT[0]; + pc.lightY = matT[1]; + // Z had a length and a direction, can point colinear or opposite cross product + const auto recon = hlsl::cross(matT[0], matT[1]); + pc.lightZscale = hlsl::sign(hlsl::dot(recon,matT[2]))*hlsl::length(matT[2])/hlsl::length(recon); + pc.lightPos = matT[3]; + assert(pc.lightMatrix()==hlsl::transpose(matT)); } - else + pc.sampleCount = guiControlled.spp; + pc.depth = guiControlled.depth; + pc.sequenceSampleCountLog2 = m_sequenceSamplesLog2; + if (guiControlled.useRWMC) { - pc.invMVP = invMVP; - pc.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); - pc.sampleCount = guiControlled.spp; - pc.depth = guiControlled.depth; - pc.pSampleSequence = m_scrambleSequence->buffer->getDeviceAddress(); + rwmcPushConstants.renderPushConstants = pc; + rwmcPushConstants.splattingParameters = rwmc::SPackedSplattingParameters::create(guiControlled.rwmcParams.base, guiControlled.rwmcParams.start, CascadeCount); } }; updatePathtracerPushConstants(); @@ -2564,10 +2578,10 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui // pathtracer resources smart_refctd_ptr m_envMapView, m_scrambleView; - 
//smart_refctd_ptr m_sequenceBuffer; - smart_refctd_ptr m_scrambleSequence; + smart_refctd_ptr m_sequenceBuffer; smart_refctd_ptr m_outImgView; smart_refctd_ptr m_cascadeView; + uint8_t m_sequenceSamplesLog2; // sync smart_refctd_ptr m_semaphore; diff --git a/40_PathTracer/CMakeLists.txt b/40_PathTracer/CMakeLists.txt index 8c0fbae51..cfdee9493 100644 --- a/40_PathTracer/CMakeLists.txt +++ b/40_PathTracer/CMakeLists.txt @@ -4,6 +4,7 @@ set(NBL_INCLUDE_SERACH_DIRECTORIES "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" "${CMAKE_CURRENT_SOURCE_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/app_resources" ) set(NBL_LIBRARIES "${NBL_EXT_MITSUBA_LOADER_LIB}" @@ -47,6 +48,7 @@ set(JSON [=[ string(CONFIGURE "${JSON}" JSON) set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}/../common/include" -I "${CMAKE_CURRENT_SOURCE_DIR}/include" -T lib_${SM} ) diff --git a/40_PathTracer/app_resources/pathtrace/beauty.hlsl b/40_PathTracer/app_resources/pathtrace/beauty.hlsl index 39132326a..6e51e4c99 100644 --- a/40_PathTracer/app_resources/pathtrace/beauty.hlsl +++ b/40_PathTracer/app_resources/pathtrace/beauty.hlsl @@ -1,31 +1,352 @@ -#include "renderer/shaders/pathtrace/common.hlsl" -using namespace nbl::hlsl; -using namespace nbl::this_example; +#include "common.hlsl" [[vk::push_constant]] SBeautyPushConstants pc; -struct[raypayload] BeautyPayload +// There's actually a huge problem with doing any throughput or accumulation modification in AnyHit shaders, they run out of order (BVH order) and a hit behind your eventual closest hit can invoke the anyhit stage. 
+// +// Most examples which multiply alpha in anyhit are super misleading, because: +// - for shadow / anyhit rays you either eventually hit an opaque (leading to a mul/replacement of transparency by 0) or you hit all opaques along the ray +// - for NEE rays you often have a finite tMax and this stops you accumulating translucency behind the emitter +// - multiplicative operations are order independent, so accumulating the visibility function can happen out of order (basis of many OIT techniques) as long as you know tMax of the closest hit +// - stochastic transparency cancels out the alpha weighting on the throughput, so there's no multiplication to perform, the throughput stays constant no matter what you do. +// Which means it doesn't matter if you perform the test for occluded transparent geometries, you will never know, the alpha on the opaque also cancels out (shouldn't use premultiplied to shade). +// +// However the minute you want to do stochastic RGB translucency the pdf no longer cancels out the RGB weight coefficients. While the application of `opacity/luma(opacity)` from a hit accepted as the closest, +// can be delayed until the closest hit if found, you'll start accumulating the wrong visibility from all the ignored hits. You literally have to use stochastic monochrome transparency. +// +// Furthermore the minute you wish to add emission to the accumulation in the payload you run into Order Dependent Transparency because it requires a blend over operator. +// +// The solutions are then as follows: +// 1. Only use Anyhit to employ stochastic transparency when the translucency weight is monochrome +// 2. Re-trace rays, find closest hit as with (1), then launch anyhit rays with known tMax - this only gets you correct RGB translucency +// 3. 
Use OIT techniques (A-Buffer, MLAB, WBOIT) to estimate the visibility function but without re-tracing need a robust technique which can handle "opaque transparents" +// RGB translucency can be accumulated without sorting an A-Buffer in a O(1) pass over all intersections, also self-balancing tree and MLAB can throw out entries beyond current tMin. +// Note that within a TLAS instances are likely to be traversed approximately in-order, and within a BLAS the primitives are too (see CWBVH8 paper with children visit order depending on ray direction signs). +// Therefore a two tier linked list + insertion sort are a viable alternative to a self-balancing tree. To allow for emittance to be contributed by anyhit stage, it would need to be deferred to be performant, +// the hit attributes would need to be stored alongside the translucency, so at least instance ID (possibly material ID or SBT offset), primitive ID, and the barycentrics. +// 4. Decompose the Complex Mixture Material into a Scalar Delta Transmission plus the rest of the BxDF. The motivation is simple, for monochrome materials we have +// DeltaTransmission*(1-alpha) + alpha*(Rest of BxDF Nodes with their Weights) +// Where the thing getting factored is a blackbox sum of contributors, but we can reformulate any BxDF as +// DeltaTransmission*Factor + (Rest of BxDF Nodes with their Weights) +// Then we can simply break down the transmissive part into a monochrome part and a coloured residual, if we're unwilling to get into negative weights only option is `Transparency = min_element(Factor[0],...)` +// DeltaTransmission * Transparency + (DeltaTransmission * (Factor-Transparency) + Rest of BxDF Nodes with their Weights) +// We can still use stochastic transparency! 
Its just that whenever we accept a hit, we need pass `transparency` at the point of acceptance to the closest hit shader as to compute this +// (DeltaTransmission * (Factor-Transparency) + Rest of BxDF Nodes with their Weights)/(1-Transparency) +// Since Transparency can be just an approximation of the `Factor` in a monochrome form (luma) or its minimum, already computed or fetched data could be passed in payload for accepted hit +// +// MOST IMPORTANT THING: AFTER ANYHIT ACCEPTS, ANOTHER MAY ACCEPT THATS CLOSER! +// This is very important to keep in mind when we do our Solid Angle Sampling. +// +// Anyhit needs to pass the transparency probability to any closest hit it accepts and which then becomes the final anyhit +struct[raypayload] SAnyHitRetval { - uint32_t instanceID : read(caller):write(closesthit); -// float16_t3 normal : read(caller):write(closesthit); + // before sending the ray by the caller + inline void init() + { + rayT = hlsl::numeric_limits::max; + } + // call in AnyHit instead of AcceptHit + inline void acceptHit(const float16_t _transparency) + { + // need to read the spec if an anyhit is possible that the last anyhit to run and accept a hit candidate for a ray is not the last one to + if (rayT>spirv::RayTmaxKHR) + { + rayT = spirv::RayTmaxKHR; + transparency = _transparency; + } + // TODO: call accept Hit intrinsic + } + // + + // opacity russian roulette requires this for Discrete Probability Sampling + float32_t xi : read(anyhit) : write(caller,anyhit); + // need to store the t value at which the anyhit was executed, so we know whether the current closest hit comes from a confirmed anyhit + float32_t rayT : read(caller,anyhit) : write(caller,anyhit); + // essentially the probability of transmission + float16_t transparency : read(caller) : write(anyhit); + // can use additional `float16` to store BxDF mixture weights or other things so they don't need recomputing/re-fetching during shading }; + + +// Because SER based on Material ID will 
probably greatly benefit us, the shading needs to happen in Raygen Shader or ClosestHit executed directly by Raygen +// Lets examine what happens in the 3 options of Shading with SER Hit Objects: +// 1. Fused hitObjectTraceReorderExecuteEXT -> shading in Closest Hit +// Miss and Closest hit still called immediately, Shading happens in both of them, only need payload to store anyhit + random number state (depth and optionally the seed), but `SClosestHitRetval` gets passed to a shading function +// 2. hitObjectTraceRayEXT && Shading in Closest Hit with hitObjectExecuteShaderEXT +// Only Anyhit payload needed, separate `SClosestHitRetval` payload is made in raygen and passed to the hitObjectExecuteShaderEXT, miss shader is not used +// 3. hitObjectTraceRayEXT && Shading in Raygen +// Only Anyhit payload needed, separate `SClosestHitRetval` is made and passed to traceRay, no closest hit shaders at all +struct SClosestHitRetval +{ + float32_t3 hitPos; + // to interpolate our vertex attributes + float32_t2 barycentrics; + // to get our material and geometry data back + uint32_t instancedGeometryID; + // to get particular Triangle's indices + uint32_t primitiveID; + // + float32_t3 geometricNormal; +}; + + +// This payload will eventually not be needed with SER, and only the one below will be used +struct [raypayload] BeautyPayload +{ + inline void markAsMissed() + { + closestRet.geometricNormal = 45.f; + } + inline bool hasMissed() {return closestRet.geometricNormal[0]>1.f;} + + SClosestHitRetval closestRet : read(caller) : write(caller,closesthit); +}; + + +enum E_SBT_OFFSETS : uint16_t +{ + ESBTO_PATH, + ESBTO_NEE +}; + +// TODO: do a function with MIS to do envmap lighting + [shader("raygeneration")] void raygen() { - const uint32_t3 launchID = spirv::LaunchIdKHR; - const uint32_t3 launchSize = spirv::LaunchSizeKHR; + const uint16_t3 launchID = uint16_t3(spirv::LaunchIdKHR); + + const SBeautyPushConstants::S16BitData unpacked16BitPC = pc.get16BitData(); + + // Take n 
samples per frame + // TODO: establish min/max - adaptive sampling + uint16_t samplesThisFrame = unpacked16BitPC.maxSppPerDispatch; + SPixelSamplingInfo samplingInfo = advanceSampleCount(launchID,samplesThisFrame,uint16_t(pc.sensorDynamics.keepAccumulating)); + // took 64k-1 spp + if (samplingInfo.rcpNewSampleCount==0.f) + return; + // weight for non RWMC contribution + const float16_t newSamplesOverTotal = float16_t(float32_t(samplesThisFrame)*samplingInfo.rcpNewSampleCount); + const float16_t rcpSamplesThisFrame = float16_t(1)/float16_t(samplesThisFrame); + + float16_t transparency = 0.f; + SArbitraryOutputValues aovs; + aovs.clear(); + [[loop]] for (uint16_t sampleIndex=samplingInfo.firstSample; sampleIndex!=samplingInfo.newSampleCount; sampleIndex++) + { + // For RWMC to work, every sample must be splatted individually + accum_t color; - gAlbedo[launchID] = float32_t4(float32_t3(launchID)/float32_t3(launchSize),1.f); + const uint32_t PrimaryRayRandTripletsUsed = 2; + // trace primary ray + float32_t3 rayOrigin,rayDir; + // + bool missed; + SClosestHitRetval closestInfo; + { + // fetch random variable from memory + const float32_t3 randVec = samplingInfo.randgen(0u,sampleIndex); + // TODO: motion blur and lens DOF triplet + + // get our NDC coordinates and ray + const float32_t2 pixelSizeNDC = promote(2.f)/float32_t2(spirv::LaunchSizeKHR.xy); + const float32_t2 NDC = float32_t2(launchID.xy)*pixelSizeNDC - promote(1.f); + const SRay ray = SRay::create(pc.sensorDynamics,pixelSizeNDC,NDC,float16_t2(randVec.xy)); + // TODO: possible SER point if doing variable spp + + // TODO: when doing anyhit opacity pass `randVec.z` into the payload + [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] BeautyPayload payload; + payload.markAsMissed(); + spirv::traceRayKHR(gTLASes[0],spv::RayFlagsMaskNone,0xff,ESBTO_PATH,0u,ESBTO_PATH,ray.origin,ray.tMin,ray.direction,ray.tMax,payload); + + // + missed = payload.hasMissed(); + if (missed) + { + const SEnvSample _sample = 
sampleEnv(rayDir); + color = _sample.color; + aovs = aovs + _sample.aov * rcpSamplesThisFrame; + transparency += rcpSamplesThisFrame; + } + else // TODO: erase the `missed` variable and setup the struct in "wasAHit" + { + closestInfo = payload.closestRet; + rayOrigin = ray.origin; + rayDir = ray.direction; + } + } + // trace further rays + if (!missed) + { + // + MaxContributionEstimator contribEstimator = MaxContributionEstimator::create(unpacked16BitPC.rrThroughputWeights); + const uint16_t lastPathDepth = gSensor.lastPathDepth; + // + color = accum_t(0,0,0); + spectral_t throughput = spectral_t(1,1,1); + float32_t otherTechniqueHeuristic = 0.f; + SAOVThroughputs aovThroughput; + aovThroughput.clear(rcpSamplesThisFrame); + // [0].xyz for BRDF Lobe sampling, then reuse [0].z for Russian Roulette, [1].xyz for BTDF Lobe sampling and [1].z for RIS lobe resampling, [2].xyz for NEE + const uint16_t RandDimTriplesPerDepth = 3; + [[loop]] for (uint16_t depth=1; true; depth++) // ideally peel this loop once + { + // TODO: get the material ID and UVs + + float32_t3 shadingNormal = closestInfo.geometricNormal; + + // TODO: possible SER point based on NEE status, and material flags + + // TODO: get AoVs from material and emission + SAOVThroughputs nextThroughput; + nextThroughput.albedo = float16_t3(0,0,0); + nextThroughput.transparency = 0.f; + SArbitraryOutputValues aovContrib; + aovContrib.albedo = float16_t3(1,1,1); + aovContrib.normal = float16_t3(shadingNormal); + // obtain full next + nextThroughput = aovThroughput * nextThroughput; + // already premultiplied by next throughput complement + aovs = aovs + aovContrib * (aovThroughput - nextThroughput); + aovThroughput = nextThroughput; + + // TODO: handle emission and do NEE MIS for any emission found on current hit + if (false) + { + // get emission stream + float16_t3 emission = float16_t3(0,0,0); + // compute emission + const float32_t WeightThreshold = hlsl::numeric_limits::min; + if 
(otherTechniqueHeuristic>WeightThreshold) + { + // compute NEE MIS backward weight on the contribution color + // assert not inf + // apply emissive weight + } + // add emissive to the contribution + color += emission*float16_t3(throughput); + } + + // to keep path depths equal for NEE and BxDF sampling, we can't continue and do NEE + if (depth==lastPathDepth) + break; + + // get next random number, compensate for the triplets ray generation used + const uint16_t sequenceProtoDim = (depth-1)*RandDimTriplesPerDepth+PrimaryRayRandTripletsUsed; + float32_t3 randVec = samplingInfo.randgen(sequenceProtoDim,sampleIndex); + + // perform NEE + float32_t neeProb = 0.f; + if (neeProb) + { + if (true) // whether to perform NEE at all for this material + { + // choose regular lights or envmap + + // TODO: SER point, top bits are NEE kind (none, regular light, envmap, then use bits of NEE random number and current position) + + // perform the NEE sampling + + // compute BxDF eval value, another layer of culling + + // trace shadow rays only for contributing samples + + // TODO: another possible SER point before casting shadow rays + } + } + + // TODO: perform shading + { + // TODO: SER point, top bits are material Flags and ID geting executed + + const float pdf = 1.f / 3.14159f; + // consume additional 3 dimensions BTDF sampling and resampling + rayDir = shadingNormal; + color /= pdf; + throughput = throughput / pdf; + // + otherTechniqueHeuristic = 1.f/pdf; + } + + // to keep path depths equal for NEE and BxDF sampling, we + if (contribEstimator.notCulled(throughput,depth<=gSensor.lastNoRussianRouletteDepth,randVec.z)) + { + // advance ray origin + rayOrigin = closestInfo.hitPos; + + // continue the path + { + // TODO: when doing anyhit opacity pass `randVec.z` into the payload + [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] BeautyPayload payload; + payload.markAsMissed(); + // TODO: start at 0 or numeric_limits::min? 
+ const float32_t tMin = 0.f; + spirv::traceRayKHR(gTLASes[0],spv::RayFlagsMaskNone,0xff,ESBTO_PATH,0u,ESBTO_PATH,rayOrigin,tMin,rayDir,hlsl::numeric_limits::max,payload); + if (payload.hasMissed()) + { + SEnvSample _sample = sampleEnv(rayDir); + if (otherTechniqueHeuristic>0.f) + { + // compute NEE MIS backward weight + // assert not inf + // apply MIS to adjust _sample.color + } + color += _sample.color*throughput; + aovs = aovs + _sample.aov*aovThroughput; + transparency += aovThroughput.transparency; + break; + } + } + } + } + } + // color output + // Accumulator beautyAcc; + // beautyAcc.accumulate(launchID.xy,launchID.z,float32_t3(accumulation.color),samplingInfo.rcpNewSampleCount); + } + // albedo + Accumulator albedoAcc; + albedoAcc.accumulate(launchID.xy,launchID.z,aovs.albedo,newSamplesOverTotal); + // normal + Accumulator normalAcc; + normalAcc.accumulate(launchID.xy,launchID.z,correctSNorm10WhenStoringToUnorm(hlsl::normalize(aovs.normal)),newSamplesOverTotal); + // TODO: motion + // mask + Accumulator maskAcc; + vector opacity = float16_t(1)-transparency; + maskAcc.accumulate(launchID.xy,launchID.z,opacity,newSamplesOverTotal); } + [shader("closesthit")] -void closesthit(inout BeautyPayload payload, in BuiltInTriangleIntersectionAttributes attribs) +void closestHit(inout BeautyPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { + SClosestHitRetval closestHitReturn; + // Which method of barycentric interpolation is more precise? Pick your poison! 
+#define POSITION_RECON_METHOD 0 +#if POSITION_RECON_METHOD!=0 + // compute worldspace hit position + const float32_t3 vertices[3] = spirv::HitTriangleVertexPositionsKHR; +#if POSITION_RECON_METHOD!=2 + // This way at least we stay within the triangle, and compiler can do CSE with the geometric normal calculation + const float32_t3 modelSpacePos = vertices[0] + (vertices[1]-vertices[0]) * attribs.barycentrics[0] + (vertices[2] - vertices[0]) * attribs.barycentrics[1]; +#else + // This way we get less catastrophic cancellation by adding and computing the edges, but can end up outside the triangle + const float32_t modelSpacePos = vertices[0] * (1.f-attribs.barycentrics.u-attribs.barycentrics.v) + vertices[1] * attribs.barycentrics.u + vertices[2] * attribs.barycentrics.v; +#endif + closestHitReturn.hitPos = math::linalg::promoted_mul(spirv::ObjectToWorldKHR,modelSpacePos); +#else + // the way that raytracers have done this before SPV_KHR_ray_tracing_position_fetch + closestHitReturn.hitPos = spirv::WorldRayOriginKHR + spirv::WorldRayDirectionKHR * spirv::RayTmaxKHR; +#endif +#undef POSITION_RECON_METHOD + closestHitReturn.barycentrics = attribs.barycentrics; + closestHitReturn.instancedGeometryID = spirv::InstanceCustomIndexKHR + spirv::RayGeometryIndexKHR; + closestHitReturn.primitiveID = spirv::PrimitiveId; + closestHitReturn.geometricNormal = reconstructGeometricNormal(); + payload.closestRet = closestHitReturn; } -[shader("miss")] -void miss(inout BeautyPayload payload) -{ -} \ No newline at end of file +// TODO: Anyhit transparency \ No newline at end of file diff --git a/40_PathTracer/app_resources/pathtrace/common.hlsl b/40_PathTracer/app_resources/pathtrace/common.hlsl new file mode 100644 index 000000000..709c6407f --- /dev/null +++ b/40_PathTracer/app_resources/pathtrace/common.hlsl @@ -0,0 +1,333 @@ +#include "renderer/shaders/pathtrace/common.hlsl" + +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" + +#include 
"nbl/examples/common/KeyedQuantizedSequence.hlsl" + + +namespace nbl +{ +namespace this_example +{ + +// There's different ways to accumulate with on-line averaging: +// - one Sample every Frame: `avg + (sample-avg)/N` +// - variable Samples every Frame without skipping: `avg + (sampleSum-avg*sampleCount)/N` or `avg + (sampleAvg-avg)*sampleCount/N` +// the second option has 1 MUL extra compared to regular accumulation, whereas first does 1 MUL extra but it requires cheap averaging +// - variable Samples every Frame with skipping: `avg + (sampleSum-avg*(sampleCount+skippedSamples))/N` equivalently `avg+(sampleSum-avg*(N-oldSamples))/N` +// pre-averaged variant is then `avg + (sampleAvg - avg*(N-oldSamples)/sampleCount)*sampleCount/N` + +template// NBL_PRIMARY_REQUIRES( +// hlsl::concepts::accessors::LoadableImage && +// hlsl::concepts::accessors::StorableImage +//) +struct Accumulator +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = LoadStoreImageAccessor::Dimension; + using coded_type = typename LoadStoreImageAccessor::coded_type; + + // TODO: some check that `T` is same integral type and sign + template coord, const uint16_t layer, const vector data, const T rcpNewSampleCount) + { + coded_type val; + + if (rcpNewSampleCount<1.f) + { + composed.get(val,coord,layer); + NBL_UNROLL for (uint16_t i=0; i(hlsl::abs(delta) < LoadStoreImageAccessor::QuantizationThreshold))) + // return; + } + else // clear path + NBL_UNROLL for (uint16_t i=0; i +vector correctSNorm10WhenStoringToUnorm(const vector input) +{ + using vec_t = vector; + return hlsl::mix(input*T(0.499512)+hlsl::promote(0.999022),input*T(0.499512),hlsl::promote(0.f); + +// sample count incrementing function +struct SPixelSamplingInfo +{ + randgen_t randgen; + float32_t rcpNewSampleCount; + uint16_t newSampleCount; + uint16_t firstSample; +}; +SPixelSamplingInfo advanceSampleCount(const uint16_t3 coord, const uint16_t newSamplesThisPixel, const uint16_t dontClear) +{ + SPixelSamplingInfo retval; + // + 
retval.firstSample = uint16_t(gSampleCount[coord])*dontClear; + // setup randgen + { + retval.randgen.pSampleBuffer = gScene.init.pSampleSequence; + // TODO: experiment with storing every dimension scramble in the texture to not pollute the ray payload + retval.randgen.rng = scramble_state_t::construct(gScrambleKey[uint16_t3(coord.xy & uint16_t(511), 0)]); + retval.randgen.sequenceSamplesLog2 = gScene.init.sequenceSamplesLog2; // TODO: make this compile time constant - Spec Constant? + } + // + retval.newSampleCount = retval.firstSample+newSamplesThisPixel; + gSampleCount[coord] = retval.newSampleCount; + // handle overflow + retval.rcpNewSampleCount = hlsl::select(retval.newSampleCount>retval.firstSample,1.f/float32_t(retval.newSampleCount),0.f); + return retval; +} + +// TODO: split into RayDir +// raygen functions +struct SRay +{ + static SRay create(const SSensorDynamics sensor, const float32_t2 pixelSizeNDC, const float32_t2 ndc, const float16_t2 xi) + { + using namespace nbl::hlsl; + using namespace nbl::hlsl::math::linalg; + + // stochastic reconstruction filter + const float16_t stddev = float16_t(1.2); + const float32_t3 adjNDC = float32_t3(path_tracing::GaussianFilter::create(stddev,stddev).sample(xi)*pixelSizeNDC+ndc,-1.f); + // unproject + const float32_t3 direction = hlsl::normalize(float32_t3(hlsl::mul(sensor.ndcToRay,adjNDC), -1.0)); + const float32_t3 origin = -float32_t3(direction.xy/direction.z,sensor.nearClip); + // rotate with camera + SRay retval; + retval.origin = promoted_mul(sensor.invView,origin); + retval.tMin = sensor.nearClip; + retval.direction = hlsl::normalize(hlsl::mul(truncate<3,3,3,4>(sensor.invView),direction)); + retval.tMax = sensor.tMax; + return retval; + } + + float32_t3 origin; + float32_t tMin; + float32_t3 direction; + float32_t tMax; + // TODO: ray differentials or covariance +}; + +// variables that multiply together +struct SAOVThroughputs +{ + inline void clear(const float16_t weight) + { + albedo = 
hlsl::promote(transparency = weight); + } + + inline SAOVThroughputs operator-(const SAOVThroughputs factor) + { + SAOVThroughputs retval; + retval.albedo -= factor.albedo; + retval.transparency -= factor.transparency; + return retval; + } + + inline SAOVThroughputs operator*(const float16_t factor) + { + SAOVThroughputs retval; + retval.albedo *= factor; + retval.transparency *= factor; + return retval; + } + inline SAOVThroughputs operator*(const SAOVThroughputs factor) + { + SAOVThroughputs retval; + retval.albedo *= factor.albedo; + retval.transparency *= factor.transparency; + return retval; + } + + inline SAOVThroughputs operator/(const float16_t factor) + { + SAOVThroughputs retval; + retval.albedo /= factor; + retval.transparency /= factor; + return retval; + } + inline SAOVThroughputs operator/(const SAOVThroughputs factor) + { + SAOVThroughputs retval; + retval.albedo /= factor.albedo; + retval.transparency /= factor.transparency; + return retval; + } + + + // RGB transparency of smooth reflections and refractions, used for modulating albedo and most AOVs + float16_t3 albedo; + // Motion is special because Real Time defines it as a mapping of where current pixel was last frame. + // True motion output would require us to implement differentiable rendering and formulate motion as an integral of `Throughput dScreenPos/dTime` which is super tricky because: + // - A turning mirror imparts motion on the reflection of a static object + // - whats the motion vector for a disoccluded part of a reflection? How to even know about a disocclusion/our reflection's motion vector reprojecting badly? + // - its not generally a function, the current pixel could be in multiple places at in the last frame (think about the flow of the reflection of your face in a concave spoon) + // - Non-differentiability, hard edges of triangles and Breps + // - lighting imparts is own motion vectors, e.g. shadows move across static surfaces + // - how to weigh contributions? 
luma of RGB effect? individually, etc. + // TL;DR you can't just blend motions and get something useful (even less than normals), only directly transmissive paths should be allowed to accumulate their motion vectors (easy to calculate) + // as we pass through surfaces we need to know how much of the outgoing ray distribution is focused around the directly transmissive direction. This can modulate both our masking and motion vectors. + // Albeit for smooth but refractive surfaces we could experiment with accepting transparent masking even though ray direction won't match in a simple Photoshop compositing, + // but would you rather have an opaque swimming pool, round glass vase, or water droplet OR composited with no refraction? But then we'd need a motion throughput and track some more metadata. + float16_t transparency; +}; + +using spectral_t = float32_t3; + +// TODO: use the CIE stuff +NBL_CONSTEXPR_INLINE_NSPC_SCOPE_VAR float16_t3 LumaConversionCoeffs = float16_t3(0.39,0.5,0.11); + +struct SArbitraryOutputValues +{ + inline void clear() + { + normal = albedo = float16_t3(0,0,0); + // TODO: motion + } + + inline SArbitraryOutputValues operator+(const SArbitraryOutputValues other) + { + SArbitraryOutputValues retval; + retval.albedo = albedo+other.albedo; + retval.normal = normal+other.normal; + return retval; + } + + inline SArbitraryOutputValues operator*(const float16_t factor) + { + SArbitraryOutputValues retval; + retval.albedo = albedo*factor; + retval.normal = normal*factor; + return retval; + } + + inline SArbitraryOutputValues operator*(const SAOVThroughputs throughput) + { + SArbitraryOutputValues retval; + retval.albedo = albedo*throughput.albedo; + retval.normal = normal*hlsl::dot(throughput.albedo,LumaConversionCoeffs); + return retval; + } + + // AoVs are handled as "special emission", basically the contribution of albedo is same as the material illuminated in a White Furnace + // so for transparent (anyhit) to impart its albedo or normal into the AoV 
it can add it the same way it would add any color emission + float16_t3 albedo; + // One would think that normals can't be blended, but yes they can! Just make sure you weigh them using the Luma of the RGB aovThroughput. + // Here's the problem with dealing with reflections & refractions, the reflection of a wall with an X- normal in the X+ window should have an apparent X+ normal. + // This means that one would need to track the surfaces through which we reflect in a stack along the path, eg: + // `originalNormal - 2 dot(originalNormal, reflectorNormal) * reflectorNormal == (Identity - 2 outerProductMatrix(reflectorNormal)) originalNormal` + // To follow through 2 or more reflections we'd need to multiply these 3x3 matrices together along the ray like so + // `(I - 2 n_0 n_0^T) (I - 2 n_1 n_1^T) = I + 4 n_0 (n_0^T n_1) n_1^T - 2 (n_0 n_0^T + n_1 n_1^T)` + // Theoretically because every series of reflections is just one reflection and a rotation, it could be possible to store this in 3 floats, due to the properties of SO(3) + // "The orthogonal group, consisting of all proper and improper rotations, is generated by reflections. Every proper rotation is the composition of two reflections, a special case of the Cartan–Dieudonné theorem." + // I'm not sure how we could extend that for refractions but probably a similar form is possible - virtual object corresponding under transmission to what's seen under refraction. + // The question is.. is it worth it? Do we really need objects warped in a labyrinth of wonky mirrors to have warped normals? Or a ceiling reflected in a choppy swimming pool to inherit the pool's wave normals? + // NO because this is an input to a denoiser to stop it blurring lighting across surfaces oriented in different directions! Doesn't matter what the reflection and refraction normals are as long as they're consistent. 
+ // For the example of a flat building wall reflected in a wavy but smooth reflector, that would actually be a massive self-own and leave behind a lot of noise! + float16_t3 normal; + // TODO: motion (RG vector to past location, B or BA as a measure of spread, e.g. spherical gaussian, direction and its variance, Polar Harmonics - Laplace on a Circle) + //float16_t3or4 motion; +}; + +// accumulated color +using accum_t = float16_t3; + +// only callable from closestHit +inline float32_t3 reconstructGeometricNormal() +{ + using namespace nbl::hlsl; + + // Do diffs in high precision, edges can be very long and dot products can easily overflow 64k max float16_t value and normalizing one extra time makes no sense + const float32_t3 geometricNormal = hlsl::cross( + spirv::HitTriangleVertexPositionsKHR[1]-spirv::HitTriangleVertexPositionsKHR[0], + spirv::HitTriangleVertexPositionsKHR[2]-spirv::HitTriangleVertexPositionsKHR[0] + ); + + // Scales can be absolutely huge, we'd need special per-instance pre-scaled 3x3 matrices and also guarantee `geometricNormal` isn't huge + // this would require a normalization before the matrix multiplication, making everything slower. + const float32_t3x3 normalMatrix = hlsl::math::linalg::truncate<3,3,3,4>(hlsl::transpose(float32_t4x3(spirv::WorldToObjectKHR))); + // normalization also needs to be done in full floats because length squared can easily be over 64k + return hlsl::normalize(hlsl::mul(normalMatrix,geometricNormal)); +} + + +// This is not only used for Russian Roulette but also for culling low throughput paths early (adds bias but keeps the critical path of the path tracer - pun intended - manageable) +struct MaxContributionEstimator +{ + // TODO: apply inverse exposure so we're sensitive to screen output (previous beauty), but don't go overkill and apply tonemapped luma derivative based on current inverse tonemapping of color accumulation + static inline MaxContributionEstimator create(const float16_t3 
constantThroughputWeights) + { + MaxContributionEstimator retval; + // essentially how much can we move the accumulation needle + retval.throughputWeights = constantThroughputWeights; + return retval; + } + + // notCulled instead of culled because of NaN handling + inline bool notCulled(NBL_REF_ARG(float32_t3) throughput, bool skipRussianRoulette, NBL_REF_ARG(float32_t) xi) + { + // recompute after previous hit + const float16_t surviveProb = hlsl::dot(float16_t3(throughput),throughputWeights); + // TODO: prevent "fireflies in AoVs" because AoV targets are not HDR - don't do RR if that will overshoot our albedo and normal contributions + // skipRussianRoulette = skipRussianRoulette && ...; + // cull really low throughput paths (adds bias) + const float16_t RelativeLumaThroughputThreshold = hlsl::numeric_limits::min; + // < instead of <= very important for handling zero probability, note that nextULP correction doesn't need to be applied because we use unclamped probability here + if (surviveProb>RelativeLumaThroughputThreshold && (skipRussianRoulette || xi; - randgen_type randgen = randgen_type::create(gScrambleKey[texCoord], pc.sensorDynamics.pSampleSequence); - float32_t3 NDC = float32_t3(coord * 2.0 - 1.0, -1.0); - - float32_t3 acc_albedo = float32_t3(0,0,0); - float32_t3 acc_normal = float32_t3(0,0,0); - uint32_t sampleCount = pc.sensorDynamics.maxSPP; - float rcpSampleCount = 1.0 / float(sampleCount); - for (uint32_t i = 0; i < sampleCount; i++) - { - float32_t3 randVec = randgen(0u, i); - path_tracing::GaussianFilter filter = path_tracing::GaussianFilter::create(2.5, 1.5); // stochastic reconstruction filter - float32_t3 adjNDC = NDC; - adjNDC.xy += pixOffsetParam * filter.sample(randVec.xy); - float32_t3 direction = hlsl::normalize(float32_t3(hlsl::mul(pc.sensorDynamics.ndcToRay, adjNDC), -1.0)); - float32_t3 origin = -float32_t3(direction.xy/direction.z, pc.sensorDynamics.nearClip); - - RayDesc rayDesc; - rayDesc.Origin = 
math::linalg::promoted_mul(pc.sensorDynamics.invView, origin); - rayDesc.Direction = hlsl::normalize(hlsl::mul(math::linalg::truncate<3,3,3,4>(pc.sensorDynamics.invView), direction)); - rayDesc.TMin = pc.sensorDynamics.nearClip; - rayDesc.TMax = pc.sensorDynamics.tMax; - - [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] - DebugPayload payload; - payload.albedo = float32_t3(0,0,0); - payload.worldNormal = float32_t3(0,0,0); - spirv::traceRayKHR(gTLASes[0], spv::RayFlagsMaskNone, 0xff, 0u, 0u, 0u, rayDesc.Origin, rayDesc.TMin, rayDesc.Direction, rayDesc.TMax, payload); - - acc_albedo += payload.albedo; - acc_normal += payload.worldNormal * 0.5 + 0.5; + const uint16_t3 launchID = uint16_t3(spirv::LaunchIdKHR); + + SPixelSamplingInfo samplingInfo = advanceSampleCount(launchID,1,uint16_t(pc.sensorDynamics.keepAccumulating)); + // took 64k-1 spp + if (samplingInfo.rcpNewSampleCount==0.f) + return; + + // + const float32_t2 pixelSizeNDC = promote(2.f) / float32_t2(spirv::LaunchSizeKHR.xy); + const float32_t2 NDC = float32_t2(launchID.xy) * pixelSizeNDC - promote(1.f); + + [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] + DebugPayload payload; + // take just one sample per dispatch + { + const float16_t2 randVec = float16_t2(samplingInfo.randgen(0u,samplingInfo.firstSample).xy); + const SRay ray = SRay::create(pc.sensorDynamics,pixelSizeNDC,NDC,randVec); + + payload.aov.clear(); + spirv::traceRayKHR(gTLASes[0], spv::RayFlagsMaskNone, 0xff, 0u, 0u, 0u, ray.origin, ray.tMin, ray.direction, ray.tMax, payload); } - const bool firstFrame = pc.sensorDynamics.rcpFramesDispatched == 1.0; - // clear accumulations totally if beginning a new frame - if (firstFrame) - { - gAlbedo[launchID] = float32_t4(acc_albedo * rcpSampleCount, 1.0); - gNormal[launchID] = float32_t4(acc_normal * rcpSampleCount, 1.0); - } - else - { - float32_t3 prev_albedo = gAlbedo[launchID]; - float32_t3 delta = (acc_albedo * rcpSampleCount - prev_albedo) * 
pc.sensorDynamics.rcpFramesDispatched; - if (hlsl::any(delta > hlsl::promote(1.0/1024.0))) - gAlbedo[launchID] = float32_t4(prev_albedo + delta, 1.0); + // simple overwrite without accumulation + gRWMCCascades[launchID] = uint32_t2(payload.instanceID,payload.primitiveID); + // can also shove some stuff in `gBeauty`, `gMotion` and `gMask` - float32_t3 prev_normal = gNormal[launchID]; - delta = (acc_normal * rcpSampleCount - prev_normal) * pc.sensorDynamics.rcpFramesDispatched; - if (hlsl::any(delta > hlsl::promote(1.0/512.0))) - gNormal[launchID] = float32_t4(prev_normal + delta, 1.0); - } - + // albedo + Accumulator albedoAcc; + albedoAcc.accumulate(launchID.xy,launchID.z,float32_t3(payload.aov.albedo),samplingInfo.rcpNewSampleCount); + // normal + Accumulator normalAcc; + normalAcc.accumulate(launchID.xy,launchID.z,float32_t3(correctSNorm10WhenStoringToUnorm(payload.aov.normal)),samplingInfo.rcpNewSampleCount); } [shader("closesthit")] void closesthit(inout DebugPayload payload, in BuiltInTriangleIntersectionAttributes attribs) { - const int primID = spirv::PrimitiveId; - const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; - const int geometryIndex = spirv::RayGeometryIndexKHR; - - float32_t3 vertex0 = spirv::HitTriangleVertexPositionsKHR[0]; - float32_t3 vertex1 = spirv::HitTriangleVertexPositionsKHR[1]; - float32_t3 vertex2 = spirv::HitTriangleVertexPositionsKHR[2]; - const float32_t3 geometricNormal = hlsl::cross(vertex1 - vertex0, vertex2 - vertex0); - - const float32_t3x3 normalMatrix = math::linalg::truncate<3,3,3,4>(hlsl::transpose(spirv::WorldToObjectKHR)); - const float32_t3 worldNormal = hlsl::normalize(hlsl::mul(normalMatrix,geometricNormal)); - - payload.instanceID = instanceCustomIndex; - payload.primitiveID = primID; + const uint32_t instanceCustomIndex = spirv::InstanceCustomIndexKHR; + const uint32_t geometryIndex = spirv::RayGeometryIndexKHR; + payload.instanceID = instanceCustomIndex;// TODO: can we get geometry count in instance and 
"linearize" our geometry into an UUID ? + payload.primitiveID = spirv::PrimitiveId; - payload.albedo = float32_t3(1,1,1); - payload.worldNormal = worldNormal; + payload.aov.albedo = accum_t(1,1,1); + payload.aov.normal = accum_t(reconstructGeometricNormal()); } [shader("miss")] void miss(inout DebugPayload payload) { - payload.albedo = float32_t3(0,0,0); - payload.worldNormal = -spirv::WorldRayDirectionKHR; + const SEnvSample _sample = sampleEnv(spirv::WorldRayDirectionKHR); + _sample.color; + payload.aov = _sample.aov; } \ No newline at end of file diff --git a/40_PathTracer/include/renderer/CRenderer.h b/40_PathTracer/include/renderer/CRenderer.h index 719abea9e..62995369d 100644 --- a/40_PathTracer/include/renderer/CRenderer.h +++ b/40_PathTracer/include/renderer/CRenderer.h @@ -5,11 +5,12 @@ #define _NBL_THIS_EXAMPLE_C_RENDERER_H_INCLUDED_ +#include "nbl/examples/common/CCachedOwenScrambledSequence.hpp" + #include "renderer/CScene.h" #include "renderer/CSession.h" #include "renderer/shaders/pathtrace/push_constants.hlsl" -#include "nbl/examples/common/ScrambleSequence.hpp" #include "nbl/this_example/builtin/build/spirv/keys.hpp" @@ -54,16 +55,6 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl struct SCachedCreationParams { - //! 
Brief guideline to good path depth limits - // Want to see stuff with indirect lighting on the other side of a pane of glass - // 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light - // Want to see through a glass box, vase, or office - // 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light - // pick higher numbers for better GI and less bias - static inline constexpr uint32_t DefaultPathDepth = 8u; - // TODO: Upload only a subsection of the sample sequence to the GPU, so we can use more samples without trashing VRAM - static inline constexpr uint32_t MaxFreeviewSamples = 0x10000u; - inline operator bool() const { if (!graphicsQueue || !computeQueue || !uploadQueue) @@ -90,7 +81,7 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl struct SCreationParams : SCachedCreationParams { asset::IAssetManager* assMan; - nbl::examples::ScrambleSequence::SCreationParams sampleSequenceCreateParams; + std::string sequenceCachePath; }; static core::smart_refctd_ptr create(SCreationParams&& _params); @@ -106,6 +97,11 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl struct SCachedConstructionParams { constexpr static inline uint8_t FramesInFlight = 3; + + // TODO: Some Constant to Tell us how many dimensions each path vertex consumes + inline auto getSequenceMaxPathDepth() const {return sequenceHeader.maxDimensions/3;} + + core::smart_refctd_ptr semaphore; // per pipeline UBO for other pipelines @@ -124,8 +120,16 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl // core::smart_refctd_ptr commandBuffers[FramesInFlight]; - core::smart_refctd_ptr scrambleKey; - core::smart_refctd_ptr sampleSequence; + // + core::smart_refctd_ptr sobolSequence; + //! 
Brief guideline to good path depth limits + // Want to see stuff with indirect lighting on the other side of a pane of glass + // 5 = glass frontface->glass backface->diffuse surface->diffuse surface->light + // Want to see through a glass box, vase, or office + // 7 = glass frontface->glass backface->glass frontface->glass backface->diffuse surface->diffuse surface->light + // pick higher numbers for better GI and less bias + // TODO: Upload only a subsection of the sample sequence to the GPU, so we can use more samples without trashing VRAM + examples::CCachedOwenScrambledSequence::SCacheHeader sequenceHeader = {}; }; // inline const SCachedConstructionParams& getConstructionParams() const {return m_construction;} @@ -156,38 +160,13 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl protected: struct SConstructorParams : SCachedCreationParams, SCachedConstructionParams { - - // Each Atom of the sample sequence provides 3N dimensions (3 for BxDF, 3 for NEE, etc.) 
- // Then Atoms are ordered by sampleID, then dimension (cache will be fully trashed by tracing TLASes until next bounce) -#if 0 - // semi persistent data - struct SampleSequence - { - public: - static inline constexpr auto QuantizedDimensionsBytesize = sizeof(uint64_t); - SampleSequence() : bufferView() {} - - // one less because first path vertex uses a different sequence - static inline uint32_t computeQuantizedDimensions(uint32_t maxPathDepth) {return (maxPathDepth-1)*SAMPLING_STRATEGY_COUNT;} - nbl::core::smart_refctd_ptr createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount); - - // from cache - void createBufferView(nbl::video::IVideoDriver* driver, nbl::core::smart_refctd_ptr&& buff); - // regenerate - nbl::core::smart_refctd_ptr createBufferView(nbl::video::IVideoDriver* driver, uint32_t quantizedDimensions, uint32_t sampleCount); - - auto getBufferView() const {return bufferView;} - - private: - nbl::core::smart_refctd_ptr bufferView; - } sampleSequence; - +#if 0 // Resources used for envmap sampling nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; #endif }; inline CRenderer(SConstructorParams&& _params) : m_creation(std::move(_params)), m_construction(std::move(_params)), - m_frameIx(m_construction.semaphore->getCounterValue()), m_framesDispatched(0) {} + m_frameIx(m_construction.semaphore->getCounterValue()) {} virtual inline ~CRenderer() {} static core::smart_refctd_ptr loadPrecompiledShader_impl(asset::IAssetManager* assMan, const core::string& key, system::logger_opt_ptr logger); @@ -195,7 +174,6 @@ class CRenderer : public core::IReferenceCounted, public core::InterfaceUnmovabl SCachedCreationParams m_creation; SCachedConstructionParams m_construction; uint64_t m_frameIx; - uint32_t m_framesDispatched; }; } diff --git a/40_PathTracer/include/renderer/CSession.h b/40_PathTracer/include/renderer/CSession.h index cac699772..993c3ff19 100644 --- a/40_PathTracer/include/renderer/CSession.h +++ 
b/40_PathTracer/include/renderer/CSession.h @@ -15,7 +15,7 @@ namespace nbl::this_example { class CScene; -class CSession final : public core::IReferenceCounted, public core::InterfaceUnmovable +class CSession final : public core::IReferenceCounted { public: using sensor_t = CSceneLoader::SLoadResult::SSensor; @@ -39,7 +39,7 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm }; // - bool init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptr sampleSequenceBuffer, core::smart_refctd_ptr scrambleKey); + bool init(video::SIntendedSubmitInfo& info); // inline bool isInitialized() const {return bool(m_active.immutables);} @@ -78,7 +78,6 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm SImageWithViews scrambleKey = {}, sampleCount = {}, beauty = {}, rwmcCascades = {}, albedo = {}, normal = {}, motion = {}, mask = {}; // stores all the sensor data required core::smart_refctd_ptr ds = {}; - core::smart_refctd_ptr sampleSequenceBuffer; }; SImmutables immutables = {}; SSensorDynamics currentSensorState = {}, prevSensorState = {}; @@ -88,7 +87,7 @@ class CSession final : public core::IReferenceCounted, public core::InterfaceUnm inline const SActiveResources& getActiveResources() const {return m_active;} // - bool reset(const SSensorDynamics& newVal, video::IGPUCommandBuffer* cb); + bool reset(const SSensorDynamics& newVal, video::SIntendedSubmitInfo& info); // bool update(const SSensorDynamics& newVal); diff --git a/40_PathTracer/include/renderer/shaders/common.hlsl b/40_PathTracer/include/renderer/shaders/common.hlsl index 178159e62..e086f19fc 100644 --- a/40_PathTracer/include/renderer/shaders/common.hlsl +++ b/40_PathTracer/include/renderer/shaders/common.hlsl @@ -3,7 +3,77 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/concepts/accessors/loadable_image.hlsl" +#include "nbl/builtin/hlsl/concepts/accessors/storable_image.hlsl" +// +#include 
"nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +// +#include "nbl/builtin/hlsl/path_tracing/gaussian_filter.hlsl" +// TODO: move to type_traits? +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +template +struct texture_traits; + +template +struct texture_traits > > +{ + NBL_CONSTEXPR_STATIC_INLINE int32_t Dimension = 2; + using coded_type = vector; +}; +//special case +template NBL_PARTIAL_REQ_TOP(is_scalar_v) +struct texture_traits NBL_PARTIAL_REQ_BOT(is_scalar_v)> +{ + NBL_CONSTEXPR_STATIC_INLINE int32_t Dimension = 1; + using coded_type = vector; +}; + +template +struct texture_traits > > +{ + NBL_CONSTEXPR_STATIC_INLINE int32_t Dimension = 2; + using coded_type = vector; +}; +//special case +template NBL_PARTIAL_REQ_TOP(is_scalar_v) +struct texture_traits NBL_PARTIAL_REQ_BOT(is_scalar_v)> +{ + NBL_CONSTEXPR_STATIC_INLINE int32_t Dimension = 1; + using coded_type = vector; +}; +#endif +} +} + +// +#define DEFINE_TEXTURE_ACCESSOR(TEX_NAME) struct ImageAccessor_ ## TEX_NAME \ +{ \ + using texture_traits_t = ::nbl::hlsl::texture_traits; \ + NBL_CONSTEXPR_STATIC_INLINE uint32_t Dimension = texture_traits_t::Dimension; \ + using coded_type = typename texture_traits_t::coded_type; \ + using coded_type_traits = ::nbl::hlsl::vector_traits; \ + using scalar_type = typename coded_type_traits::scalar_type; \ + NBL_CONSTEXPR_STATIC_INLINE uint32_t Components = coded_type_traits::Dimension; \ +\ + template) value, const vector coord, const uint16_t layer) \ + { \ + value = TEX_NAME[vector(coord,layer)]; \ + } \ + template coord, const uint16_t layer, const vector value) \ + { \ + TEX_NAME[vector(coord,layer)] = value; \ + } \ +} + +// +#define MAX_PATH_DEPTH_LOG2 8 #endif // _NBL_THIS_EXAMPLE_COMMON_HLSL_INCLUDED_ diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl index 4e1dee3a9..b64ef92f0 100644 --- 
a/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl +++ b/40_PathTracer/include/renderer/shaders/pathtrace/push_constants.hlsl @@ -12,6 +12,11 @@ namespace nbl { namespace this_example { + +#define MAX_SPP_LOG2 15 +NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; +// need to be able to count (represent) both 0 and Max +NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1; struct SSensorDynamics { // assuming input will be ndc = [-1,1]^2 x {-1} @@ -19,13 +24,13 @@ struct SSensorDynamics hlsl::float32_t2x3 ndcToRay; hlsl::float32_t nearClip; hlsl::float32_t tMax; - hlsl::float32_t rcpFramesDispatched; - uint64_t pSampleSequence; - // we can adaptively sample per-pixel, but + // we can adaptively sample per-pixel, but some bounds need to be kept uint32_t minSPP : MAX_SPP_LOG2; uint32_t maxSPP : MAX_SPP_LOG2; - uint32_t unused : BOOST_PP_SUB(32,BOOST_PP_MUL(MAX_SPP_LOG2,2)); + uint32_t unused : 1; + uint32_t keepAccumulating : 1; }; +#undef MAX_SPP_LOG2 struct SPrevisPushConstants { @@ -38,9 +43,34 @@ struct SBeautyPushConstants { NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSppPerDispatchLog2 = MAX_SPP_PER_DISPATCH_LOG2; + // PushConstant16bit access feature isn't ubiquitous + struct S16BitData + { + // Luma conversion coefficients scaled by something proportional to the brightest light in the scene +#ifndef __HLSL_VERSION + hlsl:: +#endif + float16_t3 rrThroughputWeights; + // For a foveated render + uint16_t maxSppPerDispatch; + }; + + SSensorDynamics sensorDynamics; - uint32_t maxSppPerDispatch : MAX_SPP_PER_DISPATCH_LOG2; - uint32_t unused : 27; +#ifdef __HLSL_VERSION + uint32_t __16BitData[sizeof(S16BitData)/sizeof(uint32_t)]; + // + S16BitData get16BitData() + { + S16BitData retval; + // TODO: implement later + retval.rrThroughputWeights = hlsl::promote(hlsl::numeric_limits::max); // always pass RR + retval.maxSppPerDispatch = 3; + return retval; + } +#else + S16BitData __16BitData; +#endif }; #undef 
MAX_SPP_PER_DISPATCH_LOG2 diff --git a/40_PathTracer/include/renderer/shaders/pathtrace/rand_gen.hlsl b/40_PathTracer/include/renderer/shaders/pathtrace/rand_gen.hlsl deleted file mode 100644 index 9bef5b9ce..000000000 --- a/40_PathTracer/include/renderer/shaders/pathtrace/rand_gen.hlsl +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _NBL_THIS_EXAMPLE_PATHTRACE_RANDGEN_HLSL_INCLUDED_ -#define _NBL_THIS_EXAMPLE_PATHTRACE_RANDGEN_HLSL_INCLUDED_ - -#include "renderer/shaders/pathtrace/common.hlsl" - -#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" -#include "nbl/builtin/hlsl/random/dim_adaptor_recursive.hlsl" - -namespace nbl -{ -namespace this_example -{ - -template -struct RandomUniformND -{ - using rng_type = RNG; - using return_type = vector; - - static RandomUniformND create(uint32_t2 seed, uint64_t pSampleSequence) - { - RandomUniformND retval; - retval.rng = rng_type::construct(seed); - retval.pSampleBuffer = pSampleSequence; - return retval; - } - - // baseDimension: offset index of the sequence - // sampleIndex: iteration number of current pixel (samples per pixel) - return_type operator()(uint32_t baseDimension, uint32_t sampleIndex) - { - using sequence_type = hlsl::sampling::QuantizedSequence; - uint32_t address = hlsl::glsl::bitfieldInsert(baseDimension, sampleIndex, SSensorUniforms::MaxPathDepthLog2, SSensorUniforms::MaxSamplesLog2); - sequence_type tmpSeq = vk::RawBufferLoad(pSampleBuffer + address * sizeof(sequence_type)); - return tmpSeq.template decode(hlsl::random::DimAdaptorRecursive::__call(rng)); - } - - rng_type rng; - uint64_t pSampleBuffer; -}; - -} -} - -#endif diff --git a/40_PathTracer/include/renderer/shaders/scene.hlsl b/40_PathTracer/include/renderer/shaders/scene.hlsl index 4418d3040..df28fe667 100644 --- a/40_PathTracer/include/renderer/shaders/scene.hlsl +++ b/40_PathTracer/include/renderer/shaders/scene.hlsl @@ -12,13 +12,19 @@ struct SSceneUniforms { struct SInit { + NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = 
MAX_PATH_DEPTH_LOG2; + // -// bda_t pQuantizedSequence; + uint64_t pSampleSequence; + uint16_t sequenceSamplesLog2 : 5; + uint16_t lastSequencePathDepth : 11; + static_assert(MaxPathDepthLog2<=11); + uint16_t unused1[3]; + //hlsl::float16_t envmapScale; + // TODO: later when we can save the envmap values to a buffer // because the PDF is rescaled to log2(luma)/log2(Max)*255 // and you get it out as `exp2(texValue)*factor` - hlsl::float32_t envmapPDFNormalizationFactor; - hlsl::float16_t envmapScale; - uint16_t unused; + //hlsl::float32_t envmapPDFNormalizationFactor; } init; }; diff --git a/40_PathTracer/include/renderer/shaders/session.hlsl b/40_PathTracer/include/renderer/shaders/session.hlsl index 7bce02dc7..ea6451c77 100644 --- a/40_PathTracer/include/renderer/shaders/session.hlsl +++ b/40_PathTracer/include/renderer/shaders/session.hlsl @@ -2,6 +2,7 @@ #define _NBL_THIS_EXAMPLE_SESSION_HLSL_INCLUDED_ +#include "renderer/shaders/common.hlsl" #include "renderer/shaders/resolve/rwmc.hlsl" @@ -9,36 +10,22 @@ namespace nbl { namespace this_example { -#define MAX_SPP_LOG2 15 -NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSPPLog2 = MAX_SPP_LOG2; -// need to be able to count (represent) both 0 and Max -NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSPP = (0x1u << MaxSPPLog2) - 1; struct SSensorUniforms { NBL_CONSTEXPR_STATIC_INLINE uint16_t ScrambleKeyTextureSize = 512; - -#define MAX_PATH_DEPTH_LOG2 7 NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxCascadeCountLog2 = MAX_CASCADE_COUNT_LOG2; - NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxPathDepthLog2 = MAX_PATH_DEPTH_LOG2; - NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxBufferDimensions = 3u << MAX_PATH_DEPTH_LOG2; -#define MAX_SAMPLES_LOG2 10 - NBL_CONSTEXPR_STATIC_INLINE uint16_t MaxSamplesLog2 = MAX_SAMPLES_LOG2; - NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxSamplesBuffer = 1u << MAX_SAMPLES_LOG2; hlsl::float32_t2 rcpPixelSize; hlsl::rwmc::SSplattingParameters splatting; hlsl::uint16_t2 renderSize; // bitfield - uint16_t lastCascadeIndex : 
MAX_CASCADE_COUNT_LOG2; - uint16_t unused0 : BOOST_PP_SUB(16,MAX_CASCADE_COUNT_LOG2); - // bitfield - uint16_t unused1 : 1; - uint16_t hideEnvironment : 1; uint16_t lastPathDepth : MAX_PATH_DEPTH_LOG2; uint16_t lastNoRussianRouletteDepth : MAX_PATH_DEPTH_LOG2; + uint16_t lastCascadeIndex : MAX_CASCADE_COUNT_LOG2; + uint16_t unused0 : 12; //BOOST_PP_SUB(15, BOOST_PP_ADD(BOOST_PP_MUL(MAX_PATH_DEPTH_LOG2, 2), MAX_CASCADE_COUNT_LOG2)); + uint16_t hideEnvironment : 1; }; -#undef MAX_PATH_DEPTH_LOG2 struct SensorDSBindings { @@ -88,27 +75,51 @@ struct SensorDSBindingCounts #ifdef __HLSL_VERSION [[vk::binding(SensorDSBindings::UBO,SessionDSIndex)]] ConstantBuffer gSensor; -// could be uint32_t2 [[vk::binding(SensorDSBindings::ScrambleKey,SessionDSIndex)]] RWTexture2DArray gScrambleKey; -// could be uint32_t or even uint16_t -[[vk::binding(SensorDSBindings::SampleCount,SessionDSIndex)]] RWTexture2DArray gSampleCount; -// could be uint32_t2 -[[vk::binding(SensorDSBindings::RWMCCascades,SessionDSIndex)]] RWTexture2DArray gRWMCCascades; -// could be uint32_t -[[vk::binding(SensorDSBindings::Beauty,SessionDSIndex)]] RWTexture2DArray gBeauty; +// could be uint16_t were it not for "Expected Sampled Type to be a 32-bit int, 64-bit int or 32-bit float scalar type for Vulkan environment" +[[vk::binding(SensorDSBindings::SampleCount,SessionDSIndex)]] RWTexture2DArray gSampleCount; +[[vk::binding(SensorDSBindings::RWMCCascades,SessionDSIndex)]] RWTexture2DArray gRWMCCascades; +[[vk::binding(SensorDSBindings::Beauty,SessionDSIndex)]] RWTexture2DArray gBeauty; [[vk::binding(SensorDSBindings::Albedo,SessionDSIndex)]] RWTexture2DArray gAlbedo; // thse two are snorm but stored as unorm, care needs to be taken to map: // [-1,1] <-> [0,1] but with 0 being exactly representable, so really [-1,1] <-> [1/1023,1] // Requires x*1022.f/2046.f+1024.f/2046.f shift/adjust for accumulation and storage // Then to decode back into [-1,1] need max(y*2046.f/1022.f-1024.f/1022.f,-1) = x 
[[vk::binding(SensorDSBindings::Normal,SessionDSIndex)]] RWTexture2DArray gNormal; -[[vk::binding(SensorDSBindings::Motion,SessionDSIndex)]] RWTexture2DArray gMotion; -// could be float32_t -[[vk::binding(SensorDSBindings::Mask,SessionDSIndex)]] RWTexture2DArray gMask; +// TODO: motion confidence mask +[[vk::binding(SensorDSBindings::Motion,SessionDSIndex)]] RWTexture2DArray gMotion; +[[vk::binding(SensorDSBindings::Mask,SessionDSIndex)]] RWTexture2DArray gMask; // [[vk::binding(SensorDSBindings::Samplers,SessionDSIndex)]] SamplerState gSensorSamplers[SensorDSBindingCounts::Samplers]; // [[vk::binding(SensorDSBindings::AsSampledImages,SessionDSIndex)]] Texture2DArray gSensorTextures[SensorDSBindingCounts::AsSampledImages]; + +// RWMC cascades need special treatment +struct RMWCCascadeAccumulator +{ + void accumulate(const uint16_t2 coord, const uint16_t layer, const float16_t3 data, const uint16_t newSampleCount) + { +// coded_type val; + +// if (rcpNewSampleCount<1.f) +// { +// composed.get(val,coord,layer); +// NBL_UNROLL for (uint16_t i=0; icreateSession({ @@ -426,26 +417,35 @@ class PathTracingApp final : public SimpleWindowedApplication, public BuiltinRes inline void workLoopBody() override { CSession* session; - volatile bool skip = false; // skip using the debugger - for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f || skip;) { - skip = false; - if (m_sessionQueue.empty()) + bool sameSession = true; + volatile bool skip = false; // skip using the debugger + for (session=m_resolver->getActiveSession(); !session || session->getProgress()>=1.f || skip;) { - if (!m_args.headless) - handleInputs(); - return; - } - session = m_sessionQueue.front().get(); - // init - m_utils->autoSubmit({.queue=getGraphicsQueue()},[&session, this](SIntendedSubmitInfo& info)->bool + skip = false; + if (m_sessionQueue.empty()) { - const auto& params = m_renderer->getConstructionParams(); - return 
session->init(info.getCommandBufferForRecording()->cmdbuf, smart_refctd_ptr(params.sampleSequence->buffer), smart_refctd_ptr(params.scrambleKey)); + if (!m_args.headless) + handleInputs(); + return; } - ); - m_resolver->changeSession(std::move(m_sessionQueue.front())); - m_sessionQueue.pop(); + session = m_sessionQueue.front().get(); + // init + m_utils->autoSubmit({.queue=getGraphicsQueue()},[&session,this](SIntendedSubmitInfo& info)->bool + { + return session->init(info); + } + ); + m_resolver->changeSession(std::move(m_sessionQueue.front())); + sameSession = false; + m_sessionQueue.pop(); + } + // TODO: camera movement and UI update + if (sameSession) + { + // no update right now + session->update(session->getActiveResources().prevSensorState); + } } m_api->startCapture(); diff --git a/40_PathTracer/src/renderer/CRenderer.cpp b/40_PathTracer/src/renderer/CRenderer.cpp index c41353f68..ef5eb5461 100644 --- a/40_PathTracer/src/renderer/CRenderer.cpp +++ b/40_PathTracer/src/renderer/CRenderer.cpp @@ -40,6 +40,19 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) { if (!_params) return nullptr; + + // get started with the sequence ASAP + auto* const assMan = _params.assMan; + auto sequenceFuture = std::async(std::launch::async,[assMan](std::string&& cachePath)->auto + { + return nbl::examples::CCachedOwenScrambledSequence::create({ + .cachePath=std::move(cachePath),.assMan=assMan,.header={ + .maxSamplesLog2=12,.maxDimensions=96 + } + }); + },_params.sequenceCachePath + ); + SConstructorParams params = {std::move(_params)}; // @@ -264,122 +277,13 @@ smart_refctd_ptr CRenderer::create(SCreationParams&& _params) return nullptr; } + // upload quantized LDS sequence buffer { - // storage buffer with sobol sequence - params.sampleSequence = examples::ScrambleSequence::create(_params.sampleSequenceCreateParams); - } - - { - // create scramble key filled with noise - asset::ICPUImage::SCreationParams info; - info.format = asset::E_FORMAT::EF_R32G32_UINT; - 
info.type = asset::ICPUImage::ET_2D; - info.extent.width = SSensorUniforms::ScrambleKeyTextureSize; - info.extent.height = SSensorUniforms::ScrambleKeyTextureSize; - info.extent.depth = 1u; - info.mipLevels = 1u; - info.arrayLayers = 1u; - info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - info.flags = static_cast(0u); - info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT | asset::IImage::EUF_STORAGE_BIT; - - auto scrambleMapCPU = ICPUImage::create(std::move(info)); - const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); - const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); - auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); - - core::RandomSampler rng(0xbadc0ffeu); - auto out = reinterpret_cast(texelBuffer->getPointer()); - for (auto index = 0u; index < texelBufferSize / 4; index++) { - out[index] = rng.nextSample(); - } - - auto regions = core::make_refctd_dynamic_array>(1u); - ICPUImage::SBufferCopy& region = regions->front(); - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = 0u; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - region.bufferOffset = 0u; - region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(info.extent.width, texelFormatByteSize); - region.bufferImageHeight = 0u; - region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent = scrambleMapCPU->getCreationParameters().extent; - - scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); - // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) - scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); - - auto converter = CAssetConverter::create({ .device = device }); - struct SInputs final : CAssetConverter::SInputs - { - // we also need to override this to have 
concurrent sharing - inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override - { - if (familyIndices.size() > 1) - return familyIndices; - return {}; - } - - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return image->getCreationParameters().mipLevels; - } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; - } - - std::vector familyIndices; - } inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = logger.get(); - { - const core::set uniqueFamilyIndices = { params.graphicsQueue->getFamilyIndex(), params.computeQueue->getFamilyIndex() }; - inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; - } - - auto cb = params.commandBuffers[0].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - std::array commandBufferInfo = { cb }; - core::smart_refctd_ptr imgFillSemaphore = device->createSemaphore(0); - imgFillSemaphore->setObjectDebugName("Scramble Key Fill Semaphore"); - SIntendedSubmitInfo transfer = { - .queue = params.graphicsQueue, - .waitSemaphores = {}, - .prevCommandBuffers = {}, - .scratchCommandBuffers = commandBufferInfo, - .scratchSemaphore = { - .semaphore = imgFillSemaphore.get(), - .value = 0, - // because of layout transitions - .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS - } - }; - - { - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - CAssetConverter::SConvertParams convparams = {}; - convparams.transfer = &transfer; - convparams.utilities = params.utilities.get(); - const ICPUImage* cpuImgs[] = { scrambleMapCPU.get() }; - std::get>(inputs.assets) = cpuImgs; - // assert that we don't need to provide patches - 
assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); - auto reservation = converter->reserve(inputs); - - auto gpuImg = reservation.getGPUObjects().front().value; - if (!params.scrambleKey) - logger.log("Failed to convert scramble key image into an IGPUImage handle", ILogger::ELL_ERROR); - - auto result = reservation.convert(convparams); - if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { - logger.get()->log("Failed to record or submit conversions", ILogger::ELL_ERROR); - std::exit(-1); - } - - params.scrambleKey = gpuImg; - } + auto sequence = sequenceFuture.get(); + params.sequenceHeader = sequence->getHeader(); + auto* const seqBufferCPU = sequence->getBuffer(); + params.utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=params.graphicsQueue},IGPUBuffer::SCreationParams{seqBufferCPU->getCreationParams()},seqBufferCPU->getPointer()).move_into(params.sobolSequence); + params.sobolSequence->setObjectDebugName("Low Discrepancy Sequence"); } return core::smart_refctd_ptr(new CRenderer(std::move(params)),core::dont_grab); @@ -528,7 +432,11 @@ core::smart_refctd_ptr CRenderer::createScene(CScene::SCreationParams&& { tmpBuffers.ubo = ICPUBuffer::create({{.size=sizeof(SSceneUniforms),.usage=BasicBufferUsages|buffer_usage_e::EUF_UNIFORM_BUFFER_BIT},nullptr}); auto& uniforms = *reinterpret_cast(tmpBuffers.ubo->getPointer()); - uniforms.init = {}; // TODO: fill with stuff + uniforms.init = {}; + uniforms.init.pSampleSequence = m_construction.sobolSequence->getDeviceAddress(); + uniforms.init.sequenceSamplesLog2 = m_construction.sequenceHeader.maxSamplesLog2; + // TODO: Some Constant to Tell us how many dimensions each path vertex consumes + uniforms.init.lastSequencePathDepth = m_construction.getSequenceMaxPathDepth(); tmpBuffers.ubo->setContentHash(tmpBuffers.ubo->computeContentHash()); } // SBT @@ -835,9 +743,6 @@ auto CRenderer::render(CSession* session) -> SSubmit const auto& 
sessionParams = session->getConstructionParams(); auto* const device = getDevice(); - // TODO: reset m_framesDispatched to 0 every time camera moves considerable amount - m_framesDispatched++; - if (m_frameIx>=SCachedConstructionParams::FramesInFlight) { const ISemaphore::SWaitInfo cbDonePending[] = @@ -870,7 +775,6 @@ auto CRenderer::render(CSession* session) -> SSubmit case CSession::RenderMode::Debug: { SDebugPushConstants pc = {sessionResources.currentSensorState}; - pc.sensorDynamics.rcpFramesDispatched = 1.0 / float(m_framesDispatched); success = cb->pushConstants(pipeline->getLayout(),hlsl::ShaderStage::ESS_ALL_RAY_TRACING,0,sizeof(pc),&pc); break; } diff --git a/40_PathTracer/src/renderer/CScene.cpp b/40_PathTracer/src/renderer/CScene.cpp index 08c72f877..ea7709be3 100644 --- a/40_PathTracer/src/renderer/CScene.cpp +++ b/40_PathTracer/src/renderer/CScene.cpp @@ -37,16 +37,16 @@ smart_refctd_ptr CScene::createSession(const CSession::SCreationParams // fill uniforms { - const uint16_t maxPathDepth = hlsl::clamp(mutDefaults.maxPathDepth,1,0x1u<(mutDefaults.maxPathDepth,1,m_construction.renderer->getConstructionParams().getSequenceMaxPathDepth()); const uint16_t russianRouletteDepth = hlsl::clamp(mutDefaults.russianRouletteDepth,1,maxPathDepth); params.uniforms = { .rcpPixelSize = promote(1.f)/float32_t2(renderSize), .splatting = {}, // TODO .renderSize = renderSize, - .lastCascadeIndex = static_cast(constants.cascadeCount-1), - .hideEnvironment = mutDefaults.hideEnvironment, .lastPathDepth = static_cast(maxPathDepth-1), - .lastNoRussianRouletteDepth = static_cast(russianRouletteDepth-1) + .lastNoRussianRouletteDepth = static_cast(russianRouletteDepth-1), + .lastCascadeIndex = static_cast(constants.cascadeCount-1), + .hideEnvironment = mutDefaults.hideEnvironment }; } diff --git a/40_PathTracer/src/renderer/CSession.cpp b/40_PathTracer/src/renderer/CSession.cpp index 7bc09edaf..c83c39d4f 100644 --- a/40_PathTracer/src/renderer/CSession.cpp +++ 
b/40_PathTracer/src/renderer/CSession.cpp @@ -12,7 +12,7 @@ using namespace nbl::hlsl; using namespace nbl::video; // -bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptr sampleSequenceBuffer, core::smart_refctd_ptr scrambleKey) +bool CSession::init(SIntendedSubmitInfo& info) { auto renderer = m_params.scene->getRenderer(); auto& logger = renderer->getCreationParams().logger; @@ -70,13 +70,14 @@ bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptrupdateBuffer({.size=sizeof(m_params.uniforms),.buffer=ubo},&m_params.uniforms); + info.getCommandBufferForRecording()->cmdbuf->updateBuffer({.size=sizeof(m_params.uniforms),.buffer=ubo},&m_params.uniforms); addWrite(SensorDSBindings::UBO,SBufferRange{.offset=0,.size=sizeof(m_params.uniforms),.buffer=ubo}); } const auto allowedFormatUsages = device->getPhysicalDevice()->getImageFormatUsagesOptimalTiling(); auto createImage = [&]( - const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, std::bitset viewFormats={}, + const std::string_view debugName, const E_FORMAT format, const uint16_t2 resolution, const uint16_t layers, + const IGPUImage::E_CREATE_FLAGS extraFlags=IGPUImage::E_CREATE_FLAGS::ECF_NONE, std::bitset viewFormats={}, const IGPUImage::E_USAGE_FLAGS extraUsages=IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT|IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT ) -> SImageWithViews { @@ -92,13 +93,9 @@ bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptr1) { @@ -142,38 +139,28 @@ bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptrgetCreationParameters(); - const auto viewFormat = params.format; - const auto thisFormatUsages = static_cast>(allowedFormatUsages[viewFormat]); - auto view = device->createImageView({ - .subUsages = immutables.scrambleKey.image->getCreationParameters().usage & thisFormatUsages, - .image = immutables.scrambleKey.image, - .viewType = IGPUImageView::E_TYPE::ET_2D_ARRAY, - 
.format = viewFormat - }); - string viewDebugName = "Scramble Key " + to_string(viewFormat) + " View"; - if (!view) - { - logger.log("Failed to create Sensor \"%s\"'s \"%s\" in CSession::init()", ILogger::ELL_ERROR, m_params.name.c_str(), viewDebugName.c_str()); - return {}; - } - view->setObjectDebugName(viewDebugName.c_str()); - immutables.scrambleKey.views[viewFormat] = std::move(view); + const auto layers = 1u; // for now, until the crazy Heitz 2019 thing, or if we choose to save 8 bytes in ray payload and read a premade scramble at every depth + immutables.scrambleKey = createImage("Scramble Dimension Keys",E_FORMAT::EF_R32G32_UINT,hlsl::promote(SSensorUniforms::ScrambleKeyTextureSize),layers); } + // auto scrambleKeyView = immutables.scrambleKey.views[E_FORMAT::EF_R32G32_UINT]; addImageWrite(SensorDSBindings::ScrambleKey,scrambleKeyView); - immutables.sampleSequenceBuffer = sampleSequenceBuffer; - // create the render-sized images - auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, Args&&... args)->SImageWithViews + auto createScreenSizedImage = [&](const std::string_view debugName, const E_FORMAT format, uint16_t layers=1, Args&&... 
args)->SImageWithViews { - return createImage(debugName,format,m_params.uniforms.renderSize,std::forward(args)...); + using create_flags_e = IGPUImage::E_CREATE_FLAGS; + create_flags_e flags = create_flags_e::ECF_NONE; + if (m_params.type==sensor_type_e::Env) + { + layers *= 6; + flags = IGPUImage::E_CREATE_FLAGS::ECF_CUBE_COMPATIBLE_BIT; + } + return createImage(debugName,format,m_params.uniforms.renderSize,layers,flags,std::forward(args)...); }; - immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT,1); + immutables.sampleCount = createScreenSizedImage("Current Sample Count",E_FORMAT::EF_R16_UINT); auto sampleCountView = immutables.sampleCount.views[E_FORMAT::EF_R16_UINT]; addImageWrite(SensorDSBindings::SampleCount,sampleCountView); immutables.rwmcCascades = createScreenSizedImage("RWMC Cascades",E_FORMAT::EF_R32G32_UINT,m_params.uniforms.lastCascadeIndex+1); @@ -181,17 +168,17 @@ bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptr().set(E_FORMAT::EF_R32_UINT)); addImageWrite(SensorDSBindings::Beauty,immutables.beauty.views[E_FORMAT::EF_R32_UINT]); - immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + immutables.albedo = createScreenSizedImage("Albedo",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); auto albedoView = immutables.albedo.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; addImageWrite(SensorDSBindings::Albedo,albedoView); // Normal and Albedo should have used `EF_A2B10G10R10_SNORM_PACK32` but Nvidia doesn't support - immutables.normal = createScreenSizedImage("Normal",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + immutables.normal = createScreenSizedImage("Normal",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); auto normalView = immutables.normal.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; addImageWrite(SensorDSBindings::Normal,normalView); - immutables.motion = createScreenSizedImage("Motion",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32,1); + immutables.motion = 
createScreenSizedImage("Motion",E_FORMAT::EF_A2B10G10R10_UNORM_PACK32); auto motionView = immutables.motion.views[E_FORMAT::EF_A2B10G10R10_UNORM_PACK32]; addImageWrite(SensorDSBindings::Motion,motionView); - immutables.mask = createScreenSizedImage("Mask",E_FORMAT::EF_R16_UNORM,1); + immutables.mask = createScreenSizedImage("Mask",E_FORMAT::EF_R16_UNORM); auto maskView = immutables.mask.views[E_FORMAT::EF_R16_UNORM]; addImageWrite(SensorDSBindings::Mask,maskView); // shorthand a little bit @@ -237,19 +224,17 @@ bool CSession::init(video::IGPUCommandBuffer* cb, core::smart_refctd_ptrgetDeviceAddress(); - return true; } -bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) +bool CSession::reset(const SSensorDynamics& newVal, video::SIntendedSubmitInfo& info) { if (!isInitialized()) return false; @@ -277,7 +262,7 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) }; before.reserve(SensorDSBindingCounts::AsSampledImages); - auto enqueueClear = [&before,beforeBase](const SImageWithViews& img)->void + auto enqueueBarrier = [&before,beforeBase](const SImageWithViews& img)->void { auto& out = before.emplace_back(beforeBase); out.image = img.image.get(); @@ -287,37 +272,68 @@ bool CSession::reset(const SSensorDynamics& newVal, IGPUCommandBuffer* cb) .layerCount = out.image->getCreationParameters().arrayLayers }; }; - enqueueClear(immutables.sampleCount); - enqueueClear(immutables.beauty); - enqueueClear(immutables.rwmcCascades); - enqueueClear(immutables.albedo); - enqueueClear(immutables.normal); - enqueueClear(immutables.motion); - enqueueClear(immutables.mask); - success = success && cb->pipelineBarrier(asset::EDF_NONE,{.imgBarriers=before}); + { + enqueueBarrier(immutables.scrambleKey); + before.back().barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; + } + enqueueBarrier(immutables.sampleCount); + enqueueBarrier(immutables.beauty); + enqueueBarrier(immutables.rwmcCascades); + enqueueBarrier(immutables.albedo); + 
enqueueBarrier(immutables.normal); + enqueueBarrier(immutables.motion); + enqueueBarrier(immutables.mask); + success = success && info.getCommandBufferForRecording()->cmdbuf->pipelineBarrier(asset::EDF_NONE,{.imgBarriers=before}); } + // fill scramble with noise + { + auto* const utils = m_params.scene->getRenderer()->getCreationParams().utilities.get(); + const auto& params = immutables.scrambleKey.image->getCreationParameters(); + core::vector data(params.extent.width*params.extent.height*params.arrayLayers); + { + core::RandomSampler rng(0xbadc0ffeu); + for (auto& el : data) + el = {rng.nextSample(),rng.nextSample()}; + } + const ICPUImage::SBufferCopy region = { + .bufferRowLength = params.extent.width, + .bufferImageHeight = params.extent.height, + .imageSubresource = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .mipLevel = 0u, + .baseArrayLayer = 0u, + .layerCount = params.arrayLayers + }, + .imageExtent = params.extent + }; + utils->updateImageViaStagingBuffer(info,data.data(),params.format,immutables.scrambleKey.image.get(),IGPUImage::LAYOUT::GENERAL,{®ion,1}); + } + // clear all other images { IGPUCommandBuffer::SClearColorValue color; memset(&color,0,sizeof(color)); for (const auto& entry : before) - { - success = success && cb->clearColorImage(const_cast(entry.image),IGPUImage::LAYOUT::GENERAL,&color,1,&entry.subresourceRange); - } + if (entry.image!=immutables.scrambleKey.image.get()) + success = success && info.getCommandBufferForRecording()->cmdbuf->clearColorImage(const_cast(entry.image),IGPUImage::LAYOUT::GENERAL,&color,1,&entry.subresourceRange); } const SMemoryBarrier after[] = { { - .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT, + .srcStageMask = PIPELINE_STAGE_FLAGS::CLEAR_BIT|PIPELINE_STAGE_FLAGS::COPY_BIT, .srcAccessMask = ACCESS_FLAGS::MEMORY_WRITE_BITS, .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS } }; - success = success && 
cb->pipelineBarrier(asset::EDF_NONE,{.memBarriers=after}); + success = success && info.getCommandBufferForRecording()->cmdbuf->pipelineBarrier(asset::EDF_NONE,{.memBarriers=after}); if (success) - m_active.prevSensorState = m_active.currentSensorState = newVal; + { + m_active.currentSensorState = newVal; + m_active.currentSensorState.keepAccumulating = false; + m_active.prevSensorState = m_active.currentSensorState; + } return success; } @@ -328,6 +344,8 @@ bool CSession::update(const SSensorDynamics& newVal) m_active.prevSensorState = m_active.currentSensorState; m_active.currentSensorState = newVal; + // TODO: reset m_framesDispatched to 0 every time camera moves considerable amount + m_active.currentSensorState.keepAccumulating = true; return true; } diff --git a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp index cca4bad40..8f73b4aec 100644 --- a/40_PathTracer/src/renderer/present/CWindowPresenter.cpp +++ b/40_PathTracer/src/renderer/present/CWindowPresenter.cpp @@ -237,7 +237,7 @@ auto CWindowPresenter::acquire_impl(const CSession* session, ISemaphore::SWaitIn winMgr->show(window); m_pushConstants.layer = 0; // TODO: cubemaps and RWMC debug - m_pushConstants.imageIndex = uint8_t(SensorDSBindings::SampledImageIndex::Albedo); + m_pushConstants.imageIndex = uint8_t(SensorDSBindings::SampledImageIndex::Normal); auto acquireResult = m_construction.surface->acquireNextImage(); *p_currentImageAcquire = {.semaphore=acquireResult.semaphore,.value=acquireResult.acquireCount}; diff --git a/CMakeLists.txt b/CMakeLists.txt index a93a86a4f..17c9c4999 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,9 +71,6 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(29_Arithmetic2Bench) # add_subdirectory(36_CUDAInterop) - # Showcase compute pathtracing - add_subdirectory(30_ComputeShaderPathTracer) - add_subdirectory(31_HLSLPathTracer) add_subdirectory(34_DebugDraw) diff --git a/common/CMakeLists.txt 
b/common/CMakeLists.txt index b3e57da6f..b91dfc91e 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -6,7 +6,7 @@ The PCH includes Nabla.h + example common interface headers and takes around 1 GB per configuration, so sharing it avoids significant disk space waste -]] +]] nbl_create_ext_library_project(ExamplesAPI "" "${CMAKE_CURRENT_SOURCE_DIR}/src/nbl/examples/pch.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/include" "" "") diff --git a/common/include/nbl/examples/common/CCachedOwenScrambledSequence.hpp b/common/include/nbl/examples/common/CCachedOwenScrambledSequence.hpp new file mode 100644 index 000000000..52c6dfb08 --- /dev/null +++ b/common/include/nbl/examples/common/CCachedOwenScrambledSequence.hpp @@ -0,0 +1,198 @@ +// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_COMMON_C_CACHED_OWEN_SCRAMBLED_SEQUENCE_HPP_INCLUDED_ +#define _NBL_EXAMPLES_COMMON_C_CACHED_OWEN_SCRAMBLED_SEQUENCE_HPP_INCLUDED_ + + +#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include + + +namespace nbl::examples +{ + +// Each Atom of the Quantized Sample Sequence provides 3N dimensions (3 for BxDF, 3 for NEE, etc.) +// If we implement Heitz's Ranking and Scrambling Blue noise then each pixel gets its own scramble (texture read) - thats fine +// but it also gets a rank scramble, meaning that for the same sample ID within a progressive render, the sampleID will be scrambled. +// Since the sequence can be several MB, it would make sense to keep samples together first, then dimensions. 
+// Then Atoms are ordered by sampleID, then dimension (cache will be fully trashed by tracing TLASes until next bounce) +class CCachedOwenScrambledSequence final : public core::IReferenceCounted +{ + public: + // for 1024 spp renders `uint32_t` would have been enough + using sequence_type = hlsl::sampling::QuantizedSequence; + + struct SCacheHeader + { + constexpr static inline const char* Magic = "NBL_LDS_CACHE"; + constexpr static inline size_t MagicLen = std::string_view(Magic).size(); + + inline uint64_t sequenceByteSize() const + { + const uint32_t quantizedDimensions = (maxDimensions + 2u) / 3u; + return quantizedDimensions * sizeof(sequence_type) << maxSamplesLog2; + } + + uint32_t maxSamplesLog2 : 5 = 24; + uint32_t maxDimensions : 27 = 96; + }; + constexpr static inline size_t HeaderSize = SCacheHeader::MagicLen+sizeof(SCacheHeader); + + struct SCreationParams + { + constexpr static inline const char* DefaultFilename = "owen_sampler_buffer.bin"; + + inline operator bool() const {return assMan && !cachePath.empty();} + + std::string cachePath = DefaultFilename; + asset::IAssetManager* assMan = nullptr; + SCacheHeader header = {}; + }; + + static inline core::smart_refctd_ptr create(const SCreationParams& params) + { + if (!params) + return nullptr; + + using namespace nbl::core; + using namespace nbl::system; + using namespace nbl::asset; + using namespace nbl::video; + + // read cache file + SCacheHeader oldHeader = {.maxSamplesLog2=0,.maxDimensions=0}; + smart_refctd_ptr oldBuffer; + { + IAssetLoader::SAssetLoadParams loadParams = {}; + loadParams.cacheFlags = IAssetLoader::E_CACHING_FLAGS::ECF_DUPLICATE_REFERENCES; + auto bundle = params.assMan->getAsset(params.cachePath,{}); + if (const auto contents=bundle.getContents(); contents.size() && bundle.getAssetType()==IAsset::E_TYPE::ET_BUFFER) + { + oldBuffer = IAsset::castDown(*contents.begin()); + // check the magic number + if (oldBuffer->getSize()>HeaderSize && 
memcmp(oldBuffer->getPointer(),SCacheHeader::Magic,SCacheHeader::MagicLen)==0) + { + oldHeader = *reinterpret_cast(reinterpret_cast(oldBuffer->getPointer())+SCacheHeader::MagicLen); + if (oldBuffer->getSize()!=oldHeader.sequenceByteSize()+HeaderSize) + oldHeader = {.maxSamplesLog2=0,.maxDimensions=0}; + } + } + } + + ICPUBuffer::SCreationParams bufparams = {}; + bufparams.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + bufparams.size = params.header.sequenceByteSize(); + auto buffer = ICPUBuffer::create(std::move(bufparams)); + if (!buffer) + return nullptr; + + // keep on getting bigger and bigger + const bool oldFullyContains = oldHeader.maxSamplesLog2>=params.header.maxSamplesLog2 && oldHeader.maxDimensions>=params.header.maxDimensions; + + auto* const system = params.assMan->getSystem(); + if (!oldFullyContains) + system->deleteFile(params.cachePath); + + auto* const out = reinterpret_cast(buffer->getPointer()); + // generate missing bits of the sequence + { + std::unique_ptr> sampler; + if (!oldFullyContains) + sampler = std::make_unique>(params.header.maxDimensions,0xdeadbeefu); // TODO: put the seed in the header to check or replace + // + const sequence_type* const in = oldBuffer ? 
reinterpret_cast(reinterpret_cast(oldBuffer->getPointer())+HeaderSize):nullptr; + // thread this so it doesn't take forever + const auto range = std::ranges::iota_view{0u,params.header.maxDimensions}; + std::for_each(std::execution::par,range.begin(),range.end(),[out,params,&sampler,oldHeader,in](const uint32_t dim)->void + { + const uint32_t quant_dim = dim / 3u; + const uint32_t quant_comp = dim % 3; + auto* const outDimSamples = out+(quant_dim<=params.header.maxSamplesLog2) + return; + firstInvalidSample = 0x1u << oldHeader.maxSamplesLog2; + } + const auto dimSampler = sampler->prepareDimension(dim); + // generate samples that werent in the original sequence + for (uint32_t i=firstInvalidSample; (i>>params.header.maxSamplesLog2)==0; i++) + { + const auto _sample = dimSampler.sample(i); + outDimSamples[i].set(quant_comp,_sample); + const auto recovered = outDimSamples[i].get(quant_comp); + assert(recovered==_sample>>11); + } + } + ); + } +#if 0 + for (auto d=0u; d<(params.header.maxDimensions+2)/3; d++) + { + core::vector stratification[3]; // TODO: check stratification and (t,s) sequence property in base 2 + printf("Dimension Triplet %d\n",d); + for (auto s=0u; s<(0x1u<(hlsl::uint32_t3(0,0,0)); + printf("{%f,%f,%f}\n",fp.x,fp.y,fp.z); + } + } +#endif + if (!oldFullyContains) + { + IFile::success_t succ; + { + // TODO: until Arek makes an option to create directories on the way on a new file path + const auto dir = path(params.cachePath).parent_path(); + if (!system->exists(dir,IFileBase::E_CREATE_FLAGS::ECF_WRITE)) + system->createDirectory(dir); + smart_refctd_ptr file; + { + ISystem::future_t> future; + system->createFile(future,params.cachePath,IFile::ECF_WRITE); + if (auto lock=future.acquire(); lock) + lock.move_into(file); + } + if (file) + { + IFile::success_t succ2; + file->write(succ2,SCacheHeader::Magic,0,SCacheHeader::MagicLen); + if (succ2) + { + IFile::success_t succ1; + file->write(succ1,¶ms.header,SCacheHeader::MagicLen,sizeof(params.header)); + 
if (succ1) + file->write(succ,out,HeaderSize,buffer->getSize()); + } + } + } + if (!succ) + system->deleteFile(params.cachePath); + } + + return core::smart_refctd_ptr(new CCachedOwenScrambledSequence(std::move(buffer),params.header)); + } + + inline const asset::ICPUBuffer* getBuffer() const {return buffer.get();} + + inline const SCacheHeader& getHeader() const {return header;} + + private: + inline CCachedOwenScrambledSequence(core::smart_refctd_ptr&& _buffer, const SCacheHeader& _header) : buffer(std::move(_buffer)), header(_header) {} + + core::smart_refctd_ptr buffer; + SCacheHeader header; +}; + +} + +#endif diff --git a/common/include/nbl/examples/common/KeyedQuantizedSequence.hlsl b/common/include/nbl/examples/common/KeyedQuantizedSequence.hlsl new file mode 100644 index 000000000..5c795a023 --- /dev/null +++ b/common/include/nbl/examples/common/KeyedQuantizedSequence.hlsl @@ -0,0 +1,45 @@ +#ifndef _NBL_EXAMPLES_KEYED_QUANTIZED_SEQUENCE_HLSL_ +#define _NBL_EXAMPLES_KEYED_QUANTIZED_SEQUENCE_HLSL_ + + +#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace examples +{ + +template +struct KeyedQuantizedSequence +{ + using rng_type = RNG; // legacy + using key_rng_type = RNG; + using sequence_type = hlsl::sampling::QuantizedSequence; + using return_type = vector; + + // baseDimension: offset index of the sequence + // sampleIndex: iteration number of current pixel (samples per pixel) + return_type operator()(uint32_t baseDimension, const uint32_t sampleIndex) + { + const uint32_t address = sampleIndex|(baseDimension<(pSampleBuffer + address * sizeof(sequence_type)); + sequence_type scramble; + scramble.data[0] = rng(); + scramble.data[1] = rng(); + return tmpSeq.template decode(scramble); + } + + // could be vk::BufferPointer but no arithmetic + uint64_t pSampleBuffer; + key_rng_type rng; + uint16_t sequenceSamplesLog2; +}; + +} +} +} +#endif diff --git 
a/common/include/nbl/examples/common/ScrambleSequence.hpp b/common/include/nbl/examples/common/ScrambleSequence.hpp deleted file mode 100644 index a62fb18c9..000000000 --- a/common/include/nbl/examples/common/ScrambleSequence.hpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2023-2026 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_EXAMPLES_COMMON_SCRAMBLE_SEQUENCE_HPP_INCLUDED_ -#define _NBL_EXAMPLES_COMMON_SCRAMBLE_SEQUENCE_HPP_INCLUDED_ - -#include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" -#include - -namespace nbl::examples -{ - -class ScrambleSequence : public core::IReferenceCounted -{ -public: - struct SCreationParams - { - video::CThreadSafeQueueAdapter* queue = nullptr; - core::smart_refctd_ptr utilities = nullptr; - core::smart_refctd_ptr system = nullptr; - system::path localOutputCWD; - system::path sharedOutputCWD; - std::string owenSamplerCachePath = ""; - - uint32_t MaxBufferDimensions; - uint32_t MaxSamplesBuffer; - }; - - static core::smart_refctd_ptr create(const SCreationParams& params) - { - auto createBufferFromCacheFile = [&]( - system::path filename, - size_t bufferSize, - void* data, - core::smart_refctd_ptr& buffer - ) -> std::pair, bool> - { - system::ISystem::future_t> owenSamplerFileFuture; - system::ISystem::future_t owenSamplerFileReadFuture; - size_t owenSamplerFileBytesRead; - - params.system->createFile(owenSamplerFileFuture, params.localOutputCWD / filename, system::IFile::ECF_READ); - core::smart_refctd_ptr owenSamplerFile; - - if (owenSamplerFileFuture.wait()) - { - owenSamplerFileFuture.acquire().move_into(owenSamplerFile); - if (!owenSamplerFile) - return { nullptr, false }; - - owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); - if (owenSamplerFileReadFuture.wait()) - { - owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); - - if 
(owenSamplerFileBytesRead < bufferSize) - { - buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); - return { owenSamplerFile, false }; - } - - buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); - } - } - - return { owenSamplerFile, true }; - }; - auto writeBufferIntoCacheFile = [&](core::smart_refctd_ptr file, size_t bufferSize, void* data) - { - system::ISystem::future_t owenSamplerFileWriteFuture; - size_t owenSamplerFileBytesWritten; - - file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); - if (owenSamplerFileWriteFuture.wait()) - owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); - }; - - const uint32_t quantizedDimensions = params.MaxBufferDimensions / 3u; - const size_t bufferSize = quantizedDimensions * params.MaxSamplesBuffer; - using sequence_type = hlsl::sampling::QuantizedSequence; - std::vector data(bufferSize); - core::smart_refctd_ptr sampleSeq; - - auto cacheBufferResult = createBufferFromCacheFile(params.sharedOutputCWD / params.owenSamplerCachePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) - { - core::OwenSampler sampler(params.MaxBufferDimensions, 0xdeadbeefu); - - asset::ICPUBuffer::SCreationParams bufparams = {}; - bufparams.size = quantizedDimensions * params.MaxSamplesBuffer * sizeof(sequence_type); - sampleSeq = asset::ICPUBuffer::create(std::move(bufparams)); - - auto out = reinterpret_cast(sampleSeq->getPointer()); - for (auto dim = 0u; dim < params.MaxBufferDimensions; dim++) - for (uint32_t i = 0; i < params.MaxSamplesBuffer; i++) - { - const uint32_t quant_dim = dim / 3u; - const uint32_t offset = dim % 3u; - auto& seq = out[i * quantizedDimensions + quant_dim]; - const uint32_t sample = sampler.sample(dim, i); - seq.set(offset, sample); - } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); - } - - video::IGPUBuffer::SCreationParams bufparams = {}; - bufparams.usage = 
asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - bufparams.size = bufferSize; - - core::smart_refctd_ptr buffer; - params.utilities->createFilledDeviceLocalBufferOnDedMem( - video::SIntendedSubmitInfo{ .queue = params.queue }, - std::move(bufparams), - sampleSeq->getPointer() - ).move_into(buffer); - - buffer->setObjectDebugName("Sequence buffer"); - - return core::smart_refctd_ptr(new ScrambleSequence(std::move(buffer))); - } - - ScrambleSequence(core::smart_refctd_ptr&& buffer) : buffer(std::move(buffer)) {} - - core::smart_refctd_ptr buffer; -}; - -} - -#endif