// Vulkan specialization constants (ids 0-7) configuring this shader variant.
// The values below are only the defaults used when the pipeline does not
// override a constant id.
// NOTE(review): highlight_cache_misses, first_hit_diffuse_only, mask_mode and
// sc_has_radiance are not referenced in this file; presumably they are read by
// the headers included below - confirm before removing any of them.
[[vk::constant_id(0)]] const int highlight_cache_misses = 0;
[[vk::constant_id(1)]] const int first_hit_diffuse_only = 0;
[[vk::constant_id(2)]] const int mask_mode = 0;
// Non-zero: main() additionally queries and updates the hash cache.
[[vk::constant_id(3)]] const int hybrid_cache = 0;
[[vk::constant_id(4)]] const int sc_has_radiance = 1;
// Non-zero: the hash-cache LoD is reduced by log2(|dot(out_dir, surface_normal)|).
[[vk::constant_id(5)]] const int adjust_hash_lod_by_normal = 0;
// Forwarded to compute_direct_light / compute_indirect_light; when set without
// hybrid_cache, the glossy term is added onto the diffuse result in main().
[[vk::constant_id(6)]] const int cache_glossy = 0;
// Selects how main() builds the shade mask: SINGLE_SHADING, MULTI_SHADING, or
// (any other value) reshade_decision().
[[vk::constant_id(7)]] const int shading_mode = 0;

#include "core.hlsl"
#include "scene.h"
#include "radiance_caching.h"

// Shader resources used by the ray generation entry point below.
// Per-dispatch parameters (sample index, resolutions, cameras, hash-map settings, ...).
cbuffer g_lubo { radiance_caching_ubo_t g_lubo; };
// Per-launch-index sample mask; read once per invocation in main().
RWTexture2DArray<uint> g_cache_mask;
// Encoded first-hit records; read via decode_first_hit() for every shaded sample.
RWTexture2DArray<float4> g_cache_fh;
// Cached diffuse values; main() writes the temporally filtered result here.
RWTexture2DArray<float4> g_cache_diffuse;
// Hash cache mask and payload; passed to hash_cache_lookup() (g_hash_cache also
// to update_hash_cache_entry()) when hybrid_cache is enabled.
HASH_CACHE_SHADER_TEXTURE_TYPE<HASH_MASK_SHADER_TYPE> g_hash_mask;
HASH_CACHE_SHADER_TEXTURE_TYPE<float4> g_hash_cache;
// Statistics buffer and debug image; in this file only handed to reshade_decision().
RWStructuredBuffer<shading_statistics_t> g_shading_statistics;
RWTexture2DArray<float4> g_dbg;

#include "bindings.hlsl"
#include "gltf.hlsl"
#include "scene.hlsl"
#include "pathtracer.hlsl"
#include "common.hlsl"
#include "radiance_caching.hlsl"
#include "sample_mask.hlsl"
#include "sample_cache.hlsl"
#include "hash_cache.hlsl"

#ifdef RGEN
// Ray generation entry point.
//
// For each launch index this shader
//   1. builds a bit mask of cached sample slots to (re)shade, according to
//      shading_mode,
//   2. for every selected slot decodes the stored first hit, optionally queries
//      the hash cache (hybrid_cache), and computes direct + indirect lighting,
//   3. temporally filters the new diffuse value into g_cache_diffuse and, with
//      hybrid_cache, updates the matching hash cache entry.
// clang-format off
[shader("raygeneration")]
void main()
{
    // clang-format on

    uint3 LaunchID = DispatchRaysIndex();
    const uint3 LaunchSize = DispatchRaysDimensions();

    // Swap layer 0 <-> 1 whenever sample_idx % LaunchSize.z is non-zero.
    // NOTE(review): `1 - LaunchID.z` wraps around for z > 1, so this presumably
    // assumes at most two layers - confirm.
    LaunchID.z = g_lubo.sample_idx % LaunchSize.z ? (1 - LaunchID.z) : LaunchID.z;

    // Seed the RNG from the linearised (sample, z, y, x) index and the generation counter.
    seed = tea(
        g_lubo.sample_idx * (LaunchSize.x * LaunchSize.y * LaunchSize.z) + LaunchID.z * (LaunchSize.x * LaunchSize.y)
            + LaunchID.y * LaunchSize.x + LaunchID.x,
        g_ubo.generation
    );

    const uint frame_idx = g_lubo.sample_idx;
    const float target_density = g_lubo.screen_space_target_density;
    const float shading_rate = g_lubo.screen_space_shading_rate;
    const uint3 display_res = g_lubo.display_res;
    const uint3 cache_res = g_lubo.screen_cache_res;
    const uint num_slots = num_sample_slots();
    const CameraState cam = g_lubo.cam[LaunchID.z];

    const uint mask_val = g_cache_mask[LaunchID];

    // One bit per sample slot that has to be shaded by this invocation.
    uint shade_mask = 0u;
    if (shading_mode == SINGLE_SHADING)
        shade_mask = (1u << choose_shading_sample(mask_val, frame_idx, num_slots));
    else if (shading_mode == MULTI_SHADING)
        shade_mask = calculate_masks(mask_val, num_slots).visible_samples_mask;
    else
        shade_mask = reshade_decision(
            LaunchID,
            frame_idx,
            display_res,
            cache_res,
            mask_val,
            num_slots,
            target_density,
            shading_rate,
            g_cache_fh,
            g_cache_diffuse,
            g_shading_statistics,
            g_dbg
        );

    // Visit the set bits from lowest to highest, clearing each one as it is consumed.
    const uint num_shading_samples = countbits(shade_mask);
    for (uint sample_id = 0; sample_id < num_shading_samples; sample_id++)
    {
        const uint target_sample = firstbitlow(shade_mask);
        shade_mask &= ~(1u << target_sample);

        const uint3 cache_idx = decode_cache_idx(LaunchID, target_sample);
        const first_hit_t fh = decode_first_hit(g_cache_fh[cache_idx]);
        const uint instance_id = fh.instance_id;
        const uint prim_id = fh.prim_id;
        const uint mat_id = fh.mat_id;
        const float2 bary = fh.bary;

        // Reject invalid first hits BEFORE touching any geometry: fetching local
        // geometry with the UINT_MAX sentinel would index scene data out of range.
        // NOTE(review): this ends the whole invocation, so the remaining bits of
        // shade_mask are dropped as well (unchanged from the previous behaviour) -
        // confirm whether `continue` is what is actually wanted here.
        if (instance_id == UINT_MAX)
            return;

        const LocalGeometry lg = get_local_geometry(instance_id, prim_id, bary);

        // xyz: direction used for shading and cache queries, w: cone width at the hit.
        float4 ray_cone = compute_ray_cone(display_res, lg, cam);
        const float3 out_dir = ray_cone.xyz;
        const float cone_width = ray_cone.w;
        const GltfShadeParams sp = get_shading_params(mat_id, out_dir, cone_width, lg);

        // Look up radiance for this hit in the hash cache.
        cache_query_result_t qresult = init_cache_query_result();
        if (hybrid_cache)
        {
            // cache lookup
            float lod = log2(cone_width);
            if (adjust_hash_lod_by_normal)
                lod -= log2(abs(dot(out_dir, lg.surface_normal)));

            // We filter by LoD to smooth out LoD transitions, so we need to choose
            // between two LoDs: bump by one level with probability frac(lod).
            if (rnd(seed) < (lod - floor(lod)))
                lod += 1;

            cache_query_t query;
            query.sample_idx = g_lubo.sample_idx;
            query.hash_map_run_length = g_lubo.hash_map_run_length;
            query.hash_map_cell_lifetime = g_lubo.hash_map_cell_lifetime;
            query.hash_map_block_exp = g_lubo.hash_block_size_exp;
            query.use_dir = false;
            query.use_normal = true;
            query.instance_id = instance_id;
            query.local_pos = lg.local_pos;
            query.lod = lod + g_lubo.spatial_lod_bias;
            query.dir = out_dir;
            query.roughness = sp.roughness;
            query.normal = dot(-out_dir, sp.normal) > 0 ? -sp.normal : sp.normal;
            qresult = hash_cache_lookup(g_hash_mask, g_hash_cache, true, query);
        }

        float3 albedo = sp.base_color.rgb;

        float4 new_diffuse = float4(0, 0, 0, 1);
        float4 new_glossy = float4(0, 0, 0, 1);

        // Not the same as !cache_miss, as the cache might be hit but updated by another thread!
        bool update_entry = hybrid_cache ? qresult.cache_idx.x != UINT_MAX : true;
        if (update_entry)
        {
            // Row 0 is accumulated into the diffuse value, row 1 into the glossy one;
            // both are divided by the albedo (clamped from below by albedo_eps).
            float2x4 direct_light = compute_direct_light(g_ubo.max_bounces, g_ubo.env, lg, out_dir, sp, cache_glossy, false);
            new_diffuse.rgb += direct_light[0].rgb / max(albedo, albedo_eps);
            new_glossy.rgb += direct_light[1].rgb / max(albedo, albedo_eps);

            const float cone_diff = cam.getConeDiff(display_res.xy);
            float2x4 indirect_light = compute_indirect_light(g_ubo.max_bounces, g_ubo.env, lg, out_dir, cone_diff, cone_width, sp, cache_glossy);
            new_diffuse.rgb += indirect_light[0].rgb / max(albedo, albedo_eps);
            new_glossy.rgb += indirect_light[1].rgb / max(albedo, albedo_eps);
        }

        new_diffuse.rgb = clip_fireflies(new_diffuse.rgb);
        new_glossy.rgb = clip_fireflies(new_glossy.rgb);

        // Without the hash cache the glossy term is folded into the diffuse value.
        if (cache_glossy && !hybrid_cache)
            new_diffuse.rgb += new_glossy.rgb;

        if (update_entry)
        {
            // Glossy/specular materials get a history length interpolated between 1
            // (roughness 0) and max_sample_history (roughness 1).
            float max_history = g_lubo.max_sample_history;
            if (sp.has_glossy() || sp.has_specular())
                max_history = lerp(1, g_lubo.max_sample_history, sp.roughness);
            g_cache_diffuse[cache_idx] = temporal_filter(g_cache_diffuse[cache_idx], new_diffuse, max_history);
            if (hybrid_cache)
            {
                // Direction from the old camera's ray origin to the old position of the same hit.
                float3 old_out_dir = normalize(get_old_local_geometry(instance_id, prim_id, bary).pos - g_lubo.old_cam[LaunchID.z].makeRayOrigin());
                update_hash_cache_entry(
                    LaunchID.z,
                    g_lubo.max_sample_history,
                    g_hash_cache,
                    qresult,
                    sp.roughness,
                    new_diffuse,
                    new_glossy,
                    out_dir,
                    old_out_dir
                );
            }
        }
    }
}
#endif // RGEN
