// Vulkan specialization constants (ids 0-7) selecting shader variants.
// Only hybrid_cache, adjust_hash_lod_by_normal and cache_glossy are read by the
// ray-generation entry point in this file; the remaining constants are not
// referenced here and are presumably consumed by the included headers
// (TODO confirm).
[[vk::constant_id(0)]] const int highlight_cache_misses = 0;
[[vk::constant_id(1)]] const int first_hit_diffuse_only = 0;
[[vk::constant_id(2)]] const int mask_mode = 0;
// Non-zero: main() additionally queries and updates the hash cache.
[[vk::constant_id(3)]] const int hybrid_cache = 0;
[[vk::constant_id(4)]] const int sc_has_radiance = 1;
// Non-zero: main() subtracts log2(|dot(out_dir, surface_normal)|) from the hash-cache LoD.
[[vk::constant_id(5)]] const int adjust_hash_lod_by_normal = 0;
// Forwarded to compute_direct_light / compute_indirect_light; when set without
// hybrid_cache, main() adds the glossy term to the diffuse cache value.
[[vk::constant_id(6)]] const int cache_glossy = 0;
[[vk::constant_id(7)]] const int shading_mode = 0;

#include "core.hlsl"
#include "scene.h"
#include "radiance_caching.h"

// Shader resources used by the radiance-cache shading pass.
// NOTE(review): declaration order may determine binding slots — confirm before reordering.

// Per-pass uniform data (cameras, resolutions, hash-map parameters, sample index).
cbuffer g_lubo { radiance_caching_ubo_t g_lubo; };
// Not referenced by main() in this file; presumably used by the included sample_mask code (TODO confirm).
RWTexture2DArray<uint> g_cache_mask;
// Encoded first-hit record per cache texel; main() decodes it with decode_first_hit().
RWTexture2DArray<float4> g_cache_fh;
// Cached diffuse value per cache texel; main() reads it, temporally filters it and writes it back.
RWTexture2DArray<float4> g_cache_diffuse;
// Hash cache mask and payload, passed to hash_cache_lookup() / update_hash_cache_entry().
HASH_CACHE_SHADER_TEXTURE_TYPE<HASH_MASK_SHADER_TYPE> g_hash_mask;
HASH_CACHE_SHADER_TEXTURE_TYPE<float4> g_hash_cache;
// Element 0 is compared against the dispatch thread index to bound the sample list.
RWTexture1D<uint> g_shading_samples_size;
// Packed cache indices, one per dispatch thread; unpacked with unpack_cache_idx().
RWStructuredBuffer<uint> g_shading_samples;

#include "bindings.hlsl"
#include "gltf.hlsl"
#include "scene.hlsl"
#include "pathtracer.hlsl"
#include "common.hlsl"
#include "radiance_caching.hlsl"
#include "sample_mask.hlsl"
#include "sample_cache.hlsl"
#include "hash_cache.hlsl"

#ifdef RGEN
// clang-format off
[shader("raygeneration")]
void main()
{
    // clang-format on

    // Ray-generation entry point: each thread shades one entry of
    // g_shading_samples. It decodes the cached first hit for that entry,
    // computes direct and indirect light (demodulated by albedo), temporally
    // filters the result into g_cache_diffuse and, when hybrid_cache is set,
    // also updates the matching hash-cache entry.

    const uint tid = DispatchRaysIndex().x;
    const uint num_threads = DispatchRaysDimensions().x;
    dbg = (tid == (num_threads / 2)); // enable printouts for debug pixel
    seed = tea(g_lubo.sample_idx * num_threads + tid, g_ubo.generation);

    // Threads at or beyond the sample count have nothing to shade. Using '>='
    // (not '>') keeps the thread with tid == count from reading one entry past
    // the end of the list.
    // NOTE(review): assumes g_shading_samples_size[0] is an element count — confirm.
    if (tid >= g_shading_samples_size[0])
        return;

    const uint3 cache_idx = unpack_cache_idx(g_shading_samples[tid]);
    // The cache layer index packs the eye and the per-eye sample layer.
    const uint eye = cache_idx.z / g_lubo.screen_space_num_layers;
    const uint3 img_res = g_lubo.display_res;

    const first_hit_t fh = decode_first_hit(g_cache_fh[cache_idx]);
    const uint instance_id = fh.instance_id;

    // Reject invalid hits before the id is used to fetch geometry.
    if (instance_id == UINT_MAX)
        return;

    const uint prim_id = fh.prim_id;
    const uint mat_id = fh.mat_id;
    const float2 bary = fh.bary;
    const CameraState cam = g_lubo.cam[eye];
    const LocalGeometry lg = get_local_geometry(instance_id, prim_id, bary);

    const float4 ray_cone = compute_ray_cone(img_res, lg, cam);
    const float3 out_dir = ray_cone.xyz;
    const float cone_width = ray_cone.w;
    const GltfShadeParams sp = get_shading_params(mat_id, out_dir, cone_width, lg);

    // Look up the hash cache for radiance values.
    cache_query_result_t qresult = init_cache_query_result();
    if (hybrid_cache)
    {
        float lod = log2(cone_width);
        // NOTE(review): a dot product of exactly 0 makes log2() return -inf here — confirm this is acceptable.
        if (adjust_hash_lod_by_normal)
            lod -= log2(abs(dot(out_dir, lg.surface_normal)));

        // We filter by LoD to smooth out LoD transitions, so we stochastically
        // choose between the two neighbouring LoDs.
        if (rnd(seed) < (lod - floor(lod)))
            lod += 1;

        cache_query_t query;
        query.sample_idx = g_lubo.sample_idx;
        query.hash_map_run_length = g_lubo.hash_map_run_length;
        query.hash_map_cell_lifetime = g_lubo.hash_map_cell_lifetime;
        query.hash_map_block_exp = g_lubo.hash_block_size_exp;
        query.use_dir = false;
        query.use_normal = true;
        query.instance_id = instance_id;
        query.local_pos = lg.local_pos;
        query.lod = lod + g_lubo.spatial_lod_bias;
        query.dir = out_dir;
        query.roughness = sp.roughness;
        query.normal = dot(-out_dir, sp.normal) > 0 ? -sp.normal : sp.normal;
        qresult = hash_cache_lookup(g_hash_mask, g_hash_cache, true, query);
    }

    // not the same as !cache_miss as cache might be hit but updated by other thread!
    const bool update_entry = hybrid_cache ? qresult.cache_idx.x != UINT_MAX : true;

    // Nothing is written when this thread does not own the update, so skip
    // all shading work in that case.
    if (!update_entry)
        return;

    // Lighting is stored demodulated: divide by albedo, clamped away from zero.
    const float3 albedo = sp.base_color.rgb;
    const float3 albedo_div = max(albedo, albedo_eps);

    float4 new_diffuse = float4(0, 0, 0, 1);
    float4 new_glossy = float4(0, 0, 0, 1);

    const float2x4 direct_light = compute_direct_light(g_ubo.max_bounces, g_ubo.env, lg, out_dir, sp, cache_glossy, false);
    new_diffuse.rgb += direct_light[0].rgb / albedo_div;
    new_glossy.rgb += direct_light[1].rgb / albedo_div;

    const float cone_diff = cam.getConeDiff(img_res.xy);
    const float2x4 indirect_light = compute_indirect_light(g_ubo.max_bounces, g_ubo.env, lg, out_dir, cone_diff, cone_width, sp, cache_glossy);
    new_diffuse.rgb += indirect_light[0].rgb / albedo_div;
    new_glossy.rgb += indirect_light[1].rgb / albedo_div;

    new_diffuse.rgb = clip_fireflies(new_diffuse.rgb);
    new_glossy.rgb = clip_fireflies(new_glossy.rgb);

    // Without the hash cache there is no separate glossy store, so fold the
    // glossy term into the screen-space diffuse value.
    if (cache_glossy && !hybrid_cache)
        new_diffuse.rgb += new_glossy.rgb;

    g_cache_diffuse[cache_idx] = temporal_filter(g_cache_diffuse[cache_idx], new_diffuse, g_lubo.max_sample_history);
    if (hybrid_cache)
    {
        // View direction towards the same surface point as seen from the previous camera.
        const float3 old_out_dir = normalize(get_old_local_geometry(instance_id, prim_id, bary).pos - g_lubo.old_cam[eye].makeRayOrigin());
        update_hash_cache_entry(
            eye,
            g_lubo.max_sample_history,
            g_hash_cache,
            qresult,
            sp.roughness,
            new_diffuse, // * float4(albedo, 1.0f),
            new_glossy,
            out_dir,
            old_out_dir
        );
    }
}
#endif // RGEN
