// Queries a resource's three-component size via GetDimensions and packs it into a uint3.
// In this file it is called on (RW)Texture2DArray resources, for which the three
// out-parameters of GetDimensions are width, height and element (slice) count.
template <typename T3> uint3 dim3(T3 tex)
{
    uint sizeX;
    uint sizeY;
    uint sizeZ;
    tex.GetDimensions(sizeX, sizeY, sizeZ);
    return uint3(sizeX, sizeY, sizeZ);
}

// Code adapted from https://www.shadertoy.com/view/4sfyzM CC BY-NC-SA 3.0 by won3d
// An approximate implementation using bilinear samples and using the 8 (out of 9) most significant samples.
// We could also use a more conventional Catmull-Rom Resampling kernel.
//
// Parameters:
//   tex           - texture array to resample.
//   linearSampler - sampler used for all three taps; the nudged sample positions only
//                   blend neighbouring texels as intended if it filters bilinearly
//                   (assumed from the name - confirm at the binding site).
//   uv            - xy: normalized sample position, z: array slice (passed through unchanged).
//   texSize       - size of `tex` in texels (width, height).
//   R             - shape parameter of the Dodgson quadratic kernel.
// Returns the weighted sum of three SampleLevel taps at mip level 0.
template <typename T>
T DodgsonQuadraticFast(
    in Texture2DArray<T> tex,
    in SamplerState linearSampler,
    in float3 uv,
    in float2 texSize,
    in float R
)
{
    // Position in texel units, split into the containing texel's centre and the
    // signed offset from that centre.
    float2 texUV = uv.xy * texSize;
    int2 itexel = int2(texUV);
    float2 texelCenter = float2(itexel) + 0.5;
    float2 ftexel = texUV - texelCenter;

    // Per axis: which side of the texel centre the sample position lies on.
    float2 texelSign;
    texelSign.x = ftexel.x < 0.0 ? -1.0 : 1.0;
    texelSign.y = ftexel.y < 0.0 ? -1.0 : 1.0;

    // Per-axis quadratic weights. w0 is evaluated at the offset to the neighbouring
    // texel on the texelSign side, w1 at the offset to the containing texel, and w2
    // is whatever remains so that the three weights sum to one.
    float2 w0 = 0.5 * (R + 1.0) - 2.0 * R * (ftexel - texelSign) * (ftexel - texelSign);
    // The linear term uses |ftexel|: the same offset whose square forms the quadratic term.
    float2 w1 = R * ftexel * ftexel - (2.0 * R + 0.5) * abs(ftexel) + 0.75 * (R + 1.0);
    float2 w2 = float2(1, 1) - w0 - w1;

    // Switch from texel units back to normalized texture coordinates.
    float2 texScale = float2(1, 1) / texSize;
    texelCenter *= texScale;
    texelSign *= texScale;

    // Shift the tap from the containing texel's centre towards the texelSign-side
    // neighbour by w0 / (w0 + w1) of a texel, so that one filtered fetch mixes both.
    float2 nudge = w0 / (w0 + w1);
    float2 texelNudge = texelCenter + texelSign * nudge;

    // Centre of the neighbouring texel on the opposite side.
    float2 texelFar = texelCenter - texelSign;

    // Three taps: far column, far row, and the nudged 2x2 footprint. The far/far
    // corner is never sampled, which is the "8 out of 9" approximation noted above.
    return w2.x * tex.SampleLevel(linearSampler, float3(texelFar.x, texelNudge.y, uv.z), 0)
        + w2.y * tex.SampleLevel(linearSampler, float3(texelNudge.x, texelFar.y, uv.z), 0)
        + (1.0 - w2.x - w2.y) * tex.SampleLevel(linearSampler, float3(texelNudge, uv.z), 0);
}

// Constant data for the reprojection pass: two Camera entries, indexed by the
// dispatch thread's z component (the array slice) in main().
// Camera is not declared in this part of the file.
struct ReprojectionInfo
{
    Camera cam[2];
};

// Constant buffer holding the camera data read by main().
cbuffer g_cams { ReprojectionInfo g_cams; };
// Input radiance, one array slice per eye; read per texel and resampled in main().
Texture2DArray<float4> g_in;
// Sampler used when resampling g_in (and g_variance, when present).
SamplerState g_inSampler;
// Per-texel depth for each slice; main() refers to the stored values as "hypdepth".
Texture2DArray<float> g_depth;
// Sampler used for the depth lookup at the reprojected position.
SamplerState g_depthSampler;
// Output image; main() writes one float4 (rgb, alpha = 1) per texel.
RWTexture2DArray<float4> g_out;

// Number of taps in the neighbourhood used by main(); matches its 3x3 offset table.
static const int kernel_size = 9;

// Two build variants selected by the PRECOMPUTED_VARIANCE define:
//  - defined:   second-moment data is read from g_variance, and the neighbour weights
//               are (1 - sharpness), where sharpness is a Vulkan specialization
//               constant (constant_id 1) defaulting to 0.
//  - undefined: second moments are derived from g_in itself and all weights are 1.
// Note that the centre entry of `kernel` (index 4) is never read: main() skips the
// {0, 0} offset and instead seeds its accumulators with the centre texel at weight 1.
#ifdef PRECOMPUTED_VARIANCE
static const bool precomputed_variance = true;
Texture2DArray<float4> g_variance;
[[vk::constant_id(1)]] const float sharpness = 0.0f;
static const float kernel[kernel_size] = {
    1.0f - sharpness, 1.0f - sharpness, 1.0f - sharpness,
    1.0f - sharpness, 1.0f,             1.0f - sharpness,
    1.0f - sharpness, 1.0f - sharpness, 1.0f - sharpness
};
#else
static const bool precomputed_variance = false;
static const float sharpness = 0.0f;
static const float kernel[kernel_size] = { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
#endif

// Value passed as R to DodgsonQuadraticFast in main().
// NOTE(review): dodgsonR is not declared in this part of the file - presumably it is
// supplied by an include or a compile-time definition; confirm.
static const float sampling_R = precomputed_variance ? 0.5 + 0.5 * sharpness : dodgsonR;

// Compute entry point: one thread per output texel and array slice (eye).
//
// For each texel of the current eye the shader
//   1. unprojects the texel (with its stored depth) to a world-space point and
//      projects that point to screen coordinates used to look up the other slice,
//   2. resamples the other eye's radiance there with DodgsonQuadraticFast,
//   3. gathers first/second moments of the current eye's 3x3 neighbourhood,
//   4. derives a blend factor beta from the ratio of local variance to the squared
//      difference between the two eyes, and
//   5. forces beta to 0 where the sampled and predicted depths disagree.
// The result lerp(this eye, other eye, beta) is written to g_out with alpha = 1.
[numthreads(8, 8, 1)]
void main(uint3 gId : SV_DispatchThreadID)
{
    // Threads outside the output image do nothing.
    const uint3 outDim = dim3(g_out);
    if (any(gId >= outDim))
        return;

    const float hypdepth = g_depth[gId];
    // Texel centre in normalized screen coordinates, with the stored depth as z.
    float3 screenuv = float3((float2(gId.xy) + 0.5) / float2(outDim.xy), hypdepth);
    // ReprojectionInfo only declares `cam`; the cameras are indexed by slice.
    float3 hitpoint = g_cams.cam[gId.z].screenToWorld(screenuv);
    // xy: reprojected screen position, z: index of the other slice (1 - gId.z).
    // NOTE(review): both transforms use cam[gId.z]; this relies on how Camera defines
    // screenToWorld / worldtoScreen, which is not visible here - confirm.
    float3 ruv3 = float3(g_cams.cam[gId.z].worldtoScreen(hitpoint).xy, 1 - gId.z);
    float predictedRDepth = screenuv.z;
    bool outside = any(float3(0, 0, 0) > ruv3 | ruv3 > float3(1, 1, 1));
    if (outside)
    {
        // Reprojected position is off-screen: pass this eye's colour through unchanged.
        g_out[gId] = float4(g_in[gId].rgb, 1);
        return;
    }

    // fetch this eye's and other eye's radiance values
    float4 l1 = g_in[gId];
    float rhypdepth = g_depth.SampleLevel(g_depthSampler, ruv3, 0);
    // The resampling kernel can produce negative values; clamp them away.
    float4 r1 = max(0.0, DodgsonQuadraticFast(g_in, g_inSampler, ruv3, dim3(g_in).xy, sampling_R));
    
    float4 r2 = 0.0f;
    float4 l2 = 0.0f;
#ifdef PRECOMPUTED_VARIANCE
    // if variance is precomputed, we still filter the current eye's variance, for the other eye we rely on the resampling blur.
    l2 = g_variance[gId];
    r2 = max(0.0, DodgsonQuadraticFast(g_variance, g_inSampler, ruv3, dim3(g_variance).xy, sampling_R));
#endif

    static const int2 offsets[kernel_size] = {
        { -1, -1 }, { 0, -1 }, { 1, -1 },
        { -1,  0 }, { 0,  0 }, { 1,  0 },
        { -1,  1 }, { 0,  1 }, { 1,  1 },
    };

    // collect image statistics for this pixel neighbourhood
    // The accumulators start with the centre texel at weight 1; the loop adds neighbours.
    float3 ll1 = l1.rgb; // 1st moment radiance
    float3 ll2 = precomputed_variance ? l2.rgb : sqr(l1.rgb); // 2nd moment radiance
    // NOTE(review): unlike ll1/ll2, error receives no centre-texel term although it is
    // normalized by the same weight w - confirm this is intended.
    float3 error = 0.0;  // square difference between left and right eye
    float w = 1.0f;      // normalization weight
    for (uint i = 0; i < kernel_size; i++)
    {
        int3 p = gId + int3(offsets[i], 0);
        // Skip the centre (already accumulated), texels outside the image and zero weights.
        if (all(offsets[i].xy == 0) || any(p < int3(0, 0, 0)) || any(p >= dim3(g_in)) || kernel[i] == 0.0f)
            continue;
        float3 c = g_in[p].rgb;
        ll1 += kernel[i] * c;
#ifdef PRECOMPUTED_VARIANCE
        ll2 += kernel[i] * g_variance[p].rgb;
#else
        ll2 += kernel[i] * sqr(c);
        error += kernel[i] * sqr(r1.rgb - c);
#endif
        w += kernel[i];
    }
    ll1 /= w;
    ll2 /= w;
    error /= w;

    // finally, compute the blending parameter beta which determines how much of the other eye is blended in
    float beta_ = 0.0f;
    if (precomputed_variance)
    {
        // Local variance E[l^2] - E[l]^2, summed over rgb.
        float varX = dot(ll2.rgb - sqr(ll1.rgb), 1);
        // Squared difference expanded as E[l^2] - 2 E[l] r + r2, summed over rgb.
        float sqr_diff = dot(ll2.rgb - 2 * ll1.rgb * r1.rgb + r2.rgb, 1);
        // The small epsilons keep the ratio finite when both terms vanish.
        beta_ = clamp((varX + 5e-7) / (sqr_diff + 1e-6), 0, 1.0);
    }
    else
    {
        beta_ = clamp((dot(ll2 - sqr(ll1), 1) + 5e-7) / (dot(error, 1) + 1e-6), 0, 1.0);
    }

    // mask out regions where the depth value does not match, those are reprojection errors and any blending in those regions will look wrong
    const float depth_beta = abs(rhypdepth - predictedRDepth) > 0.0000100 ? 0.0 : 1.0;
    float beta = clamp(beta_, 0.0f, depth_beta);

    g_out[gId] = float4(lerp(l1.rgb, r1.rgb, beta), 1);
}
