// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. // // NVIDIA CORPORATION and its licensors retain all intellectual property // and proprietary rights in and to this software, related documentation // and any modifications thereto. Any use, reproduction, disclosure or // distribution of this software and related documentation without an express // license agreement from NVIDIA CORPORATION is strictly prohibited. #include "common.h" #include "texture.h" //------------------------------------------------------------------------ // Memory access and math helpers. static __device__ __forceinline__ void accum_from_mem(float* a, int s, float b, float c) { a[0] += b * c; } static __device__ __forceinline__ void accum_from_mem(float* a, int s, float2 b, float c) { a[0] += b.x * c; a[s] += b.y * c; } static __device__ __forceinline__ void accum_from_mem(float* a, int s, float4 b, float c) { a[0] += b.x * c; a[s] += b.y * c; a[2*s] += b.z * c; a[3*s] += b.w * c; } static __device__ __forceinline__ void accum_to_mem(float& a, float* b, int s) { a += b[0]; } static __device__ __forceinline__ void accum_to_mem(float2& a, float* b, int s) { float2 v = a; v.x += b[0]; v.y += b[s]; a = v; } static __device__ __forceinline__ void accum_to_mem(float4& a, float* b, int s) { float4 v = a; v.x += b[0]; v.y += b[s]; v.z += b[2*s]; v.w += b[3*s]; a = v; } static __device__ __forceinline__ bool isfinite_vec3(const float3& a) { return isfinite(a.x) && isfinite(a.y) && isfinite(a.z); } static __device__ __forceinline__ bool isfinite_vec4(const float4& a) { return isfinite(a.x) && isfinite(a.y) && isfinite(a.z) && isfinite(a.w); } template static __device__ __forceinline__ T lerp (const T& a, const T& b, float c) { return a + c * (b - a); } template static __device__ __forceinline__ T bilerp(const T& a, const T& b, const T& c, const T& d, const float2& e) { return lerp(lerp(a, b, e.x), lerp(c, d, e.x), e.y); } //------------------------------------------------------------------------ // Cube map wrapping for smooth filtering across edges and corners. At corners, // one of the texture coordinates will be negative. For correct interpolation, // the missing texel must take the average color of the other three. static __constant__ uint32_t c_cubeWrapMask1[48] = { 0x1530a440, 0x1133a550, 0x6103a110, 0x1515aa44, 0x6161aa11, 0x40154a04, 0x44115a05, 0x04611a01, 0x2630a440, 0x2233a550, 0x5203a110, 0x2626aa44, 0x5252aa11, 0x40264a04, 0x44225a05, 0x04521a01, 0x32608064, 0x3366a055, 0x13062091, 0x32328866, 0x13132299, 0x50320846, 0x55330a55, 0x05130219, 0x42508064, 0x4455a055, 0x14052091, 0x42428866, 0x14142299, 0x60420846, 0x66440a55, 0x06140219, 0x5230a044, 0x5533a055, 0x1503a011, 0x5252aa44, 0x1515aa11, 0x40520a44, 0x44550a55, 0x04150a11, 0x6130a044, 0x6633a055, 0x2603a011, 0x6161aa44, 0x2626aa11, 0x40610a44, 0x44660a55, 0x04260a11, }; static __constant__ uint8_t c_cubeWrapMask2[48] = { 0x26, 0x33, 0x11, 0x05, 0x00, 0x09, 0x0c, 0x04, 0x04, 0x00, 0x00, 0x05, 0x00, 0x81, 0xc0, 0x40, 0x02, 0x03, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x02, 0x64, 0x30, 0x90, 0x55, 0xa0, 0x99, 0xcc, 0x64, 0x24, 0x30, 0x10, 0x05, 0x00, 0x01, 0x00, 0x00, 0x06, 0x03, 0x01, 0x05, 0x00, 0x89, 0xcc, 0x44, }; static __device__ __forceinline__ int4 wrapCubeMap(int face, int ix0, int ix1, int iy0, int iy1, int w) { // Calculate case number. int cx = (ix0 < 0) ? 0 : (ix1 >= w) ? 2 : 1; int cy = (iy0 < 0) ? 0 : (iy1 >= w) ? 6 : 3; int c = cx + cy; if (c >= 5) c--; c = (face << 3) + c; // Compute coordinates and faces. unsigned int m = c_cubeWrapMask1[c]; int x0 = (m >> 0) & 3; x0 = (x0 == 0) ? 0 : (x0 == 1) ? ix0 : iy0; int x1 = (m >> 2) & 3; x1 = (x1 == 0) ? 0 : (x1 == 1) ? ix1 : iy0; int x2 = (m >> 4) & 3; x2 = (x2 == 0) ? 0 : (x2 == 1) ? ix0 : iy1; int x3 = (m >> 6) & 3; x3 = (x3 == 0) ? 0 : (x3 == 1) ? ix1 : iy1; int y0 = (m >> 8) & 3; y0 = (y0 == 0) ? 0 : (y0 == 1) ? ix0 : iy0; int y1 = (m >> 10) & 3; y1 = (y1 == 0) ? 0 : (y1 == 1) ? ix1 : iy0; int y2 = (m >> 12) & 3; y2 = (y2 == 0) ? 0 : (y2 == 1) ? ix0 : iy1; int y3 = (m >> 14) & 3; y3 = (y3 == 0) ? 0 : (y3 == 1) ? ix1 : iy1; int f0 = ((m >> 16) & 15) - 1; int f1 = ((m >> 20) & 15) - 1; int f2 = ((m >> 24) & 15) - 1; int f3 = ((m >> 28) ) - 1; // Flips. unsigned int f = c_cubeWrapMask2[c]; int w1 = w - 1; if (f & 0x01) x0 = w1 - x0; if (f & 0x02) x1 = w1 - x1; if (f & 0x04) x2 = w1 - x2; if (f & 0x08) x3 = w1 - x3; if (f & 0x10) y0 = w1 - y0; if (f & 0x20) y1 = w1 - y1; if (f & 0x40) y2 = w1 - y2; if (f & 0x80) y3 = w1 - y3; // Done. int4 tcOut; tcOut.x = x0 + (y0 + f0 * w) * w; tcOut.y = x1 + (y1 + f1 * w) * w; tcOut.z = x2 + (y2 + f2 * w) * w; tcOut.w = x3 + (y3 + f3 * w) * w; return tcOut; } //------------------------------------------------------------------------ // Cube map indexing and gradient functions. // Map a 3D lookup vector into an (s,t) face coordinates (returned in first . // two parameters) and face index. static __device__ __forceinline__ int indexCubeMap(float& x, float& y, float z) { float ax = fabsf(x); float ay = fabsf(y); float az = fabsf(z); int idx; float c; if (az > fmaxf(ax, ay)) { idx = 4; c = z; } else if (ay > ax) { idx = 2; c = y; y = z; } else { idx = 0; c = x; x = z; } if (c < 0.f) idx += 1; float m = __frcp_rz(fabsf(c)) * .5; float m0 = __uint_as_float(__float_as_uint(m) ^ ((0x21u >> idx) << 31)); float m1 = (idx != 2) ? -m : m; x = x * m0 + .5; y = y * m1 + .5; if (!isfinite(x) || !isfinite(y)) return -1; // Invalid uv. x = fminf(fmaxf(x, 0.f), 1.f); y = fminf(fmaxf(y, 0.f), 1.f); return idx; } // Based on dA/d{s,t}, compute dA/d{x,y,z} at a given 3D lookup vector. static __device__ __forceinline__ float3 indexCubeMapGrad(float3 uv, float gu, float gv) { float ax = fabsf(uv.x); float ay = fabsf(uv.y); float az = fabsf(uv.z); int idx; float c; float c0 = gu; float c1 = gv; if (az > fmaxf(ax, ay)) { idx = 0x10; c = uv.z; c0 *= uv.x; c1 *= uv.y; } else if (ay > ax) { idx = 0x04; c = uv.y; c0 *= uv.x; c1 *= uv.z; } else { idx = 0x01; c = uv.x; c0 *= uv.z; c1 *= uv.y; } if (c < 0.f) idx += idx; float m = __frcp_rz(fabsf(c)); c0 = (idx & 0x34) ? -c0 : c0; c1 = (idx & 0x2e) ? -c1 : c1; float gl = (c0 + c1) * m; float gx = (idx & 0x03) ? gl : (idx & 0x20) ? -gu : gu; float gy = (idx & 0x0c) ? gl : -gv; float gz = (idx & 0x30) ? gl : (idx & 0x03) ? gu : gv; gz = (idx & 0x09) ? -gz : gz; float3 res = make_float3(gx, gy, gz) * (m * .5f); if (!isfinite_vec3(res)) return make_float3(0.f, 0.f, 0.f); // Invalid uv. return res; } // Based on dL/d(d{s,t}/s{X,Y}), compute dL/d(d{x,y,z}/d{X,Y}). This is just two // indexCubeMapGrad() functions rolled together. static __device__ __forceinline__ void indexCubeMapGrad4(float3 uv, float4 dw, float3& g0, float3& g1) { float ax = fabsf(uv.x); float ay = fabsf(uv.y); float az = fabsf(uv.z); int idx; float c, c0, c1; if (az > fmaxf(ax, ay)) { idx = 0x10; c = uv.z; c0 = uv.x; c1 = uv.y; } else if (ay > ax) { idx = 0x04; c = uv.y; c0 = uv.x; c1 = uv.z; } else { idx = 0x01; c = uv.x; c0 = uv.z; c1 = uv.y; } if (c < 0.f) idx += idx; float m = __frcp_rz(fabsf(c)); c0 = (idx & 0x34) ? -c0 : c0; c1 = (idx & 0x2e) ? -c1 : c1; float gl0 = (dw.x * c0 + dw.z * c1) * m; float gl1 = (dw.y * c0 + dw.w * c1) * m; float gx0 = (idx & 0x03) ? gl0 : (idx & 0x20) ? -dw.x : dw.x; float gx1 = (idx & 0x03) ? gl1 : (idx & 0x20) ? -dw.y : dw.y; float gy0 = (idx & 0x0c) ? gl0 : -dw.z; float gy1 = (idx & 0x0c) ? gl1 : -dw.w; float gz0 = (idx & 0x30) ? gl0 : (idx & 0x03) ? dw.x : dw.z; float gz1 = (idx & 0x30) ? gl1 : (idx & 0x03) ? dw.y : dw.w; if (idx & 0x09) { gz0 = -gz0; gz1 = -gz1; } g0 = make_float3(gx0, gy0, gz0) * (m * .5f); g1 = make_float3(gx1, gy1, gz1) * (m * .5f); if (!isfinite_vec3(g0) || !isfinite_vec3(g1)) { g0 = make_float3(0.f, 0.f, 0.f); // Invalid uv. g1 = make_float3(0.f, 0.f, 0.f); } } // Compute d{s,t}/d{X,Y} based on d{x,y,z}/d{X,Y} at a given 3D lookup vector. // Result is (ds/dX, ds/dY, dt/dX, dt/dY). static __device__ __forceinline__ float4 indexCubeMapGradST(float3 uv, float3 dvdX, float3 dvdY) { float ax = fabsf(uv.x); float ay = fabsf(uv.y); float az = fabsf(uv.z); int idx; float c, gu, gv; if (az > fmaxf(ax, ay)) { idx = 0x10; c = uv.z; gu = uv.x; gv = uv.y; } else if (ay > ax) { idx = 0x04; c = uv.y; gu = uv.x; gv = uv.z; } else { idx = 0x01; c = uv.x; gu = uv.z; gv = uv.y; } if (c < 0.f) idx += idx; if (idx & 0x09) { dvdX.z = -dvdX.z; dvdY.z = -dvdY.z; } float m = __frcp_rz(fabsf(c)); float dm = m * .5f; float mm = m * dm; gu *= (idx & 0x34) ? -mm : mm; gv *= (idx & 0x2e) ? -mm : mm; float4 res; if (idx & 0x03) { res = make_float4(gu * dvdX.x + dm * dvdX.z, gu * dvdY.x + dm * dvdY.z, gv * dvdX.x - dm * dvdX.y, gv * dvdY.x - dm * dvdY.y); } else if (idx & 0x0c) { res = make_float4(gu * dvdX.y + dm * dvdX.x, gu * dvdY.y + dm * dvdY.x, gv * dvdX.y + dm * dvdX.z, gv * dvdY.y + dm * dvdY.z); } else // (idx & 0x30) { res = make_float4(gu * dvdX.z + copysignf(dm, c) * dvdX.x, gu * dvdY.z + copysignf(dm, c) * dvdY.x, gv * dvdX.z - dm * dvdX.y, gv * dvdY.z - dm * dvdY.y); } if (!isfinite_vec4(res)) return make_float4(0.f, 0.f, 0.f, 0.f); return res; } // Compute d(d{s,t}/d{X,Y})/d{x,y,z}, i.e., how the pixel derivatives of 2D face // coordinates change w.r.t. 3D texture coordinate vector, returned as follows: // | d(ds/dX)/dx d(ds/dY)/dx d(dt/dX)/dx d(dt/dY)/dx | // | d(ds/dX)/dy d(ds/dY)/dy d(dt/dX)/dy d(dt/dY)/dy | // | d(ds/dX)/dz d(ds/dY)/dz d(dt/dX)/dz d(dt/dY)/dz | static __device__ __forceinline__ void indexCubeMapGrad2(float3 uv, float3 dvdX, float3 dvdY, float4& dx, float4& dy, float4& dz) { float ax = fabsf(uv.x); float ay = fabsf(uv.y); float az = fabsf(uv.z); int idx; float c, gu, gv; if (az > fmaxf(ax, ay)) { idx = 0x10; c = uv.z; gu = uv.x; gv = uv.y; } else if (ay > ax) { idx = 0x04; c = uv.y; gu = uv.x; gv = uv.z; } else { idx = 0x01; c = uv.x; gu = uv.z; gv = uv.y; } if (c < 0.f) idx += idx; if (idx & 0x09) { dvdX.z = -dvdX.z; dvdY.z = -dvdY.z; } float m = __frcp_rz(c); float dm = -m * fabsf(m) * .5; float mm = m * m * .5; float mu = (idx & 0x34) ? -mm : mm; float mv = (idx & 0x2e) ? -mm : mm; gu *= -2.0 * m * mu; gv *= -2.0 * m * mv; if (idx & 0x03) { dx.x = gu * dvdX.x + dm * dvdX.z; dx.y = gu * dvdY.x + dm * dvdY.z; dx.z = gv * dvdX.x - dm * dvdX.y; dx.w = gv * dvdY.x - dm * dvdY.y; dy.x = 0.f; dy.y = 0.f; dy.z = mv * dvdX.x; dy.w = mv * dvdY.x; dz.x = mu * dvdX.x; dz.y = mu * dvdY.x; dz.z = 0.f; dz.w = 0.f; } else if (idx & 0x0c) { dx.x = mu * dvdX.y; dx.y = mu * dvdY.y; dx.z = 0.f; dx.w = 0.f; dy.x = gu * dvdX.y + dm * dvdX.x; dy.y = gu * dvdY.y + dm * dvdY.x; dy.z = gv * dvdX.y + dm * dvdX.z; dy.w = gv * dvdY.y + dm * dvdY.z; dz.x = 0.f; dz.y = 0.f; dz.z = mv * dvdX.y; dz.w = mv * dvdY.y; } else // (idx & 0x30) { dx.x = mu * dvdX.z; dx.y = mu * dvdY.z; dx.z = 0.f; dx.w = 0.f; dy.x = 0.f; dy.y = 0.f; dy.z = mv * dvdX.z; dy.w = mv * dvdY.z; dz.x = gu * dvdX.z - fabsf(dm) * dvdX.x; dz.y = gu * dvdY.z - fabsf(dm) * dvdY.x; dz.z = gv * dvdX.z - dm * dvdX.y; dz.w = gv * dvdY.z - dm * dvdY.y; } } //------------------------------------------------------------------------ // General texture indexing. template static __device__ __forceinline__ int indexTextureNearest(const TextureKernelParams& p, float3 uv, int tz) { int w = p.texWidth; int h = p.texHeight; float u = uv.x; float v = uv.y; // Cube map indexing. if (CUBE_MODE) { // No wrap. Fold face index into tz right away. int idx = indexCubeMap(u, v, uv.z); // Rewrites u, v. if (idx < 0) return -1; // Invalid uv. tz = 6 * tz + idx; } else { // Handle boundary. if (p.boundaryMode == TEX_BOUNDARY_MODE_WRAP) { u = u - (float)__float2int_rd(u); v = v - (float)__float2int_rd(v); } } u = u * (float)w; v = v * (float)h; int iu = __float2int_rd(u); int iv = __float2int_rd(v); // In zero boundary mode, return texture address -1. if (!CUBE_MODE && p.boundaryMode == TEX_BOUNDARY_MODE_ZERO) { if (iu < 0 || iu >= w || iv < 0 || iv >= h) return -1; } // Otherwise clamp and calculate the coordinate properly. iu = min(max(iu, 0), w-1); iv = min(max(iv, 0), h-1); return iu + w * (iv + tz * h); } template static __device__ __forceinline__ float2 indexTextureLinear(const TextureKernelParams& p, float3 uv, int tz, int4& tcOut, int level) { // Mip level size. int2 sz = mipLevelSize(p, level); int w = sz.x; int h = sz.y; // Compute texture-space u, v. float u = uv.x; float v = uv.y; bool clampU = false; bool clampV = false; // Cube map indexing. int face = 0; if (CUBE_MODE) { // Neither clamp or wrap. face = indexCubeMap(u, v, uv.z); // Rewrites u, v. if (face < 0) { tcOut.x = tcOut.y = tcOut.z = tcOut.w = -1; // Invalid uv. return make_float2(0.f, 0.f); } u = u * (float)w - 0.5f; v = v * (float)h - 0.5f; } else { if (p.boundaryMode == TEX_BOUNDARY_MODE_WRAP) { // Wrap. u = u - (float)__float2int_rd(u); v = v - (float)__float2int_rd(v); } // Move to texel space. u = u * (float)w - 0.5f; v = v * (float)h - 0.5f; if (p.boundaryMode == TEX_BOUNDARY_MODE_CLAMP) { // Clamp to center of edge texels. u = fminf(fmaxf(u, 0.f), w - 1.f); v = fminf(fmaxf(v, 0.f), h - 1.f); clampU = (u == 0.f || u == w - 1.f); clampV = (v == 0.f || v == h - 1.f); } } // Compute texel coordinates and weights. int iu0 = __float2int_rd(u); int iv0 = __float2int_rd(v); int iu1 = iu0 + (clampU ? 0 : 1); // Ensure zero u/v gradients with clamped. int iv1 = iv0 + (clampV ? 0 : 1); u -= (float)iu0; v -= (float)iv0; // Cube map wrapping. bool cubeWrap = CUBE_MODE && (iu0 < 0 || iv0 < 0 || iu1 >= w || iv1 >= h); if (cubeWrap) { tcOut = wrapCubeMap(face, iu0, iu1, iv0, iv1, w); tcOut += 6 * tz * w * h; // Bring in tz. return make_float2(u, v); // Done. } // Fold cube map face into tz. if (CUBE_MODE) tz = 6 * tz + face; // Wrap overflowing texel indices. if (!CUBE_MODE && p.boundaryMode == TEX_BOUNDARY_MODE_WRAP) { if (iu0 < 0) iu0 += w; if (iv0 < 0) iv0 += h; if (iu1 >= w) iu1 -= w; if (iv1 >= h) iv1 -= h; } // Coordinates with tz folded in. int iu0z = iu0 + tz * w * h; int iu1z = iu1 + tz * w * h; tcOut.x = iu0z + w * iv0; tcOut.y = iu1z + w * iv0; tcOut.z = iu0z + w * iv1; tcOut.w = iu1z + w * iv1; // Invalidate texture addresses outside unit square if we are in zero mode. if (!CUBE_MODE && p.boundaryMode == TEX_BOUNDARY_MODE_ZERO) { bool iu0_out = (iu0 < 0 || iu0 >= w); bool iu1_out = (iu1 < 0 || iu1 >= w); bool iv0_out = (iv0 < 0 || iv0 >= h); bool iv1_out = (iv1 < 0 || iv1 >= h); if (iu0_out || iv0_out) tcOut.x = -1; if (iu1_out || iv0_out) tcOut.y = -1; if (iu0_out || iv1_out) tcOut.z = -1; if (iu1_out || iv1_out) tcOut.w = -1; } // All done. return make_float2(u, v); } //------------------------------------------------------------------------ // Mip level calculation. template static __device__ __forceinline__ void calculateMipLevel(int& level0, int& level1, float& flevel, const TextureKernelParams& p, int pidx, float3 uv, float4* pdw, float3* pdfdv) { // Do nothing if mips not in use. if (FILTER_MODE == TEX_MODE_NEAREST || FILTER_MODE == TEX_MODE_LINEAR) return; // Determine mip level based on UV pixel derivatives. If no derivatives are given (mip level bias only), leave as zero. if (!BIAS_ONLY) { // Get pixel derivatives of texture coordinates. float4 uvDA; float3 dvdX, dvdY; // Gradients use these later. if (CUBE_MODE) { // Fetch. float2 d0 = ((const float2*)p.uvDA)[3 * pidx + 0]; float2 d1 = ((const float2*)p.uvDA)[3 * pidx + 1]; float2 d2 = ((const float2*)p.uvDA)[3 * pidx + 2]; // Map d{x,y,z}/d{X,Y} into d{s,t}/d{X,Y}. dvdX = make_float3(d0.x, d1.x, d2.x); // d{x,y,z}/dX dvdY = make_float3(d0.y, d1.y, d2.y); // d{x,y,z}/dY uvDA = indexCubeMapGradST(uv, dvdX, dvdY); // d{s,t}/d{X,Y} } else { // Fetch. uvDA = ((const float4*)p.uvDA)[pidx]; } // Scaling factors. float uscl = p.texWidth; float vscl = p.texHeight; // d[s,t]/d[X,Y]. float dsdx = uvDA.x * uscl; float dsdy = uvDA.y * uscl; float dtdx = uvDA.z * vscl; float dtdy = uvDA.w * vscl; // Calculate footprint axis lengths. float A = dsdx*dsdx + dtdx*dtdx; float B = dsdy*dsdy + dtdy*dtdy; float C = dsdx*dsdy + dtdx*dtdy; float l2b = 0.5 * (A + B); float l2n = 0.25 * (A-B)*(A-B) + C*C; float l2a = sqrt(l2n); float lenMinorSqr = fmaxf(0.0, l2b - l2a); float lenMajorSqr = l2b + l2a; // Footprint vs. mip level gradient. if (pdw && FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_LINEAR) { float dw = 0.72134752f / (l2n + l2a * l2b); // Constant is 0.5/ln(2). float AB = dw * .5f * (A - B); float Cw = dw * C; float l2aw = dw * l2a; float d_f_ddsdX = uscl * (dsdx * (l2aw + AB) + dsdy * Cw); float d_f_ddsdY = uscl * (dsdy * (l2aw - AB) + dsdx * Cw); float d_f_ddtdX = vscl * (dtdx * (l2aw + AB) + dtdy * Cw); float d_f_ddtdY = vscl * (dtdy * (l2aw - AB) + dtdx * Cw); float4 d_f_dw = make_float4(d_f_ddsdX, d_f_ddsdY, d_f_ddtdX, d_f_ddtdY); if (!CUBE_MODE) *pdw = isfinite_vec4(d_f_dw) ? d_f_dw : make_float4(0.f, 0.f, 0.f, 0.f); // In cube maps, there is also a texture coordinate vs. mip level gradient. // Only output nonzero vectors if both are free of inf/Nan garbage. if (CUBE_MODE) { float4 dx, dy, dz; indexCubeMapGrad2(uv, dvdX, dvdY, dx, dy, dz); float3 d_dsdX_dv = make_float3(dx.x, dy.x, dz.x); float3 d_dsdY_dv = make_float3(dx.y, dy.y, dz.y); float3 d_dtdX_dv = make_float3(dx.z, dy.z, dz.z); float3 d_dtdY_dv = make_float3(dx.w, dy.w, dz.w); float3 d_f_dv = make_float3(0.f, 0.f, 0.f); d_f_dv += d_dsdX_dv * d_f_ddsdX; d_f_dv += d_dsdY_dv * d_f_ddsdY; d_f_dv += d_dtdX_dv * d_f_ddtdX; d_f_dv += d_dtdY_dv * d_f_ddtdY; bool finite = isfinite_vec4(d_f_dw) && isfinite_vec3(d_f_dv); *pdw = finite ? d_f_dw : make_float4(0.f, 0.f, 0.f, 0.f); *pdfdv = finite ? d_f_dv : make_float3(0.f, 0.f, 0.f); } } // Finally, calculate mip level. flevel = .5f * __log2f(lenMajorSqr); // May be inf/NaN, but clamp fixes it. } // Bias the mip level and clamp. if (p.mipLevelBias) flevel += p.mipLevelBias[pidx]; flevel = fminf(fmaxf(flevel, 0.f), (float)p.mipLevelMax); // Calculate levels depending on filter mode. level0 = __float2int_rd(flevel); // Leave everything else at zero if flevel == 0 (magnification) or when in linear-mipmap-nearest mode. if (FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_LINEAR && flevel > 0.f) { level1 = min(level0 + 1, p.mipLevelMax); flevel -= level0; // Fractional part. Zero if clamped on last level. } } //------------------------------------------------------------------------ // Texel fetch and accumulator helpers that understand cube map corners. template static __device__ __forceinline__ void fetchQuad(T& a00, T& a10, T& a01, T& a11, const float* pIn, int4 tc, bool corner) { // For invalid cube map uv, tc will be all negative, and all texel values will be zero. if (corner) { T avg = zero_value(); if (tc.x >= 0) avg += (a00 = *((const T*)&pIn[tc.x])); if (tc.y >= 0) avg += (a10 = *((const T*)&pIn[tc.y])); if (tc.z >= 0) avg += (a01 = *((const T*)&pIn[tc.z])); if (tc.w >= 0) avg += (a11 = *((const T*)&pIn[tc.w])); avg *= 0.33333333f; if (tc.x < 0) a00 = avg; if (tc.y < 0) a10 = avg; if (tc.z < 0) a01 = avg; if (tc.w < 0) a11 = avg; } else { a00 = (tc.x >= 0) ? *((const T*)&pIn[tc.x]) : zero_value(); a10 = (tc.y >= 0) ? *((const T*)&pIn[tc.y]) : zero_value(); a01 = (tc.z >= 0) ? *((const T*)&pIn[tc.z]) : zero_value(); a11 = (tc.w >= 0) ? *((const T*)&pIn[tc.w]) : zero_value(); } } static __device__ __forceinline__ void accumQuad(float4 c, float* pOut, int level, int4 tc, bool corner, CA_TEMP_PARAM) { // For invalid cube map uv, tc will be all negative, and no accumulation will take place. if (corner) { float cb; if (tc.x < 0) cb = c.x; if (tc.y < 0) cb = c.y; if (tc.z < 0) cb = c.z; if (tc.w < 0) cb = c.w; cb *= 0.33333333f; if (tc.x >= 0) caAtomicAddTexture(pOut, level, tc.x, c.x + cb); if (tc.y >= 0) caAtomicAddTexture(pOut, level, tc.y, c.y + cb); if (tc.z >= 0) caAtomicAddTexture(pOut, level, tc.z, c.z + cb); if (tc.w >= 0) caAtomicAddTexture(pOut, level, tc.w, c.w + cb); } else { if (tc.x >= 0) caAtomicAddTexture(pOut, level, tc.x, c.x); if (tc.y >= 0) caAtomicAddTexture(pOut, level, tc.y, c.y); if (tc.z >= 0) caAtomicAddTexture(pOut, level, tc.z, c.z); if (tc.w >= 0) caAtomicAddTexture(pOut, level, tc.w, c.w); } } //------------------------------------------------------------------------ // Mip builder kernel. template static __forceinline__ __device__ void MipBuildKernelTemplate(const TextureKernelParams p) { // Sizes. int2 sz_in = mipLevelSize(p, p.mipLevelOut - 1); int2 sz_out = mipLevelSize(p, p.mipLevelOut); // Calculate pixel position. int px = blockIdx.x * blockDim.x + threadIdx.x; int py = blockIdx.y * blockDim.y + threadIdx.y; int pz = blockIdx.z; if (px >= sz_out.x || py >= sz_out.y) return; // Pixel indices. int pidx_in0 = p.channels * (((px + sz_in.x * py) << 1) + (pz * sz_in.x * sz_in.y)); int pidx_in1 = pidx_in0 + p.channels * sz_in.x; // Next pixel down. int pidx_out = p.channels * (px + sz_out.x * (py + sz_out.y * pz)); // Input and output pointers. const float* pin = p.tex[p.mipLevelOut - 1]; float* pout = (float*)p.tex[p.mipLevelOut]; // Special case: Input texture height or width is 1. if (sz_in.x == 1 || sz_in.y == 1) { if (sz_in.y == 1) pidx_in1 = pidx_in0 + p.channels; // Next pixel on the right. for (int i=0; i < p.channels; i += C) { T v0 = *((const T*)&pin[pidx_in0 + i]); T v1 = *((const T*)&pin[pidx_in1 + i]); T avg = .5f * (v0 + v1); #if TEX_DEBUG_MIP_RETAIN_VARIANCE avg = (avg - .5f) * 1.41421356f + .5f; #endif *((T*)&pout[pidx_out + i]) = avg; } return; } for (int i=0; i < p.channels; i += C) { T v0 = *((const T*)&pin[pidx_in0 + i]); T v1 = *((const T*)&pin[pidx_in0 + i + p.channels]); T v2 = *((const T*)&pin[pidx_in1 + i]); T v3 = *((const T*)&pin[pidx_in1 + i + p.channels]); T avg = .25f * (v0 + v1 + v2 + v3); #if TEX_DEBUG_MIP_RETAIN_VARIANCE avg = (avg - .5f) * 2.f + .5f; #endif *((T*)&pout[pidx_out + i]) = avg; } } // Template specializations. __global__ void MipBuildKernel1(const TextureKernelParams p) { MipBuildKernelTemplate(p); } __global__ void MipBuildKernel2(const TextureKernelParams p) { MipBuildKernelTemplate(p); } __global__ void MipBuildKernel4(const TextureKernelParams p) { MipBuildKernelTemplate(p); } //------------------------------------------------------------------------ // Forward kernel. template static __forceinline__ __device__ void TextureFwdKernelTemplate(const TextureKernelParams p) { // Calculate pixel position. int px = blockIdx.x * blockDim.x + threadIdx.x; int py = blockIdx.y * blockDim.y + threadIdx.y; int pz = blockIdx.z; int tz = (p.texDepth == 1) ? 0 : pz; if (px >= p.imgWidth || py >= p.imgHeight || pz >= p.n) return; // Pixel index. int pidx = px + p.imgWidth * (py + p.imgHeight * pz); // Output ptr. float* pOut = p.out + pidx * p.channels; // Get UV. float3 uv; if (CUBE_MODE) uv = ((const float3*)p.uv)[pidx]; else uv = make_float3(((const float2*)p.uv)[pidx], 0.f); // Nearest mode. if (FILTER_MODE == TEX_MODE_NEAREST) { int tc = indexTextureNearest(p, uv, tz); tc *= p.channels; const float* pIn = p.tex[0]; // Copy if valid tc, otherwise output zero. for (int i=0; i < p.channels; i += C) *((T*)&pOut[i]) = (tc >= 0) ? *((const T*)&pIn[tc + i]) : zero_value(); return; // Exit. } // Calculate mip level. In 'linear' mode these will all stay zero. float flevel = 0.f; // Fractional level. int level0 = 0; // Discrete level 0. int level1 = 0; // Discrete level 1. calculateMipLevel(level0, level1, flevel, p, pidx, uv, 0, 0); // Get texel indices and pointer for level 0. int4 tc0 = make_int4(0, 0, 0, 0); float2 uv0 = indexTextureLinear(p, uv, tz, tc0, level0); const float* pIn0 = p.tex[level0]; bool corner0 = CUBE_MODE && ((tc0.x | tc0.y | tc0.z | tc0.w) < 0); tc0 *= p.channels; // Bilinear fetch. if (FILTER_MODE == TEX_MODE_LINEAR || FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_NEAREST) { // Interpolate. for (int i=0; i < p.channels; i += C, tc0 += C) { T a00, a10, a01, a11; fetchQuad(a00, a10, a01, a11, pIn0, tc0, corner0); *((T*)&pOut[i]) = bilerp(a00, a10, a01, a11, uv0); } return; // Exit. } // Get texel indices and pointer for level 1. int4 tc1 = make_int4(0, 0, 0, 0); float2 uv1 = indexTextureLinear(p, uv, tz, tc1, level1); const float* pIn1 = p.tex[level1]; bool corner1 = CUBE_MODE && ((tc1.x | tc1.y | tc1.z | tc1.w) < 0); tc1 *= p.channels; // Trilinear fetch. for (int i=0; i < p.channels; i += C, tc0 += C, tc1 += C) { // First level. T a00, a10, a01, a11; fetchQuad(a00, a10, a01, a11, pIn0, tc0, corner0); T a = bilerp(a00, a10, a01, a11, uv0); // Second level unless in magnification mode. if (flevel > 0.f) { T b00, b10, b01, b11; fetchQuad(b00, b10, b01, b11, pIn1, tc1, corner1); T b = bilerp(b00, b10, b01, b11, uv1); a = lerp(a, b, flevel); // Interpolate between levels. } // Write. *((T*)&pOut[i]) = a; } } // Template specializations. __global__ void TextureFwdKernelNearest1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelNearest2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelNearest4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinear1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinear2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinear4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearest1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearest2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearest4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinear1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinear2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinear4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeNearest1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeNearest2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeNearest4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinear1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinear2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinear4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearest1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearest2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearest4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinear1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinear2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinear4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearestBO1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearestBO2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapNearestBO4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinearBO1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinearBO2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelLinearMipmapLinearBO4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearestBO1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearestBO2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapNearestBO4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinearBO1 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinearBO2 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } __global__ void TextureFwdKernelCubeLinearMipmapLinearBO4 (const TextureKernelParams p) { TextureFwdKernelTemplate(p); } //------------------------------------------------------------------------ // Gradient mip puller kernel. template static __forceinline__ __device__ void MipGradKernelTemplate(const TextureKernelParams p) { // Calculate pixel position. int px = blockIdx.x * blockDim.x + threadIdx.x; int py = blockIdx.y * blockDim.y + threadIdx.y; int pz = blockIdx.z; if (px >= p.texWidth || py >= p.texHeight) return; // Number of wide elements. int c = p.channels; if (C == 2) c >>= 1; if (C == 4) c >>= 2; // Dynamically allocated shared memory for holding a texel. extern __shared__ float s_texelAccum[]; int sharedOfs = threadIdx.x + threadIdx.y * blockDim.x; int sharedStride = blockDim.x * blockDim.y; # define TEXEL_ACCUM(_i) (s_texelAccum + (sharedOfs + (_i) * sharedStride)) // Clear the texel. for (int i=0; i < p.channels; i++) *TEXEL_ACCUM(i) = 0.f; // Track texel position and accumulation weight over the mip stack. int x = px; int y = py; float w = 1.f; // Pull gradients from all levels. int2 sz = mipLevelSize(p, 0); // Previous level size. for (int level=1; level <= p.mipLevelMax; level++) { // Weight decay depends on previous level size. if (sz.x > 1) w *= .5f; if (sz.y > 1) w *= .5f; // Current level size and coordinates. sz = mipLevelSize(p, level); x >>= 1; y >>= 1; T* pIn = (T*)(p.gradTex[level] + (x + sz.x * (y + sz.y * pz)) * p.channels); for (int i=0; i < c; i++) accum_from_mem(TEXEL_ACCUM(i * C), sharedStride, pIn[i], w); } // Add to main texture gradients. T* pOut = (T*)(p.gradTex[0] + (px + p.texWidth * (py + p.texHeight * pz)) * p.channels); for (int i=0; i < c; i++) accum_to_mem(pOut[i], TEXEL_ACCUM(i * C), sharedStride); } // Template specializations. __global__ void MipGradKernel1(const TextureKernelParams p) { MipGradKernelTemplate(p); } __global__ void MipGradKernel2(const TextureKernelParams p) { MipGradKernelTemplate(p); } __global__ void MipGradKernel4(const TextureKernelParams p) { MipGradKernelTemplate(p); } //------------------------------------------------------------------------ // Gradient kernel. template static __forceinline__ __device__ void TextureGradKernelTemplate(const TextureKernelParams p) { // Temporary space for coalesced atomics. CA_DECLARE_TEMP(TEX_GRAD_MAX_KERNEL_BLOCK_WIDTH * TEX_GRAD_MAX_KERNEL_BLOCK_HEIGHT); // Calculate pixel position. int px = blockIdx.x * blockDim.x + threadIdx.x; int py = blockIdx.y * blockDim.y + threadIdx.y; int pz = blockIdx.z; int tz = (p.texDepth == 1) ? 0 : pz; if (px >= p.imgWidth || py >= p.imgHeight || pz >= p.n) return; // Pixel index. int pidx = px + p.imgWidth * (py + p.imgHeight * pz); // Early exit if output gradients are zero. const float* pDy = p.dy + pidx * p.channels; unsigned int dmax = 0u; if ((p.channels & 3) == 0) { for (int i=0; i < p.channels; i += 4) { uint4 dy = *((const uint4*)&pDy[i]); dmax |= (dy.x | dy.y | dy.z | dy.w); } } else { for (int i=0; i < p.channels; i++) dmax |= __float_as_uint(pDy[i]); } // Store zeros and exit. if (__uint_as_float(dmax) == 0.f) { if (CUBE_MODE) { if (FILTER_MODE != TEX_MODE_NEAREST) ((float3*)p.gradUV)[pidx] = make_float3(0.f, 0.f, 0.f); if (FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_LINEAR) { if (p.gradUVDA) { ((float2*)p.gradUVDA)[3 * pidx + 0] = make_float2(0.f, 0.f); ((float2*)p.gradUVDA)[3 * pidx + 1] = make_float2(0.f, 0.f); ((float2*)p.gradUVDA)[3 * pidx + 2] = make_float2(0.f, 0.f); } if (p.gradMipLevelBias) p.gradMipLevelBias[pidx] = 0.f; } } else { if (FILTER_MODE != TEX_MODE_NEAREST) ((float2*)p.gradUV)[pidx] = make_float2(0.f, 0.f); if (FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_LINEAR) { if (p.gradUVDA) ((float4*)p.gradUVDA)[pidx] = make_float4(0.f, 0.f, 0.f, 0.f); if (p.gradMipLevelBias) p.gradMipLevelBias[pidx] = 0.f; } } return; } // Get UV. float3 uv; if (CUBE_MODE) uv = ((const float3*)p.uv)[pidx]; else uv = make_float3(((const float2*)p.uv)[pidx], 0.f); // Nearest mode - texture gradients only. if (FILTER_MODE == TEX_MODE_NEAREST) { int tc = indexTextureNearest(p, uv, tz); if (tc < 0) return; // Outside texture. tc *= p.channels; float* pOut = p.gradTex[0]; // Accumulate texture gradients. for (int i=0; i < p.channels; i++) caAtomicAddTexture(pOut, 0, tc + i, pDy[i]); return; // Exit. } // Calculate mip level. In 'linear' mode these will all stay zero. float4 dw = make_float4(0.f, 0.f, 0.f, 0.f); float3 dfdv = make_float3(0.f, 0.f, 0.f); float flevel = 0.f; // Fractional level. int level0 = 0; // Discrete level 0. int level1 = 0; // Discrete level 1. calculateMipLevel(level0, level1, flevel, p, pidx, uv, &dw, &dfdv); // UV gradient accumulators. float gu = 0.f; float gv = 0.f; // Get texel indices and pointers for level 0. int4 tc0 = make_int4(0, 0, 0, 0); float2 uv0 = indexTextureLinear(p, uv, tz, tc0, level0); const float* pIn0 = p.tex[level0]; float* pOut0 = p.gradTex[level0]; bool corner0 = CUBE_MODE && ((tc0.x | tc0.y | tc0.z | tc0.w) < 0); tc0 *= p.channels; // Texel weights. float uv011 = uv0.x * uv0.y; float uv010 = uv0.x - uv011; float uv001 = uv0.y - uv011; float uv000 = 1.f - uv0.x - uv001; float4 tw0 = make_float4(uv000, uv010, uv001, uv011); // Attribute weights. int2 sz0 = mipLevelSize(p, level0); float sclu0 = (float)sz0.x; float sclv0 = (float)sz0.y; // Bilinear mode - texture and uv gradients. if (FILTER_MODE == TEX_MODE_LINEAR || FILTER_MODE == TEX_MODE_LINEAR_MIPMAP_NEAREST) { for (int i=0; i < p.channels; i++, tc0 += 1) { float dy = pDy[i]; accumQuad(tw0 * dy, pOut0, level0, tc0, corner0, CA_TEMP); float a00, a10, a01, a11; fetchQuad(a00, a10, a01, a11, pIn0, tc0, corner0); float ad = (a11 + a00 - a10 - a01); gu += dy * ((a10 - a00) + uv0.y * ad) * sclu0; gv += dy * ((a01 - a00) + uv0.x * ad) * sclv0; } // Store UV gradients and exit. if (CUBE_MODE) ((float3*)p.gradUV)[pidx] = indexCubeMapGrad(uv, gu, gv); else ((float2*)p.gradUV)[pidx] = make_float2(gu, gv); return; } // Accumulate fractional mip level gradient. float df = 0; // dL/df. // Get texel indices and pointers for level 1. int4 tc1 = make_int4(0, 0, 0, 0); float2 uv1 = indexTextureLinear(p, uv, tz, tc1, level1); const float* pIn1 = p.tex[level1]; float* pOut1 = p.gradTex[level1]; bool corner1 = CUBE_MODE && ((tc1.x | tc1.y | tc1.z | tc1.w) < 0); tc1 *= p.channels; // Texel weights. float uv111 = uv1.x * uv1.y; float uv110 = uv1.x - uv111; float uv101 = uv1.y - uv111; float uv100 = 1.f - uv1.x - uv101; float4 tw1 = make_float4(uv100, uv110, uv101, uv111); // Attribute weights. int2 sz1 = mipLevelSize(p, level1); float sclu1 = (float)sz1.x; float sclv1 = (float)sz1.y; // Trilinear mode. for (int i=0; i < p.channels; i++, tc0 += 1, tc1 += 1) { float dy = pDy[i]; float dy0 = (1.f - flevel) * dy; accumQuad(tw0 * dy0, pOut0, level0, tc0, corner0, CA_TEMP); // UV gradients for first level. float a00, a10, a01, a11; fetchQuad(a00, a10, a01, a11, pIn0, tc0, corner0); float ad = (a11 + a00 - a10 - a01); gu += dy0 * ((a10 - a00) + uv0.y * ad) * sclu0; gv += dy0 * ((a01 - a00) + uv0.x * ad) * sclv0; // Second level unless in magnification mode. if (flevel > 0.f) { // Texture gradients for second level. float dy1 = flevel * dy; accumQuad(tw1 * dy1, pOut1, level1, tc1, corner1, CA_TEMP); // UV gradients for second level. float b00, b10, b01, b11; fetchQuad(b00, b10, b01, b11, pIn1, tc1, corner1); float bd = (b11 + b00 - b10 - b01); gu += dy1 * ((b10 - b00) + uv1.y * bd) * sclu1; gv += dy1 * ((b01 - b00) + uv1.x * bd) * sclv1; // Mip level gradient. float a = bilerp(a00, a10, a01, a11, uv0); float b = bilerp(b00, b10, b01, b11, uv1); df += (b-a) * dy; } } // Store UV gradients. if (CUBE_MODE) ((float3*)p.gradUV)[pidx] = indexCubeMapGrad(uv, gu, gv) + (dfdv * df); else ((float2*)p.gradUV)[pidx] = make_float2(gu, gv); // Store mip level bias gradient. if (p.gradMipLevelBias) p.gradMipLevelBias[pidx] = df; // Store UV pixel differential gradients. if (!BIAS_ONLY) { // Final gradients. dw *= df; // dL/(d{s,y}/d{X,Y}) = df/(d{s,y}/d{X,Y}) * dL/df. // Store them. if (CUBE_MODE) { // Remap from dL/(d{s,t}/s{X,Y}) to dL/(d{x,y,z}/d{X,Y}). float3 g0, g1; indexCubeMapGrad4(uv, dw, g0, g1); ((float2*)p.gradUVDA)[3 * pidx + 0] = make_float2(g0.x, g1.x); ((float2*)p.gradUVDA)[3 * pidx + 1] = make_float2(g0.y, g1.y); ((float2*)p.gradUVDA)[3 * pidx + 2] = make_float2(g0.z, g1.z); } else ((float4*)p.gradUVDA)[pidx] = dw; } } // Template specializations. __global__ void TextureGradKernelNearest (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelLinear (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelLinearMipmapNearest (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelLinearMipmapLinear (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeNearest (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeLinear (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeLinearMipmapNearest (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeLinearMipmapLinear (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelLinearMipmapNearestBO (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelLinearMipmapLinearBO (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeLinearMipmapNearestBO (const TextureKernelParams p) { TextureGradKernelTemplate(p); } __global__ void TextureGradKernelCubeLinearMipmapLinearBO (const TextureKernelParams p) { TextureGradKernelTemplate(p); } //------------------------------------------------------------------------