mirror of
https://github.com/stenzek/duckstation.git
synced 2026-02-05 22:04:33 +00:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de77b764a7 | ||
|
|
c21ea3c85b |
124
data/resources/shaders/system/texscale-mmpx.comp
Normal file
124
data/resources/shaders/system/texscale-mmpx.comp
Normal file
@@ -0,0 +1,124 @@
|
||||
#version 460 core
|
||||
|
||||
// EPX.glc
|
||||
// Copyright 2020 Morgan McGuire & Mara Gagiu,
|
||||
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
|
||||
|
||||
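// NOTE: this header was carried over from the EPX shader (Eric Johnston and Andrea Mazzoleni's
// EPX aka Scale2X algorithm, https://www.scale2x.it/algorithm). This file is the MMPX scaler:
// besides the 1:1 slope rules it also applies luma-guarded intersection and 2:1 slope rules.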
|
||||
|
||||
#define ABGR8 uint
|
||||
|
||||
UNIFORM_BLOCK_LAYOUT uniform UBOBlock {
|
||||
ivec2 src_size;
|
||||
ivec2 dst_size;
|
||||
};
|
||||
|
||||
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
|
||||
IMAGE_LAYOUT(0, rgba8) uniform restrict writeonly image2D dst_image;
|
||||
|
||||
// Fetches the source texel at (x, y) and packs it into a 32-bit ABGR8 value.
// The filter reads neighbours up to three texels away from the current pixel,
// so coordinates are clamped to [0, src_size - 1]: texelFetch() with
// out-of-range coordinates gives undefined results, and clamping replicates the
// edge texel instead.
ABGR8 src(int x, int y) {
  ivec2 coords = clamp(ivec2(x, y), ivec2(0), src_size - ivec2(1));
  return packUnorm4x8(texelFetch(samp0, coords, 0));
}
|
||||
|
||||
// Unpacks an ABGR8 value and stores it to the output image at (x, y).
void dst(int x, int y, ABGR8 value) {
  vec4 unpacked = unpackUnorm4x8(value);
  imageStore(dst_image, ivec2(x, y), unpacked);
}
|
||||
|
||||
// Brightness proxy used to order colours: (sum of the three low bytes + 1)
// scaled by (256 - top byte). The top byte is the alpha channel of the packed
// value, so more opaque colours get a smaller weight.
uint luma(ABGR8 C) {
  uint byte0 = C & 0xFFu;
  uint byte1 = (C >> 8) & 0xFFu;
  uint byte2 = (C >> 16) & 0xFFu;
  uint alpha = C >> 24;
  return (byte2 + byte1 + byte0 + 1u) * (256u - alpha);
}
|
||||
|
||||
// True when both A0 and A1 are identical to B.
bool all_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
  return B == A0 && B == A1;
}
|
||||
|
||||
// True when A0, A1 and A2 are all identical to B.
bool all_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
  return B == A0 && B == A1 && B == A2;
}
|
||||
|
||||
// True when A0, A1, A2 and A3 are all identical to B.
bool all_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
  return B == A0 && B == A1 && B == A2 && B == A3;
}
|
||||
|
||||
// True when at least one of A0, A1, A2 is identical to B.
bool any_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
  return !(B != A0 && B != A1 && B != A2);
}
|
||||
|
||||
// True when neither A0 nor A1 is identical to B.
bool none_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
  return !(B == A0 || B == A1);
}
|
||||
|
||||
// True when none of A0, A1, A2, A3 is identical to B.
bool none_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
  return !(B == A0 || B == A1 || B == A2 || B == A3);
}
|
||||
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
// One invocation per source texel. Each invocation writes the 2x2 output block
//   J K
//   L M
// that replaces source texel E.
void main () {
  ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
  if (any(greaterThanEqual(pos, src_size)))
    return; // invocation lies outside the source extents

  int srcX = pos.x;
  int srcY = pos.y;

  // 3x3 neighbourhood around E, laid out as
  //   A B C
  //   D E F
  //   G H I
  ABGR8 A = src(srcX - 1, srcY - 1);
  ABGR8 B = src(srcX, srcY - 1);
  ABGR8 C = src(srcX + 1, srcY - 1);
  ABGR8 D = src(srcX - 1, srcY + 0);
  ABGR8 E = src(srcX, srcY + 0);
  ABGR8 F = src(srcX + 1, srcY + 0);
  ABGR8 G = src(srcX - 1, srcY + 1);
  ABGR8 H = src(srcX, srcY + 1);
  ABGR8 I = src(srcX + 1, srcY + 1);

  // Default: plain nearest-neighbour copy of E into all four outputs.
  ABGR8 J = E;
  ABGR8 K = E;
  ABGR8 L = E;
  ABGR8 M = E;

  // The rules can only change something when the 3x3 block is not a single colour.
  bool constant_block = all_eq4(E, A, B, C, D) && all_eq4(E, F, G, H, I);
  if (!constant_block) {
    // Neighbours two texels away: P/S at y-2/y+2, Q/R at x-2/x+2.
    ABGR8 P = src(srcX, srcY - 2);
    ABGR8 S = src(srcX, srcY + 2);
    ABGR8 Q = src(srcX - 2, srcY);
    ABGR8 R = src(srcX + 2, srcY);

    uint Bl = luma(B);
    uint Dl = luma(D);
    uint El = luma(E);
    uint Fl = luma(F);
    uint Hl = luma(H);

    // 1:1 slope rules
    if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D;
    if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B;
    if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H;
    if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F;

    // Intersection rules
    if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(srcX + 3, srcY))) K = M = F;
    if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(srcX - 3, srcY))) J = L = D;
    if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(srcX, srcY + 3))) L = M = H;
    if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(srcX, srcY - 3))) J = K = B;
    if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B;
    if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H;
    if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F;
    if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D;

    // 2:1 slope rules
    if (H != B) {
      if (H != A && H != E && H != C) {
        if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M;
        if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L;
      }

      if (B != I && B != G && B != E) {
        if (all_eq3(B, A, F, R) && none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K;
        if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J;
      }
    } // H != B

    if (F != D) {
      if (D != I && D != E && D != C) {
        if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L;
        if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J;
      }

      if (F != E && F != A && F != G) {
        if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M;
        if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K;
      }
    } // F != D
  } // not constant

  // Emit the 2x2 block for this source texel.
  ivec2 base = pos * 2;
  dst(base.x, base.y, J);
  dst(base.x + 1, base.y, K);
  dst(base.x, base.y + 1, L);
  dst(base.x + 1, base.y + 1, M);
}
|
||||
55
data/resources/shaders/system/texscale-scale2x.comp
Normal file
55
data/resources/shaders/system/texscale-scale2x.comp
Normal file
@@ -0,0 +1,55 @@
|
||||
#version 460 core
|
||||
|
||||
// EPX.glc
|
||||
// Copyright 2020 Morgan McGuire & Mara Gagiu,
|
||||
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
|
||||
|
||||
// Implementation of Eric Johnston and Andrea Mazzoleni's
|
||||
// EPX aka Scale2X algorithm based on https://www.scale2x.it/algorithm
|
||||
|
||||
#define ABGR8 uint
|
||||
|
||||
UNIFORM_BLOCK_LAYOUT uniform UBOBlock {
|
||||
ivec2 src_size;
|
||||
ivec2 dst_size;
|
||||
};
|
||||
|
||||
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
|
||||
IMAGE_LAYOUT(0, rgba8) uniform restrict writeonly image2D dst_image;
|
||||
|
||||
// Fetches the source texel at (x, y) and packs it into a 32-bit ABGR8 value.
// main() reads the four direct neighbours of every texel, so border texels ask
// for coordinates just outside the image. Coordinates are clamped to
// [0, src_size - 1] because texelFetch() with out-of-range coordinates gives
// undefined results; clamping replicates the edge texel instead.
ABGR8 src(int x, int y) {
  ivec2 coords = clamp(ivec2(x, y), ivec2(0), src_size - ivec2(1));
  return packUnorm4x8(texelFetch(samp0, coords, 0));
}
|
||||
|
||||
// Unpacks an ABGR8 value and stores it to the output image at (x, y).
void dst(int x, int y, ABGR8 value) {
  vec4 unpacked = unpackUnorm4x8(value);
  imageStore(dst_image, ivec2(x, y), unpacked);
}
|
||||
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
// One invocation per source texel. Each invocation writes the 2x2 output block
//   J K
//   L M
// that replaces source texel E, using its four direct neighbours
//     B
//   D E F
//     H
void main () {
  ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
  if (any(greaterThanEqual(pos, src_size)))
    return; // invocation lies outside the source extents

  ABGR8 E = src(pos.x, pos.y);
  ABGR8 B = src(pos.x + 0, pos.y - 1);
  ABGR8 D = src(pos.x - 1, pos.y + 0);
  ABGR8 F = src(pos.x + 1, pos.y + 0);
  ABGR8 H = src(pos.x + 0, pos.y + 1);

  // Each output texel takes the colour of its two adjacent neighbours when
  // they agree with each other and differ from the opposite neighbours;
  // otherwise it stays a nearest-neighbour copy of E.
  ABGR8 J = (D == B && B != F && D != H) ? D : E;
  ABGR8 K = (B == F && D != F && H != F) ? F : E;
  ABGR8 L = (H == D && F != D && B != D) ? D : E;
  ABGR8 M = (H == F && D != H && B != F) ? F : E;

  // Emit the 2x2 block for this source texel.
  ivec2 base = pos * 2;
  dst(base.x, base.y, J);
  dst(base.x + 1, base.y, K);
  dst(base.x, base.y + 1, L);
  dst(base.x + 1, base.y + 1, M);
}
|
||||
248
data/resources/shaders/system/texscale-xbr.frag
Normal file
248
data/resources/shaders/system/texscale-xbr.frag
Normal file
@@ -0,0 +1,248 @@
|
||||
#version 460 core
|
||||
|
||||
layout(location = 0) in VertexData {
|
||||
vec2 v_tex0;
|
||||
};
|
||||
|
||||
layout(location = 0) out vec4 dest;
|
||||
|
||||
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
|
||||
|
||||
// Fetches the source texel at integer position uv (truncated to ivec2).
// XBR() samples neighbours up to two texels away from the current one, so
// fragments near the border ask for positions outside the texture. The
// position is clamped to the texture extents because texelFetch() with
// out-of-range coordinates gives undefined results; clamping replicates the
// edge texel instead.
vec4 SrcGet(vec2 uv)
{
  ivec2 max_coords = textureSize(samp0, 0) - ivec2(1);
  ivec2 coords = clamp(ivec2(uv), ivec2(0), max_coords);
  return texelFetch(samp0, coords, 0);
}
|
||||
|
||||
// XBR.pix
|
||||
// Copyright 2020 Morgan McGuire & Mara Gagiu,
|
||||
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
|
||||
|
||||
#define XBR_Y_WEIGHT 48.0
|
||||
#define XBR_EQ_THRESHOLD 15.0
|
||||
#define XBR_LV1_COEFFICIENT 0.5
|
||||
#define XBR_LV2_COEFFICIENT 2.0
|
||||
// END PARAMETERS //
|
||||
|
||||
|
||||
// XBR GLSL implementation source:
|
||||
// https://github.com/libretro/glsl-shaders/blob/master/xbr/shaders/xbr-lv2.glsl
|
||||
/*
|
||||
Hyllian's xBR-lv2 Shader
|
||||
Copyright (C) 2011-2015 Hyllian - sergiogdb@gmail.com
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
Incorporates some of the ideas from SABR shader. Thanks to Joshua Street.
|
||||
|
||||
*/
|
||||
|
||||
// Uncomment just one of the three params below to choose the corner detection
|
||||
#define CORNER_A
|
||||
//#define CORNER_B
|
||||
//#define CORNER_C
|
||||
//#define CORNER_D
|
||||
|
||||
#ifndef CORNER_A
|
||||
#define SMOOTH_TIPS
|
||||
#endif
|
||||
|
||||
#define XBR_SCALE 2.0
|
||||
#define lv2_cf XBR_LV2_COEFFICIENT
|
||||
//=================================================================================
|
||||
// XBR Helper Functions
|
||||
//=================================================================================
|
||||
const float coef = 2.0;
|
||||
const vec3 rgbw = vec3(14.352, 28.176, 5.472);
|
||||
const vec4 eq_threshold = vec4(15.0, 15.0, 15.0, 15.0);
|
||||
|
||||
const vec4 delta = vec4(1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE);
|
||||
const vec4 delta_l = vec4(0.5/XBR_SCALE, 1.0/XBR_SCALE, 0.5/XBR_SCALE, 1.0/XBR_SCALE);
|
||||
const vec4 delta_u = delta_l.yxwz;
|
||||
|
||||
const vec4 Ao = vec4( 1.0, -1.0, -1.0, 1.0 );
|
||||
const vec4 Bo = vec4( 1.0, 1.0, -1.0,-1.0 );
|
||||
const vec4 Co = vec4( 1.5, 0.5, -0.5, 0.5 );
|
||||
const vec4 Ax = vec4( 1.0, -1.0, -1.0, 1.0 );
|
||||
const vec4 Bx = vec4( 0.5, 2.0, -0.5,-2.0 );
|
||||
const vec4 Cx = vec4( 1.0, 1.0, -0.5, 0.0 );
|
||||
const vec4 Ay = vec4( 1.0, -1.0, -1.0, 1.0 );
|
||||
const vec4 By = vec4( 2.0, 0.5, -2.0,-0.5 );
|
||||
const vec4 Cy = vec4( 2.0, 0.0, -1.0, 0.5 );
|
||||
const vec4 Ci = vec4(0.25, 0.25, 0.25, 0.25);
|
||||
|
||||
// Component-wise absolute difference |A - B|.
vec4 df(vec4 A, vec4 B)
{
  vec4 signed_difference = A - B;
  return abs(signed_difference);
}
|
||||
|
||||
// Component-wise exact inequality: 1.0 where the components of A and B differ,
// 0.0 where they are equal.
vec4 diff(vec4 A, vec4 B)
{
  bvec4 differs = notEqual(A, B);
  return vec4(differs);
}
|
||||
|
||||
// Component-wise thresholded equality: 1.0 where |A - B| does not exceed
// XBR_EQ_THRESHOLD, 0.0 otherwise.
vec4 eq(vec4 A, vec4 B)
{
  vec4 distance_ab = df(A, B);
  return step(distance_ab, vec4(XBR_EQ_THRESHOLD));
}
|
||||
|
||||
// Component-wise complement of eq(): 1.0 where |A - B| exceeds
// XBR_EQ_THRESHOLD, 0.0 otherwise.
vec4 neq(vec4 A, vec4 B)
{
  return vec4(1.0) - eq(A, B);
}
|
||||
|
||||
// Weighted distance: four pairwise differences with weight 1 plus the (g, h)
// difference with weight 4. Terms are accumulated in the original order.
vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h)
{
  vec4 total = df(a, b);
  total = total + df(a, c);
  total = total + df(d, e);
  total = total + df(d, f);
  total = total + 4.0 * df(g, h);
  return total;
}
|
||||
|
||||
// Colour distance: sum of the absolute per-channel differences of c1 and c2.
float c_df(vec3 c1, vec3 c2)
{
  vec3 channel_delta = abs(c1 - c2);
  return channel_delta.r + channel_delta.g + channel_delta.b;
}
|
||||
|
||||
// Computes one output fragment of the 2x xBR-lv2 filter.
// The source texel E under this fragment and its 20 neighbours are fetched,
// reduced to weighted luma values (rgbw), and used to decide whether E is
// blended with one of its edge neighbours. Returns the filtered colour with
// the alpha of the centre texel.
vec4 XBR()
{
  // Source texel covered by this output fragment (output is twice the source size).
  ivec2 tex_fetch_coords = ivec2(gl_FragCoord.xy / 2.0);

  vec4 edri, edr, edr_l, edr_u, px; // px = pixel, edr = edge detection rule
  vec4 irlv0, irlv1, irlv2l, irlv2u;
  vec4 fx, fx_l, fx_u; // inequations of straight lines.

  // Position of the fragment inside the source texel.
  vec2 fp = fract(gl_FragCoord.xy / 2.0);

  // Neighbourhood layout (E is the centre texel):
  //      A1 B1 C1
  //   A0 A  B  C  C4
  //   D0 D  E  F  F4
  //   G0 G  H  I  I4
  //      G5 H5 I5
  vec3 A1 = SrcGet(tex_fetch_coords + ivec2(-1, -2)).xyz;
  vec3 B1 = SrcGet(tex_fetch_coords + ivec2( 0, -2)).xyz;
  vec3 C1 = SrcGet(tex_fetch_coords + ivec2(+1, -2)).xyz;
  vec3 A  = SrcGet(tex_fetch_coords + ivec2(-1, -1)).xyz;
  vec3 B  = SrcGet(tex_fetch_coords + ivec2( 0, -1)).xyz;
  vec3 C  = SrcGet(tex_fetch_coords + ivec2(+1, -1)).xyz;
  vec3 D  = SrcGet(tex_fetch_coords + ivec2(-1, 0)).xyz;
  vec4 Eo = SrcGet(tex_fetch_coords);
  vec3 E  = Eo.xyz;
  vec3 F  = SrcGet(tex_fetch_coords + ivec2(+1, 0)).xyz;
  vec3 G  = SrcGet(tex_fetch_coords + ivec2(-1, +1)).xyz;
  vec3 H  = SrcGet(tex_fetch_coords + ivec2( 0, +1)).xyz;
  vec3 I  = SrcGet(tex_fetch_coords + ivec2(+1, +1)).xyz;
  vec3 G5 = SrcGet(tex_fetch_coords + ivec2(-1, +2)).xyz;
  vec3 H5 = SrcGet(tex_fetch_coords + ivec2( 0, +2) ).xyz;
  vec3 I5 = SrcGet(tex_fetch_coords + ivec2(+1, +2)).xyz;
  vec3 A0 = SrcGet(tex_fetch_coords + ivec2(-2, -1)).xyz;
  vec3 D0 = SrcGet(tex_fetch_coords + ivec2(-2, 0)).xyz;
  vec3 G0 = SrcGet(tex_fetch_coords + ivec2(-2, +1)).xyz;
  vec3 C4 = SrcGet(tex_fetch_coords + ivec2(+2, -1)).xyz;
  vec3 F4 = SrcGet(tex_fetch_coords + ivec2(+2, 0)).xyz;
  vec3 I4 = SrcGet(tex_fetch_coords + ivec2(+2, +1)).xyz;

  // Weighted luma of the neighbours. Each vec4 holds the same logical
  // neighbour for four orientations, derived by swizzling.
  vec4 b = vec4(dot(B ,rgbw), dot(D ,rgbw), dot(H ,rgbw), dot(F ,rgbw));
  vec4 c = vec4(dot(C ,rgbw), dot(A ,rgbw), dot(G ,rgbw), dot(I ,rgbw));
  vec4 d = b.yzwx;
  vec4 e = vec4(dot(E,rgbw));
  vec4 f = b.wxyz;
  vec4 g = c.zwxy;
  vec4 h = b.zwxy;
  vec4 i = c.wxyz;
  vec4 i4 = vec4(dot(I4,rgbw), dot(C1,rgbw), dot(A0,rgbw), dot(G5,rgbw));
  vec4 i5 = vec4(dot(I5,rgbw), dot(C4,rgbw), dot(A1,rgbw), dot(G0,rgbw));
  vec4 h5 = vec4(dot(H5,rgbw), dot(F4,rgbw), dot(B1,rgbw), dot(D0,rgbw));
  vec4 f4 = h5.yzwx;

  // These inequations define the line below which interpolation occurs.
  fx   = (Ao*fp.y+Bo*fp.x);
  fx_l = (Ax*fp.y+Bx*fp.x);
  fx_u = (Ay*fp.y+By*fp.x);

  irlv1 = irlv0 = diff(e,f) * diff(e,h);

#ifdef CORNER_B

  // E1/K case (X odd, Y even)
  irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );

#endif
#ifdef CORNER_D

  // E3/M case (X odd, Y odd)
  vec4 c1 = i4.yzwx;
  vec4 g0 = i5.wxyz;
  irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));

#endif
#ifdef CORNER_C

  irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );

#endif

  irlv2l = diff(e,g) * diff(d,g);
  irlv2u = diff(e,c) * diff(b,c);

  vec4 fx45i = clamp((fx   + delta   -Co - Ci)/(2.0*delta  ), 0.0, 1.0);
  vec4 fx45  = clamp((fx   + delta   -Co     )/(2.0*delta  ), 0.0, 1.0);
  vec4 fx30  = clamp((fx_l + delta_l -Cx     )/(2.0*delta_l), 0.0, 1.0);
  vec4 fx60  = clamp((fx_u + delta_u -Cy     )/(2.0*delta_u), 0.0, 1.0);

  vec4 wd1 = wd( e, c,  g, i, h5, f4, h, f);
  vec4 wd2 = wd( h, d, i5, f, i4,  b, e, i);

  edri  = step(wd1, wd2) * irlv0;
  edr   = step(wd1 + vec4(0.1, 0.1, 0.1, 0.1), wd2) * step(vec4(0.5, 0.5, 0.5, 0.5), irlv1);
  edr_l = step( lv2_cf*df(f,g), df(h,c) ) * irlv2l * edr;
  edr_u = step( lv2_cf*df(h,c), df(f,g) ) * irlv2u * edr;

  fx45  = edr   * fx45;
  fx30  = edr_l * fx30;
  fx60  = edr_u * fx60;
  fx45i = edri  * fx45i;

  px = step(df(e,f), df(e,h));

  // maximos must be declared in both configurations: SMOOTH_TIPS is defined
  // whenever CORNER_A is not, and the blend code below always reads it.
#ifdef SMOOTH_TIPS
  vec4 maximos = max(max(fx30, fx60), max(fx45, fx45i));
#else
  vec4 maximos = max(max(fx30, fx60), fx45);
#endif

  vec3 res1 = E;
  res1 = mix(res1, mix(H, F, px.x), maximos.x);
  res1 = mix(res1, mix(B, D, px.z), maximos.z);

  vec3 res2 = E;
  res2 = mix(res2, mix(F, B, px.y), maximos.y);
  res2 = mix(res2, mix(D, H, px.w), maximos.w);

  // Keep whichever candidate moved further away from the centre colour.
  vec3 res = mix(res1, res2, step(c_df(E, res1), c_df(E, res2)));

  // Alpha is passed through from the centre texel unfiltered.
  return vec4(res, Eo.a);
}
|
||||
|
||||
// Fragment entry point: writes the xBR-filtered colour for this fragment.
void main () {
  vec4 filtered = XBR();
  dest = filtered;
}
|
||||
14
data/resources/shaders/system/texscale.vert
Normal file
14
data/resources/shaders/system/texscale.vert
Normal file
@@ -0,0 +1,14 @@
|
||||
#version 460 core
|
||||
|
||||
layout(location = 0) out VertexData {
|
||||
vec2 v_tex0;
|
||||
};
|
||||
|
||||
// Vertex entry point for the scaling pass. It derives a texture coordinate of
// (0,0), (2,0), (0,2) or (2,2) from the low two bits of gl_VertexIndex and maps
// it to clip space, so no vertex buffer is read.
void main()
{
  // Bit 0 of the index selects x, bit 1 selects y (each 0 or 2).
  ivec2 corner = ivec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);
  v_tex0 = vec2(corner);

  vec2 clip_xy = v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f);
  gl_Position = vec4(clip_xy, 0.0f, 1.0f);

  // The Y axis is flipped for the OpenGL, OpenGL ES and Vulkan backends.
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
  gl_Position.y = -gl_Position.y;
#endif
}
|
||||
@@ -4591,6 +4591,11 @@ void FullscreenUI::DrawGraphicsSettingsPage()
|
||||
"Hacks", "UseOldMDECRoutines", false);
|
||||
|
||||
const bool texture_cache_enabled = GetEffectiveBoolSetting(bsi, "GPU", "EnableTextureCache", false);
|
||||
DrawEnumSetting(bsi, FSUI_ICONSTR(ICON_FA_EXPAND_ALT, "Texture Scaling"),
|
||||
FSUI_CSTR("Applies a texture scaling filter to textures as a pre-processing step."), "GPU",
|
||||
"TextureScaling", GPUTextureScaling::Disabled, &Settings::ParseGPUTextureScalingName,
|
||||
&Settings::GetGPUTextureScalingName, &Settings::GetGPUTextureScalingDisplayName,
|
||||
GPUTextureScaling::MaxCount, texture_cache_enabled);
|
||||
DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_FILE_IMPORT, "Enable Texture Replacements"),
|
||||
FSUI_CSTR("Enables loading of replacement textures. Not compatible with all games."),
|
||||
"TextureReplacements", "EnableTextureReplacements", false, texture_cache_enabled);
|
||||
@@ -7686,6 +7691,7 @@ TRANSLATE_NOOP("FullscreenUI", "Allow Booting Without SBI File");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Allows loading protected games without subchannel information.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "An error occurred while deleting empty game settings:\n{}");
|
||||
TRANSLATE_NOOP("FullscreenUI", "An error occurred while saving game settings:\n{}");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Applies a texture scaling filter to textures as a pre-processing step.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Apply Image Patches");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Are you sure you want to clear the current post-processing chain? All configuration will be lost.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Aspect Ratio");
|
||||
@@ -7969,7 +7975,10 @@ TRANSLATE_NOOP("FullscreenUI", "Log To File");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Log To System Console");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Logging");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Logging Settings");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Logging in to RetroAchievements...");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Login");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Login Error");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Login Failed.\nError: {}\nPlease check your username and password, and try again.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Login token generated on {}");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Logout");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Logs BIOS calls to printf(). Not all games contain debugging messages.");
|
||||
@@ -8026,6 +8035,7 @@ TRANSLATE_NOOP("FullscreenUI", "PGXP (Precision Geometry Transform Pipeline)");
|
||||
TRANSLATE_NOOP("FullscreenUI", "PGXP Depth Buffer");
|
||||
TRANSLATE_NOOP("FullscreenUI", "PGXP Geometry Correction");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Parent Directory");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Password: ");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Patches");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Patches the BIOS to skip the boot animation. Safe to enable.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Path");
|
||||
@@ -8041,6 +8051,7 @@ TRANSLATE_NOOP("FullscreenUI", "Performance enhancement - jumps directly between
|
||||
TRANSLATE_NOOP("FullscreenUI", "Perspective Correct Colors");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Perspective Correct Textures");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Plays sound effects for events such as achievement unlocks and leaderboard submissions.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Please enter your user name and password for retroachievements.org below. Your password will not be saved in DuckStation, an access token will be generated and used instead.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Port {} Controller Type");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Post-Processing Settings");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Post-processing chain cleared.");
|
||||
@@ -8088,6 +8099,7 @@ TRANSLATE_NOOP("FullscreenUI", "Resolution change will be applied after restarti
|
||||
TRANSLATE_NOOP("FullscreenUI", "Restores the state of the system prior to the last state loaded.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Resume Game");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Resume Last Session");
|
||||
TRANSLATE_NOOP("FullscreenUI", "RetroAchievements Login");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Return To Game");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Return to desktop mode, or exit the application.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Return to the previous menu.");
|
||||
@@ -8222,6 +8234,7 @@ TRANSLATE_NOOP("FullscreenUI", "Temporarily disables all enhancements, useful wh
|
||||
TRANSLATE_NOOP("FullscreenUI", "Test Unofficial Achievements");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Texture Filtering");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Texture Replacements");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Texture Scaling");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Textures Directory");
|
||||
TRANSLATE_NOOP("FullscreenUI", "The SDL input source supports most controllers.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "The XInput source provides support for XBox 360/XBox One/XBox Series controllers.");
|
||||
@@ -8259,6 +8272,7 @@ TRANSLATE_NOOP("FullscreenUI", "Use Light Theme");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Use Old MDEC Routines");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Use Single Card For Multi-Disc Games");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Use Software Renderer For Readbacks");
|
||||
TRANSLATE_NOOP("FullscreenUI", "User Name: ");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Username: {}");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Uses PGXP for all instructions, not just memory operations.");
|
||||
TRANSLATE_NOOP("FullscreenUI", "Uses a blit presentation model instead of flipping. This may be needed on some systems.");
|
||||
|
||||
@@ -240,7 +240,8 @@ static bool ShouldTrackVRAMWrites();
|
||||
static bool IsDumpingVRAMWriteTextures();
|
||||
static void UpdateVRAMTrackingState();
|
||||
|
||||
static bool CompilePipelines();
|
||||
static bool CompileReplacementPipelines();
|
||||
static bool CompileTextureScalingPipeline();
|
||||
static void DestroyPipelines();
|
||||
|
||||
static const Source* ReturnSource(Source* source, const GSVector4i uv_rect, PaletteRecordFlags flags);
|
||||
@@ -284,6 +285,7 @@ static void DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 h
|
||||
static void DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride);
|
||||
static void DecodeTexture16(const u16* page, u32 width, u32 height, u32* dest, u32 dest_stride);
|
||||
static void DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture);
|
||||
static std::unique_ptr<GPUTexture> ScaleTexture(std::unique_ptr<GPUTexture> texture);
|
||||
|
||||
static std::optional<TextureReplacementType> GetTextureReplacementTypeFromFileTitle(const std::string_view file_title);
|
||||
static bool HasValidReplacementExtension(const std::string_view path);
|
||||
@@ -501,6 +503,20 @@ ALWAYS_INLINE static float RectDistance(const GSVector4i& lhs, const GSVector4i&
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Texture scaler identifiers.
// NOTE(review): nothing in the code visible in this change references this enum; the active
// scaler is selected through GPUTextureScaling and s_texture_scalers instead - confirm it is needed.
enum TextureScaler
{
  None,
  XBR
};
|
||||
struct TextureScalerInfo
|
||||
{
|
||||
u32 scale;
|
||||
u32 compute_local_size;
|
||||
const char* vertex_shader_path;
|
||||
const char* fragment_shader_path;
|
||||
|
||||
ALWAYS_INLINE bool IsComputeShader() const { return (compute_local_size > 0); }
|
||||
};
|
||||
struct GPUTextureCacheState
|
||||
{
|
||||
Settings::TextureReplacementSettings::Configuration config;
|
||||
@@ -509,6 +525,7 @@ struct GPUTextureCacheState
|
||||
VRAMWrite* last_vram_write = nullptr;
|
||||
bool track_vram_writes = false;
|
||||
|
||||
const TextureScalerInfo* texture_scaler;
|
||||
HashCache hash_cache;
|
||||
|
||||
/// List of candidates for purging when the hash cache gets too large.
|
||||
@@ -517,6 +534,8 @@ struct GPUTextureCacheState
|
||||
/// List of VRAM writes collected when saving state.
|
||||
std::vector<VRAMWrite*> temp_vram_write_list;
|
||||
|
||||
std::unique_ptr<GPUPipeline> texture_scaler_pipeline;
|
||||
|
||||
std::unique_ptr<GPUTexture> replacement_texture_render_target;
|
||||
std::unique_ptr<GPUPipeline> replacement_draw_pipeline; // copies alpha as-is
|
||||
std::unique_ptr<GPUPipeline> replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent)
|
||||
@@ -540,6 +559,15 @@ struct GPUTextureCacheState
|
||||
|
||||
ALIGN_TO_CACHE_LINE GPUTextureCacheState s_state;
|
||||
|
||||
/// Known texture scalers. Callers index this table with (g_settings.gpu_texture_scaling - 1),
/// so the order must match the non-Disabled GPUTextureScaling values.
static constexpr const TextureScalerInfo s_texture_scalers[] = {
  // HQ2x
  {.scale = 2,
   .compute_local_size = 0,
   .vertex_shader_path = "shaders/system/texscale.vert",
   .fragment_shader_path = "shaders/system/texscale-hq2x.frag"},
  // HQ3x
  {.scale = 3,
   .compute_local_size = 0,
   .vertex_shader_path = "shaders/system/texscale.vert",
   .fragment_shader_path = "shaders/system/texscale-hq3x.frag"},
  // HQ4x
  {.scale = 4,
   .compute_local_size = 0,
   .vertex_shader_path = "shaders/system/texscale.vert",
   .fragment_shader_path = "shaders/system/texscale-hq4x.frag"},
  // MMPX
  {.scale = 2,
   .compute_local_size = 8,
   .vertex_shader_path = nullptr,
   .fragment_shader_path = "shaders/system/texscale-mmpx.comp"},
  // Scale2x
  // NOTE(review): this is a compute entry but still names a vertex shader, unlike MMPX - confirm
  // whether the path is intentionally kept.
  {.scale = 2,
   .compute_local_size = 8,
   .vertex_shader_path = "shaders/system/texscale.vert",
   .fragment_shader_path = "shaders/system/texscale-scale2x.comp"},
  // XBR
  {.scale = 2,
   .compute_local_size = 0,
   .vertex_shader_path = "shaders/system/texscale.vert",
   .fragment_shader_path = "shaders/system/texscale-xbr.frag"},
};
|
||||
|
||||
} // namespace GPUTextureCache
|
||||
|
||||
bool GPUTextureCache::ShouldTrackVRAMWrites()
|
||||
@@ -562,11 +590,18 @@ bool GPUTextureCache::IsDumpingVRAMWriteTextures()
|
||||
|
||||
bool GPUTextureCache::Initialize()
|
||||
{
|
||||
s_state.texture_scaler = (g_settings.gpu_texture_scaling == GPUTextureScaling::Disabled) ?
|
||||
nullptr :
|
||||
&s_texture_scalers[static_cast<size_t>(g_settings.gpu_texture_scaling) - 1];
|
||||
|
||||
LoadLocalConfiguration(false, false);
|
||||
UpdateVRAMTrackingState();
|
||||
if (!CompilePipelines())
|
||||
if (!CompileReplacementPipelines())
|
||||
return false;
|
||||
|
||||
if (s_state.texture_scaler && !CompileTextureScalingPipeline()) [[unlikely]]
|
||||
s_state.texture_scaler = nullptr;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -582,11 +617,16 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old
|
||||
Invalidate();
|
||||
|
||||
DestroyPipelines();
|
||||
if (!CompilePipelines()) [[unlikely]]
|
||||
if (!CompileReplacementPipelines()) [[unlikely]]
|
||||
Panic("Failed to compile pipelines on TC settings change");
|
||||
}
|
||||
}
|
||||
|
||||
const TextureScalerInfo* old_scaler = s_state.texture_scaler;
|
||||
s_state.texture_scaler = (!use_texture_cache || g_settings.gpu_texture_scaling == GPUTextureScaling::Disabled) ?
|
||||
nullptr :
|
||||
&s_texture_scalers[static_cast<size_t>(g_settings.gpu_texture_scaling) - 1];
|
||||
|
||||
// Reload textures if configuration changes.
|
||||
const bool old_replacement_scale_linear_filter = s_state.config.replacement_scale_linear_filter;
|
||||
if (LoadLocalConfiguration(false, false) ||
|
||||
@@ -599,13 +639,21 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old
|
||||
{
|
||||
if (s_state.config.replacement_scale_linear_filter != old_replacement_scale_linear_filter)
|
||||
{
|
||||
if (!CompilePipelines()) [[unlikely]]
|
||||
if (!CompileReplacementPipelines()) [[unlikely]]
|
||||
Panic("Failed to compile pipelines on TC replacement settings change");
|
||||
}
|
||||
}
|
||||
|
||||
ReloadTextureReplacements(false);
|
||||
}
|
||||
|
||||
if (use_texture_cache && s_state.texture_scaler != old_scaler)
|
||||
{
|
||||
if (!CompileTextureScalingPipeline()) [[unlikely]]
|
||||
s_state.texture_scaler = nullptr;
|
||||
|
||||
Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
bool GPUTextureCache::DoState(StateWrapper& sw, bool skip)
|
||||
@@ -756,7 +804,7 @@ void GPUTextureCache::Shutdown()
|
||||
s_state.game_id = {};
|
||||
}
|
||||
|
||||
bool GPUTextureCache::CompilePipelines()
|
||||
bool GPUTextureCache::CompileReplacementPipelines()
|
||||
{
|
||||
if (!g_settings.texture_replacements.enable_texture_replacements)
|
||||
return true;
|
||||
@@ -807,6 +855,7 @@ bool GPUTextureCache::CompilePipelines()
|
||||
|
||||
void GPUTextureCache::DestroyPipelines()
|
||||
{
|
||||
s_state.texture_scaler_pipeline.reset();
|
||||
s_state.replacement_draw_pipeline.reset();
|
||||
s_state.replacement_semitransparent_draw_pipeline.reset();
|
||||
}
|
||||
@@ -2057,6 +2106,8 @@ GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key,
|
||||
}
|
||||
|
||||
DecodeTexture(key.page, key.palette, key.mode, entry.texture.get());
|
||||
if (s_state.texture_scaler)
|
||||
entry.texture = ScaleTexture(std::move(entry.texture));
|
||||
|
||||
if (g_settings.texture_replacements.enable_texture_replacements)
|
||||
ApplyTextureReplacements(key, tex_hash, pal_hash, &entry);
|
||||
@@ -3340,4 +3391,197 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash,
|
||||
entry->texture = std::move(replacement_tex);
|
||||
|
||||
g_gpu->RestoreDeviceContext();
|
||||
}
|
||||
}
|
||||
|
||||
bool GPUTextureCache::CompileTextureScalingPipeline()
|
||||
{
|
||||
s_state.texture_scaler_pipeline.reset();
|
||||
|
||||
const TextureScalerInfo* const info = s_state.texture_scaler;
|
||||
if (!info)
|
||||
return true;
|
||||
|
||||
static constexpr auto add_defines = [](std::string& source) {
|
||||
std::string::size_type pos = source.find("#version ");
|
||||
if (pos == std::string::npos)
|
||||
return;
|
||||
|
||||
pos = source.find('\n', pos);
|
||||
if (pos == std::string::npos)
|
||||
return;
|
||||
|
||||
const RenderAPI render_api = g_gpu_device->GetRenderAPI();
|
||||
const bool vulkan = (render_api == RenderAPI::Vulkan);
|
||||
const std::string macros =
|
||||
fmt::format("#define API_D3D11 {}\n"
|
||||
"#define API_D3D12 {}\n"
|
||||
"#define API_OPENGL {}\n"
|
||||
"#define API_OPENGL_ES {}\n"
|
||||
"#define API_VULKAN {}\n"
|
||||
"#define API_METAL {}\n"
|
||||
"#define UNIFORM_BLOCK_LAYOUT layout(push_constant)\n"
|
||||
"#define TEXTURE_LAYOUT(index) layout(set = {}, binding = index)\n"
|
||||
"#define IMAGE_LAYOUT(index, format) layout(set = {}, binding = index, format)\n",
|
||||
BoolToUInt32(render_api == RenderAPI::D3D11), BoolToUInt32(render_api == RenderAPI::D3D12),
|
||||
BoolToUInt32(render_api == RenderAPI::OpenGL), BoolToUInt32(render_api == RenderAPI::OpenGLES),
|
||||
BoolToUInt32(render_api == RenderAPI::Vulkan), BoolToUInt32(render_api == RenderAPI::Metal),
|
||||
vulkan ? 0 : 1, vulkan ? 1 : 2);
|
||||
|
||||
source.insert(pos + 1, macros);
|
||||
};
|
||||
|
||||
Error error;
|
||||
std::optional<std::string> source;
|
||||
if (!info->IsComputeShader())
|
||||
{
|
||||
source = Host::ReadResourceFileToString(info->vertex_shader_path, true, &error);
|
||||
if (!source.has_value())
|
||||
{
|
||||
ERROR_LOG("Failed to read scaling vertex shader '{}': {}", info->vertex_shader_path, error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
add_defines(source.value());
|
||||
|
||||
std::unique_ptr<GPUShader> vertex_shader =
|
||||
g_gpu_device->CreateShader(GPUShaderStage::Vertex, GPUShaderLanguage::GLSLVK, source.value(), &error);
|
||||
if (!vertex_shader)
|
||||
{
|
||||
ERROR_LOG("Failed to compile scaling vertex shader '{}': {}", info->vertex_shader_path, error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
source = Host::ReadResourceFileToString(info->fragment_shader_path, true, &error);
|
||||
if (!source.has_value())
|
||||
{
|
||||
ERROR_LOG("Failed to read scaling fragment shader '{}': {}", info->fragment_shader_path, error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
add_defines(source.value());
|
||||
|
||||
std::unique_ptr<GPUShader> fragment_shader =
|
||||
g_gpu_device->CreateShader(GPUShaderStage::Fragment, GPUShaderLanguage::GLSLVK, source.value(), &error);
|
||||
if (!fragment_shader)
|
||||
{
|
||||
ERROR_LOG("Failed to compile scaling fragment shader '{}': {}", info->fragment_shader_path,
|
||||
error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
GPUPipeline::GraphicsConfig config;
|
||||
config.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
|
||||
config.primitive = GPUPipeline::Primitive::Triangles;
|
||||
config.input_layout = {};
|
||||
config.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
|
||||
config.depth = GPUPipeline::DepthState::GetNoTestsState();
|
||||
config.blend = GPUPipeline::BlendState::GetNoBlendingState();
|
||||
config.vertex_shader = vertex_shader.get();
|
||||
config.fragment_shader = fragment_shader.get();
|
||||
config.geometry_shader = nullptr;
|
||||
config.SetTargetFormats(GPUTexture::Format::RGBA8);
|
||||
config.samples = 1;
|
||||
config.per_sample_shading = false;
|
||||
config.render_pass_flags = GPUPipeline::NoRenderPassFlags;
|
||||
|
||||
s_state.texture_scaler_pipeline = g_gpu_device->CreatePipeline(config, &error);
|
||||
if (!s_state.texture_scaler_pipeline)
|
||||
{
|
||||
ERROR_LOG("Failed to compile scaling pipeline {}", error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
source = Host::ReadResourceFileToString(info->fragment_shader_path, true, &error);
|
||||
if (!source.has_value())
|
||||
{
|
||||
ERROR_LOG("Failed to read scaling compute shader '{}': {}", info->fragment_shader_path, error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
add_defines(source.value());
|
||||
|
||||
std::unique_ptr<GPUShader> compute_shader =
|
||||
g_gpu_device->CreateShader(GPUShaderStage::Compute, GPUShaderLanguage::GLSLVK, source.value(), &error);
|
||||
if (!compute_shader)
|
||||
{
|
||||
ERROR_LOG("Failed to compile scaling compute shader '{}': {}", info->fragment_shader_path,
|
||||
error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
|
||||
GPUPipeline::ComputeConfig config;
|
||||
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
|
||||
config.compute_shader = compute_shader.get();
|
||||
|
||||
s_state.texture_scaler_pipeline = g_gpu_device->CreatePipeline(config, &error);
|
||||
if (!s_state.texture_scaler_pipeline)
|
||||
{
|
||||
ERROR_LOG("Failed to compile scaling pipeline {}", error.GetDescription());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUTexture> GPUTextureCache::ScaleTexture(std::unique_ptr<GPUTexture> texture)
|
||||
{
|
||||
const TextureScalerInfo* const info = s_state.texture_scaler;
|
||||
|
||||
// TODO: rounds
|
||||
const u32 new_width = texture->GetWidth() * info->scale;
|
||||
const u32 new_height = texture->GetHeight() * info->scale;
|
||||
const GPUTexture::Type rt_type =
|
||||
info->IsComputeShader() ? GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget;
|
||||
auto rt = g_gpu_device->FetchAutoRecycleTexture(new_width, new_height, 1, 1, 1, rt_type, texture->GetFormat());
|
||||
if (!rt) [[unlikely]]
|
||||
{
|
||||
WARNING_LOG("Failed to create {}x{} RT for scaling", new_width, new_height);
|
||||
return texture;
|
||||
}
|
||||
|
||||
g_gpu_device->SetPipeline(s_state.texture_scaler_pipeline.get());
|
||||
g_gpu_device->SetRenderTarget(rt.get(), nullptr,
|
||||
info->IsComputeShader() ? GPUPipeline::BindRenderTargetsAsImages :
|
||||
GPUPipeline::NoRenderPassFlags);
|
||||
g_gpu_device->SetTextureSampler(0, texture.get(), g_gpu_device->GetNearestSampler());
|
||||
if (!info->IsComputeShader())
|
||||
{
|
||||
g_gpu_device->InvalidateRenderTarget(rt.get());
|
||||
g_gpu_device->SetViewportAndScissor(rt->GetRect());
|
||||
g_gpu_device->Draw(3, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
struct ComputeUBO
|
||||
{
|
||||
u32 src_size[2];
|
||||
u32 dst_size[2];
|
||||
};
|
||||
|
||||
const ComputeUBO uniforms = {.src_size = {texture->GetWidth(), texture->GetHeight()},
|
||||
.dst_size = {new_width, new_height}};
|
||||
const auto& [dispatch_x, dispatch_y, dispatch_z] = GPUDevice::GetDispatchCount(
|
||||
texture->GetWidth(), texture->GetHeight(), 1, info->compute_local_size, info->compute_local_size, 1);
|
||||
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
|
||||
g_gpu_device->Dispatch(dispatch_x, dispatch_y, dispatch_z);
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUTexture> new_texture =
|
||||
g_gpu_device->CreateTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, rt->GetFormat());
|
||||
if (!new_texture)
|
||||
{
|
||||
WARNING_LOG("Failed to create {}x{} texture for scaling", new_width, new_height);
|
||||
return texture;
|
||||
}
|
||||
|
||||
rt->MakeReadyForSampling();
|
||||
g_gpu_device->CopyTextureRegion(new_texture.get(), 0, 0, 0, 0, rt.get(), 0, 0, 0, 0, new_width, new_height);
|
||||
g_gpu_device->RecycleTexture(std::move(texture));
|
||||
|
||||
g_gpu->RestoreDeviceContext();
|
||||
|
||||
return new_texture;
|
||||
}
|
||||
|
||||
@@ -217,6 +217,10 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro
|
||||
ParseTextureFilterName(
|
||||
si.GetStringValue("GPU", "SpriteTextureFilter", GetTextureFilterName(gpu_texture_filter)).c_str())
|
||||
.value_or(gpu_texture_filter);
|
||||
gpu_texture_scaling =
|
||||
ParseGPUTextureScalingName(
|
||||
si.GetStringValue("GPU", "TextureScaling", GetGPUTextureScalingName(gpu_texture_scaling)).c_str())
|
||||
.value_or(gpu_texture_scaling);
|
||||
gpu_line_detect_mode =
|
||||
ParseLineDetectModeName(
|
||||
si.GetStringValue("GPU", "LineDetectMode", GetLineDetectModeName(DEFAULT_GPU_LINE_DETECT_MODE)).c_str())
|
||||
@@ -542,6 +546,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const
|
||||
si.SetStringValue(
|
||||
"GPU", "SpriteTextureFilter",
|
||||
(gpu_sprite_texture_filter != gpu_texture_filter) ? GetTextureFilterName(gpu_sprite_texture_filter) : "");
|
||||
si.SetStringValue("GPU", "TextureScaling", GetGPUTextureScalingName(gpu_texture_scaling));
|
||||
si.SetStringValue("GPU", "LineDetectMode", GetLineDetectModeName(gpu_line_detect_mode));
|
||||
si.SetStringValue("GPU", "DownsampleMode", GetDownsampleModeName(gpu_downsample_mode));
|
||||
si.SetUIntValue("GPU", "DownsampleScale", gpu_downsample_scale);
|
||||
@@ -992,6 +997,10 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages)
|
||||
}
|
||||
}
|
||||
|
||||
// scaling depends on TC
|
||||
g_settings.gpu_texture_scaling =
|
||||
g_settings.gpu_texture_cache ? g_settings.gpu_texture_scaling : GPUTextureScaling::Disabled;
|
||||
|
||||
// if challenge mode is enabled, disable things like rewind since they use save states
|
||||
if (Achievements::IsHardcoreModeActive())
|
||||
{
|
||||
@@ -1580,6 +1589,46 @@ const char* Settings::GetGPUDumpCompressionModeDisplayName(GPUDumpCompressionMod
|
||||
"GPUDumpCompressionMode");
|
||||
}
|
||||
|
||||
static constexpr const std::array s_texture_scaling_names = {
|
||||
"Disabled", "HQ2x", "HQ3x", "HQ4x", "MMPX", "Scale2x", "xBR",
|
||||
};
|
||||
static constexpr const std::array s_texture_scaling_display_names = {
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Disabled", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ2x", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ3x", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ4x", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "MMPX", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "Scale2x", "GPUTextureScaling"),
|
||||
TRANSLATE_DISAMBIG_NOOP("Settings", "xBR", "GPUTextureScaling"),
|
||||
};
|
||||
static_assert(s_texture_scaling_names.size() == static_cast<size_t>(GPUTextureScaling::MaxCount));
|
||||
static_assert(s_texture_scaling_display_names.size() == static_cast<size_t>(GPUTextureScaling::MaxCount));
|
||||
|
||||
std::optional<GPUTextureScaling> Settings::ParseGPUTextureScalingName(const char* str)
|
||||
{
|
||||
int index = 0;
|
||||
for (const char* name : s_texture_scaling_names)
|
||||
{
|
||||
if (StringUtil::Strcasecmp(name, str) == 0)
|
||||
return static_cast<GPUTextureScaling>(index);
|
||||
|
||||
index++;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Returns the settings-file name for a texture scaling mode.
// The value is used directly as an index into s_texture_scaling_names.
const char* Settings::GetGPUTextureScalingName(GPUTextureScaling scaler)
{
  const size_t index = static_cast<size_t>(scaler);
  return s_texture_scaling_names[index];
}
|
||||
|
||||
// Returns the translated display name for a texture scaling mode.
// The untranslated name is looked up by enum value and passed through the "Settings" translation
// context with the "GPUTextureScaling" disambiguation.
const char* Settings::GetGPUTextureScalingDisplayName(GPUTextureScaling scaler)
{
  const size_t index = static_cast<size_t>(scaler);
  const auto untranslated = s_texture_scaling_display_names[index];
  return Host::TranslateToCString("Settings", untranslated, "GPUTextureScaling");
}
|
||||
|
||||
static constexpr const std::array s_display_deinterlacing_mode_names = {
|
||||
"Disabled", "Weave", "Blend", "Adaptive", "Progressive",
|
||||
};
|
||||
|
||||
@@ -127,6 +127,7 @@ struct Settings
|
||||
ForceVideoTimingMode gpu_force_video_timing = DEFAULT_FORCE_VIDEO_TIMING_MODE;
|
||||
GPUTextureFilter gpu_texture_filter = DEFAULT_GPU_TEXTURE_FILTER;
|
||||
GPUTextureFilter gpu_sprite_texture_filter = DEFAULT_GPU_TEXTURE_FILTER;
|
||||
GPUTextureScaling gpu_texture_scaling = GPUTextureScaling::Disabled;
|
||||
GPULineDetectMode gpu_line_detect_mode = DEFAULT_GPU_LINE_DETECT_MODE;
|
||||
GPUDownsampleMode gpu_downsample_mode = DEFAULT_GPU_DOWNSAMPLE_MODE;
|
||||
u8 gpu_downsample_scale = 1;
|
||||
@@ -419,6 +420,10 @@ struct Settings
|
||||
static const char* GetGPUDumpCompressionModeName(GPUDumpCompressionMode mode);
|
||||
static const char* GetGPUDumpCompressionModeDisplayName(GPUDumpCompressionMode mode);
|
||||
|
||||
static std::optional<GPUTextureScaling> ParseGPUTextureScalingName(const char* str);
|
||||
static const char* GetGPUTextureScalingName(GPUTextureScaling scaler);
|
||||
static const char* GetGPUTextureScalingDisplayName(GPUTextureScaling scaler);
|
||||
|
||||
static std::optional<DisplayDeinterlacingMode> ParseDisplayDeinterlacingMode(const char* str);
|
||||
static const char* GetDisplayDeinterlacingModeName(DisplayDeinterlacingMode mode);
|
||||
static const char* GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacingMode mode);
|
||||
|
||||
@@ -4360,6 +4360,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
|
||||
g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale ||
|
||||
g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode ||
|
||||
g_settings.gpu_texture_cache != old_settings.gpu_texture_cache ||
|
||||
(g_settings.gpu_texture_cache && g_settings.gpu_texture_scaling != old_settings.gpu_texture_scaling) ||
|
||||
g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
|
||||
g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing ||
|
||||
g_settings.display_crop_mode != old_settings.display_crop_mode ||
|
||||
@@ -4648,7 +4649,7 @@ void System::WarnAboutUnsafeSettings()
|
||||
if (g_settings.gpu_texture_cache)
|
||||
{
|
||||
append(
|
||||
ICON_FA_PAINT_ROLLER,
|
||||
ICON_EMOJI_WARNING,
|
||||
TRANSLATE_SV("System",
|
||||
"Texture cache is enabled. This feature is experimental, some games may not render correctly."));
|
||||
}
|
||||
|
||||
@@ -138,6 +138,18 @@ enum class GPUDumpCompressionMode : u8
|
||||
MaxCount
|
||||
};
|
||||
|
||||
enum class GPUTextureScaling : u8
|
||||
{
|
||||
Disabled,
|
||||
HQ2X,
|
||||
HQ3X,
|
||||
HQ4X,
|
||||
MMPX,
|
||||
Scale2X,
|
||||
XBR,
|
||||
MaxCount
|
||||
};
|
||||
|
||||
enum class DisplayCropMode : u8
|
||||
{
|
||||
None,
|
||||
@@ -298,6 +310,6 @@ enum class ForceVideoTimingMode : u8
|
||||
Disabled,
|
||||
NTSC,
|
||||
PAL,
|
||||
|
||||
|
||||
Count,
|
||||
};
|
||||
|
||||
@@ -245,6 +245,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
|
||||
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureCache, "GPU", "EnableTextureCache", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", "UseOldMDECRoutines", false);
|
||||
SettingWidgetBinder::BindWidgetToEnumSetting(
|
||||
sif, m_ui.textureScaling, "GPU", "TextureScaling", &Settings::ParseGPUTextureScalingName,
|
||||
&Settings::GetGPUTextureScalingName, &Settings::GetGPUTextureScalingDisplayName, GPUTextureScaling::Disabled,
|
||||
GPUTextureScaling::MaxCount);
|
||||
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureReplacements, "TextureReplacements",
|
||||
"EnableTextureReplacements", false);
|
||||
@@ -582,6 +586,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
|
||||
dialog->registerWidgetHelp(m_ui.useOldMDECRoutines, tr("Use Old MDEC Routines"), tr("Unchecked"),
|
||||
tr("Enables the older, less accurate MDEC decoding routines. May be required for old "
|
||||
"replacement backgrounds to match/load."));
|
||||
dialog->registerWidgetHelp(m_ui.textureScaling, tr("Texture Scaling"), tr("Disabled"),
|
||||
tr("Applies a texture scaling filter to textures as a pre-processing step."));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.enableTextureReplacements, tr("Enable Texture Replacements"), tr("Unchecked"),
|
||||
tr("Enables loading of replacement textures. Not compatible with all games."));
|
||||
@@ -1146,6 +1152,7 @@ void GraphicsSettingsWidget::onMediaCaptureAudioEnabledChanged()
|
||||
void GraphicsSettingsWidget::onEnableTextureCacheChanged()
|
||||
{
|
||||
const bool tc_enabled = m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false);
|
||||
m_ui.textureScaling->setEnabled(tc_enabled);
|
||||
m_ui.enableTextureReplacements->setEnabled(tc_enabled);
|
||||
m_ui.enableTextureDumping->setEnabled(tc_enabled);
|
||||
onEnableTextureDumpingChanged();
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>584</width>
|
||||
<height>477</height>
|
||||
<height>474</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
@@ -1088,30 +1088,34 @@
|
||||
<property name="title">
|
||||
<string>General Settings</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout_9">
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="enableTextureCache">
|
||||
<property name="text">
|
||||
<string>Enable Texture Cache</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<layout class="QFormLayout" name="formLayout_13">
|
||||
<item row="0" column="0" colspan="2">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<layout class="QGridLayout" name="gridLayout_11">
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="enableTextureCache">
|
||||
<property name="text">
|
||||
<string>Enable Texture Cache (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QCheckBox" name="useOldMDECRoutines">
|
||||
<property name="text">
|
||||
<string>Use Old MDEC Routines</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_12">
|
||||
<property name="text">
|
||||
<string>The texture cache is currently experimental, and may cause rendering errors in some games.</string>
|
||||
</property>
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
<string>Texture Scaling:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QCheckBox" name="useOldMDECRoutines">
|
||||
<property name="text">
|
||||
<string>Use Old MDEC Routines</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QComboBox" name="textureScaling"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
@@ -1246,7 +1250,7 @@
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>40</height>
|
||||
<height>0</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
|
||||
@@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
|
||||
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||
m_features.feedback_loops = false;
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||
m_features.compute_shaders =
|
||||
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
|
||||
m_features.partial_msaa_resolve = false;
|
||||
m_features.memory_import = false;
|
||||
m_features.explicit_present = false;
|
||||
@@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
|
||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
||||
{
|
||||
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
|
||||
const UINT num_constants = req_size / 16u;
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
DebugAssert(res.index_aligned == 0);
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
|
||||
}
|
||||
|
||||
void* D3D11Device::MapUniformBuffer(u32 size)
|
||||
@@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
|
||||
m_uniform_buffer.Unmap(m_context.Get(), req_size);
|
||||
s_stats.buffer_streamed += size;
|
||||
|
||||
BindUniformBuffer(pos, req_size);
|
||||
}
|
||||
|
||||
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
|
||||
{
|
||||
if (m_uniform_buffer.IsUsingMapNoOverwrite())
|
||||
{
|
||||
const UINT first_constant = pos / 16u;
|
||||
const UINT num_constants = req_size / 16u;
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
const UINT first_constant = offset / 16u;
|
||||
const UINT num_constants = size / 16u;
|
||||
if (m_current_compute_shader)
|
||||
{
|
||||
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
DebugAssert(pos == 0);
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
DebugAssert(offset == 0);
|
||||
if (m_current_compute_shader)
|
||||
{
|
||||
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||
|
||||
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
||||
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
||||
m_num_current_render_targets, uavs.data(), nullptr);
|
||||
if (!m_current_compute_shader)
|
||||
{
|
||||
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
|
||||
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
|
||||
m_num_current_render_targets, uavs.data(), nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
|
||||
{
|
||||
m_current_textures[slot] = T;
|
||||
m_context->PSSetShaderResources(slot, 1, &T);
|
||||
if (m_current_compute_shader)
|
||||
m_context->CSSetShaderResources(slot, 1, &T);
|
||||
}
|
||||
if (m_current_samplers[slot] != S)
|
||||
{
|
||||
m_current_samplers[slot] = S;
|
||||
m_context->PSSetSamplers(slot, 1, &S);
|
||||
if (m_current_compute_shader)
|
||||
m_context->CSSetSamplers(slot, 1, &S);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
|
||||
if (m_current_textures[slot] != B)
|
||||
{
|
||||
m_current_textures[slot] = B;
|
||||
|
||||
// Compute doesn't support texture buffers, yet...
|
||||
m_context->PSSetShaderResources(slot, 1, &B);
|
||||
}
|
||||
}
|
||||
@@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
|
||||
|
||||
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
|
||||
{
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||
s_stats.num_draws++;
|
||||
m_context->Draw(vertex_count, base_vertex);
|
||||
}
|
||||
|
||||
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
|
||||
{
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
|
||||
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
|
||||
s_stats.num_draws++;
|
||||
m_context->DrawIndexed(index_count, base_index, base_vertex);
|
||||
}
|
||||
@@ -1129,3 +1151,10 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
||||
{
|
||||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
// Issues a compute dispatch with the given thread group counts.
void D3D11Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
  // A compute shader has to be current before dispatching.
  DebugAssert(m_current_compute_shader);

  // Dispatches are counted in the same statistic as draw calls.
  ++s_stats.num_draws;

  m_context->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
}
|
||||
|
||||
@@ -75,6 +75,7 @@ public:
|
||||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
@@ -98,6 +99,7 @@ public:
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
@@ -140,6 +142,8 @@ private:
|
||||
|
||||
bool CreateBuffers();
|
||||
void DestroyBuffers();
|
||||
void BindUniformBuffer(u32 offset, u32 size);
|
||||
void UnbindComputePipeline();
|
||||
|
||||
bool IsRenderTargetBound(const D3D11Texture* tex) const;
|
||||
|
||||
@@ -180,6 +184,7 @@ private:
|
||||
ID3D11VertexShader* m_current_vertex_shader = nullptr;
|
||||
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
|
||||
ID3D11PixelShader* m_current_pixel_shader = nullptr;
|
||||
ID3D11ComputeShader* m_current_compute_shader = nullptr;
|
||||
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
|
||||
ID3D11DepthStencilState* m_current_depth_state = nullptr;
|
||||
ID3D11BlendState* m_current_blend_state = nullptr;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "d3d11_pipeline.h"
|
||||
#include "d3d11_device.h"
|
||||
#include "d3d11_texture.h"
|
||||
#include "d3d_common.h"
|
||||
|
||||
#include "common/assert.h"
|
||||
@@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
|
||||
|
||||
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
|
||||
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
|
||||
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
|
||||
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
|
||||
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
|
||||
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
|
||||
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
|
||||
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
|
||||
{
|
||||
}
|
||||
@@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
|
||||
return h;
|
||||
}
|
||||
|
||||
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
|
||||
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
|
||||
Error* error)
|
||||
{
|
||||
ComPtr<ID3D11BlendState> dbs;
|
||||
|
||||
@@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
|
||||
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
|
||||
}
|
||||
|
||||
// Creates a compute pipeline wrapping the compute shader in `config`.
// Returns an empty pointer and sets `error` when no compute shader is supplied.
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  if (!config.compute_shader) [[unlikely]]
  {
    Error::SetStringView(error, "Missing compute shader.");
    return {};
  }

  const D3D11Shader* const shader = static_cast<const D3D11Shader*>(config.compute_shader);

  // Compute pipelines carry no graphics state: every state object and graphics shader stays null,
  // the topology is undefined, and the vertex stride and blend factor are zero.
  D3D11Pipeline* const pipeline =
    new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, shader->GetComputeShader(),
                      D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0);
  return std::unique_ptr<GPUPipeline>(pipeline);
}
|
||||
|
||||
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
|
||||
{
|
||||
if (m_current_pipeline == pipeline)
|
||||
return;
|
||||
|
||||
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
|
||||
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
|
||||
m_current_pipeline = PL;
|
||||
|
||||
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
||||
if (!PL->IsComputePipeline())
|
||||
{
|
||||
m_current_input_layout = il;
|
||||
m_context->IASetInputLayout(il);
|
||||
}
|
||||
if (was_compute)
|
||||
UnbindComputePipeline();
|
||||
|
||||
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
||||
{
|
||||
const UINT offset = 0;
|
||||
m_current_vertex_stride = PL->GetVertexStride();
|
||||
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
||||
}
|
||||
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
|
||||
{
|
||||
m_current_input_layout = il;
|
||||
m_context->IASetInputLayout(il);
|
||||
}
|
||||
|
||||
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
||||
{
|
||||
m_current_primitive_topology = topology;
|
||||
m_context->IASetPrimitiveTopology(topology);
|
||||
}
|
||||
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
|
||||
{
|
||||
const UINT offset = 0;
|
||||
m_current_vertex_stride = PL->GetVertexStride();
|
||||
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
|
||||
}
|
||||
|
||||
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
||||
{
|
||||
m_current_vertex_shader = vs;
|
||||
m_context->VSSetShader(vs, nullptr, 0);
|
||||
}
|
||||
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
|
||||
{
|
||||
m_current_primitive_topology = topology;
|
||||
m_context->IASetPrimitiveTopology(topology);
|
||||
}
|
||||
|
||||
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
||||
{
|
||||
m_current_geometry_shader = gs;
|
||||
m_context->GSSetShader(gs, nullptr, 0);
|
||||
}
|
||||
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
|
||||
{
|
||||
m_current_vertex_shader = vs;
|
||||
m_context->VSSetShader(vs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
||||
{
|
||||
m_current_pixel_shader = ps;
|
||||
m_context->PSSetShader(ps, nullptr, 0);
|
||||
}
|
||||
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
|
||||
{
|
||||
m_current_geometry_shader = gs;
|
||||
m_context->GSSetShader(gs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
||||
{
|
||||
m_current_rasterizer_state = rs;
|
||||
m_context->RSSetState(rs);
|
||||
}
|
||||
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
|
||||
{
|
||||
m_current_pixel_shader = ps;
|
||||
m_context->PSSetShader(ps, nullptr, 0);
|
||||
}
|
||||
|
||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||
{
|
||||
m_current_depth_state = ds;
|
||||
m_context->OMSetDepthStencilState(ds, 0);
|
||||
}
|
||||
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
|
||||
{
|
||||
m_current_rasterizer_state = rs;
|
||||
m_context->RSSetState(rs);
|
||||
}
|
||||
|
||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
||||
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
|
||||
{
|
||||
m_current_depth_state = ds;
|
||||
m_context->OMSetDepthStencilState(ds, 0);
|
||||
}
|
||||
|
||||
if (ID3D11BlendState* bs = PL->GetBlendState();
|
||||
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
|
||||
{
|
||||
m_current_blend_state = bs;
|
||||
m_current_blend_factor = PL->GetBlendFactor();
|
||||
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_current_blend_state = bs;
|
||||
m_current_blend_factor = PL->GetBlendFactor();
|
||||
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
|
||||
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
|
||||
{
|
||||
m_current_compute_shader = cs;
|
||||
m_context->CSSetShader(cs, nullptr, 0);
|
||||
}
|
||||
|
||||
if (!was_compute)
|
||||
{
|
||||
// need to bind all SRVs/samplers
|
||||
u32 count;
|
||||
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
|
||||
{
|
||||
if (!m_current_textures[count])
|
||||
break;
|
||||
}
|
||||
if (count > 0)
|
||||
{
|
||||
m_context->CSSetShaderResources(0, count, m_current_textures.data());
|
||||
m_context->CSSetSamplers(0, count, m_current_samplers.data());
|
||||
}
|
||||
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
{
|
||||
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
|
||||
|
||||
m_context->OMSetRenderTargets(0, nullptr, nullptr);
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
|
||||
if (m_current_pipeline != pl)
|
||||
return;
|
||||
|
||||
if (pl->IsComputePipeline())
|
||||
UnbindComputePipeline();
|
||||
|
||||
// Let the runtime deal with the dead objects...
|
||||
m_current_pipeline = nullptr;
|
||||
}
|
||||
|
||||
void D3D11Device::UnbindComputePipeline()
|
||||
{
|
||||
m_current_compute_shader = nullptr;
|
||||
|
||||
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
|
||||
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
|
||||
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
|
||||
m_context->CSSetShader(nullptr, nullptr, 0);
|
||||
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
|
||||
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
|
||||
}
|
||||
|
||||
@@ -51,13 +51,18 @@ public:
|
||||
|
||||
void SetDebugName(std::string_view name) override;
|
||||
|
||||
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
|
||||
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
|
||||
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
|
||||
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
|
||||
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
|
||||
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
|
||||
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
|
||||
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
|
||||
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
|
||||
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
|
||||
{
|
||||
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
|
||||
}
|
||||
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
|
||||
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
|
||||
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
|
||||
@@ -66,7 +71,8 @@ public:
|
||||
private:
|
||||
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
|
||||
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
|
||||
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
|
||||
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
|
||||
u32 blend_factor);
|
||||
|
||||
ComPtr<ID3D11RasterizerState> m_rs;
|
||||
ComPtr<ID3D11DepthStencilState> m_ds;
|
||||
@@ -74,7 +80,7 @@ private:
|
||||
ComPtr<ID3D11InputLayout> m_il;
|
||||
ComPtr<ID3D11VertexShader> m_vs;
|
||||
ComPtr<ID3D11GeometryShader> m_gs;
|
||||
ComPtr<ID3D11PixelShader> m_ps;
|
||||
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
|
||||
D3D11_PRIMITIVE_TOPOLOGY m_topology;
|
||||
u32 m_vertex_stride;
|
||||
u32 m_blend_factor;
|
||||
|
||||
@@ -115,6 +115,8 @@ public:
|
||||
ComputePipelineBuilder();
|
||||
~ComputePipelineBuilder() = default;
|
||||
|
||||
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
|
||||
|
||||
void Clear();
|
||||
|
||||
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);
|
||||
|
||||
@@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
|
||||
m_features.texture_buffers_emulated_with_ssbo = false;
|
||||
m_features.feedback_loops = false;
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
|
||||
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||
m_features.partial_msaa_resolve = true;
|
||||
m_features.memory_import = false;
|
||||
m_features.explicit_present = true;
|
||||
@@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||
1, // SingleTextureBufferAndPushConstants
|
||||
0, // MultiTextureAndUBO
|
||||
2, // MultiTextureAndPushConstants
|
||||
2, // ComputeSingleTextureAndPushConstants
|
||||
};
|
||||
|
||||
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
@@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
|
||||
|
||||
const u32 push_param =
|
||||
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
|
||||
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
if (!IsUsingComputeRootSignature())
|
||||
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
else
|
||||
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
|
||||
}
|
||||
|
||||
void* D3D12Device::MapUniformBuffer(u32 size)
|
||||
@@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
|
||||
if (!(rs = rsb.Create(error, true)))
|
||||
return false;
|
||||
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
|
||||
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||
rt->CommitClear(cmdlist);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
}
|
||||
}
|
||||
if (m_current_depth_target)
|
||||
@@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
|
||||
BeginRenderPass();
|
||||
}
|
||||
|
||||
void D3D12Device::PreDispatchCheck()
|
||||
{
|
||||
if (InRenderPass())
|
||||
EndRenderPass();
|
||||
|
||||
// Transition images.
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
|
||||
// All textures should be in shader read only optimal already, but just in case..
|
||||
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||
for (u32 i = 0; i < num_textures; i++)
|
||||
{
|
||||
if (m_current_textures[i])
|
||||
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
}
|
||||
|
||||
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
|
||||
{
|
||||
// Still need to clear the RTs.
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
{
|
||||
D3D12Texture* const rt = m_current_render_targets[i];
|
||||
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
rt->SetUseFenceValue(GetCurrentFenceValue());
|
||||
rt->CommitClear(cmdlist);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a new command buffer, bind the pipeline and such.
|
||||
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||
SetInitialPipelineState();
|
||||
|
||||
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
|
||||
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||
const u32 dirty = std::exchange(m_dirty_flags, 0);
|
||||
if (dirty != 0)
|
||||
{
|
||||
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
|
||||
{
|
||||
UpdateRootSignature();
|
||||
if (!UpdateRootParameters(dirty))
|
||||
{
|
||||
SubmitCommandList(false, "out of descriptors");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
|
||||
{
|
||||
if (!UpdateRootParameters(dirty))
|
||||
{
|
||||
SubmitCommandList(false, "out of descriptors");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool D3D12Device::IsUsingROVRootSignature() const
|
||||
{
|
||||
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
|
||||
}
|
||||
|
||||
bool D3D12Device::IsUsingComputeRootSignature() const
|
||||
{
|
||||
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
|
||||
}
|
||||
|
||||
void D3D12Device::UpdateRootSignature()
|
||||
{
|
||||
GetCommandList()->SetGraphicsRootSignature(
|
||||
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
|
||||
if (!IsUsingComputeRootSignature())
|
||||
{
|
||||
cmdlist->SetGraphicsRootSignature(
|
||||
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
}
|
||||
else
|
||||
{
|
||||
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
|
||||
}
|
||||
}
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
@@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
|
||||
@@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||
return false;
|
||||
}
|
||||
|
||||
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
|
||||
@@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
|
||||
1 :
|
||||
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
|
||||
2);
|
||||
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
||||
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
|
||||
else
|
||||
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
|
||||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||
|
||||
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
}
|
||||
@@ -2331,3 +2435,10 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
|
||||
{
|
||||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
// Records a compute dispatch of the given thread group counts on the current command list.
// The dispatch is counted in the num_draws statistic.
void D3D12Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
  // Must run first: it may end the render pass or submit and replace the command list.
  PreDispatchCheck();

  ID3D12GraphicsCommandList4* const cmdlist = GetCommandList();
  cmdlist->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
  s_stats.num_draws++;
}
|
||||
|
||||
@@ -96,6 +96,7 @@ public:
|
||||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
@@ -119,6 +120,7 @@ public:
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
@@ -275,8 +277,10 @@ private:
|
||||
ID3D12RootSignature* GetCurrentRootSignature() const;
|
||||
void SetInitialPipelineState();
|
||||
void PreDrawCheck();
|
||||
void PreDispatchCheck();
|
||||
|
||||
bool IsUsingROVRootSignature() const;
|
||||
bool IsUsingComputeRootSignature() const;
|
||||
void UpdateRootSignature();
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool UpdateParametersForLayout(u32 dirty);
|
||||
|
||||
@@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
|
||||
return SHA1Digest::DigestToString(digest);
|
||||
}
|
||||
|
||||
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
|
||||
{
|
||||
SHA1Digest hash;
|
||||
hash.Update(&config.layout, sizeof(config.layout));
|
||||
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
|
||||
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
|
||||
|
||||
u8 digest[SHA1Digest::DIGEST_SIZE];
|
||||
hash.Final(digest);
|
||||
return SHA1Digest::DigestToString(digest);
|
||||
}
|
||||
|
||||
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
|
||||
{
|
||||
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
|
||||
@@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
|
||||
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
|
||||
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
|
||||
}
|
||||
|
||||
// Builds a compute pipeline state object for the D3D12 backend.
// The root signature is taken from row 0 of m_root_signatures for config.layout. When a
// pipeline library is available the PSO is looked up there first, under the name returned by
// D3D12Pipeline::GetPipelineName(); on a miss it is created normally and stored back.
// Returns an empty pointer when the config has no compute shader or PSO creation fails.
// In the missing-shader case the failure is only logged and `error` is not filled in.
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
  // Reject a missing shader up front instead of dereferencing a null pointer below.
  const D3D12Shader* const shader = static_cast<const D3D12Shader*>(config.compute_shader);
  if (!shader) [[unlikely]]
  {
    ERROR_LOG("Missing compute shader.");
    return {};
  }

  D3D12::ComputePipelineBuilder cpb;
  cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
  cpb.SetShader(shader->GetBytecodeData(), shader->GetBytecodeSize());

  ComPtr<ID3D12PipelineState> pipeline;
  if (m_pipeline_library)
  {
    const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
    HRESULT hr =
      m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
    if (FAILED(hr))
    {
      // E_INVALIDARG = not found.
      if (hr != E_INVALIDARG)
        ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));

      // Need to create it normally.
      pipeline = cpb.Create(m_device.Get(), error, false);

      // Store only after a plain cache miss, not after any other load failure.
      if (pipeline && hr == E_INVALIDARG)
      {
        hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
        if (FAILED(hr))
          ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
      }
    }
  }
  else
  {
    pipeline = cpb.Create(m_device.Get(), error, false);
  }

  if (!pipeline)
    return {};

  // Compute pipelines carry no topology, vertex stride or blend constant.
  return std::unique_ptr<GPUPipeline>(
    new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}
|
||||
|
||||
@@ -51,6 +51,7 @@ public:
|
||||
void SetDebugName(std::string_view name) override;
|
||||
|
||||
static std::string GetPipelineName(const GraphicsConfig& config);
|
||||
static std::string GetPipelineName(const ComputeConfig& config);
|
||||
|
||||
private:
|
||||
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,
|
||||
|
||||
@@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
|
||||
// Need to know if there's UBOs for mapping.
|
||||
const spvc_reflected_resource *ubos, *textures;
|
||||
size_t ubos_count, textures_count;
|
||||
size_t ubos_count, textures_count, images_count;
|
||||
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
|
||||
&ubos_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
|
||||
&textures, &textures_count)) != SPVC_SUCCESS)
|
||||
&textures, &textures_count)) != SPVC_SUCCESS ||
|
||||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
|
||||
&textures, &images_count)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
@@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
|
||||
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
|
||||
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
|
||||
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
|
||||
|
||||
switch (target_language)
|
||||
{
|
||||
@@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < images_count; i++)
|
||||
{
|
||||
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = IMAGE_DESCRIPTOR_SET,
|
||||
.binding = i,
|
||||
.cbv = {},
|
||||
.uav = {.register_space = 0, .register_binding = i},
|
||||
.srv = {},
|
||||
.sampler = {}};
|
||||
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
@@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
return {};
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Fragment)
|
||||
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
|
||||
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
|
||||
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
|
||||
.msl_buffer = 0,
|
||||
.msl_texture = 0,
|
||||
.msl_sampler = 0};
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
|
||||
.desc_set = 1,
|
||||
const spvc_msl_resource_binding rb = {.stage = execmodel,
|
||||
.desc_set = TEXTURE_DESCRIPTOR_SET,
|
||||
.binding = i,
|
||||
.msl_buffer = i,
|
||||
.msl_texture = i,
|
||||
@@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_features.framebuffer_fetch)
|
||||
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||
static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
if (stage == GPUShaderStage::Compute)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
{
|
||||
const spvc_msl_resource_binding rb = {
|
||||
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
|
||||
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
|
||||
|
||||
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
|
||||
{
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
|
||||
static_cast<int>(sres));
|
||||
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,6 +160,9 @@ public:
|
||||
// Multiple textures, 128 byte UBO via push constants.
|
||||
MultiTextureAndPushConstants,
|
||||
|
||||
// 128 byte UBO via push constants, 1 texture, compute shader.
|
||||
ComputeSingleTextureAndPushConstants,
|
||||
|
||||
MaxCount
|
||||
};
|
||||
|
||||
@@ -416,6 +419,12 @@ public:
|
||||
u32 GetRenderTargetCount() const;
|
||||
};
|
||||
|
||||
struct ComputeConfig
|
||||
{
|
||||
Layout layout;
|
||||
GPUShader* compute_shader;
|
||||
};
|
||||
|
||||
GPUPipeline();
|
||||
virtual ~GPUPipeline();
|
||||
|
||||
@@ -501,9 +510,10 @@ public:
|
||||
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
|
||||
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
|
||||
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
|
||||
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
|
||||
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
|
||||
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
|
||||
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
|
||||
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
|
||||
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
|
||||
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
|
||||
};
|
||||
|
||||
enum class DrawBarrier : u32
|
||||
@@ -532,6 +542,7 @@ public:
|
||||
bool texture_buffers_emulated_with_ssbo : 1;
|
||||
bool feedback_loops : 1;
|
||||
bool geometry_shaders : 1;
|
||||
bool compute_shaders : 1;
|
||||
bool partial_msaa_resolve : 1;
|
||||
bool memory_import : 1;
|
||||
bool explicit_present : 1;
|
||||
@@ -625,11 +636,20 @@ public:
|
||||
0, // SingleTextureBufferAndPushConstants
|
||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
|
||||
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
|
||||
1, // ComputeSingleTextureAndPushConstants
|
||||
};
|
||||
|
||||
return counts[static_cast<u8>(layout)];
|
||||
}
|
||||
|
||||
/// Returns the number of thread groups to dispatch for a given total count and local size.
/// Each axis is divided by its local size and rounded up, so a partial group at the end is
/// still dispatched. A count of zero yields zero groups on that axis.
/// The rounding uses quotient plus remainder check, so it cannot wrap for counts near the
/// 32-bit limit. Every local size must be non-zero.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
                                                            u32 local_size_y, u32 local_size_z)
{
  // Ceiling division without forming count + (local_size - 1), which can overflow u32.
  const auto ceil_div = [](u32 count, u32 local_size) -> u32 {
    return (count / local_size) + (((count % local_size) != 0u) ? 1u : 0u);
  };

  return std::make_tuple(ceil_div(count_x, local_size_x), ceil_div(count_y, local_size_y),
                         ceil_div(count_z, local_size_z));
}
|
||||
|
||||
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
|
||||
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
|
||||
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
|
||||
@@ -638,10 +658,6 @@ public:
|
||||
|
||||
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
|
||||
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
|
||||
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
|
||||
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
|
||||
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
|
||||
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
|
||||
|
||||
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
|
||||
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
|
||||
@@ -712,6 +728,8 @@ public:
|
||||
Error* error = nullptr, const char* entry_point = "main");
|
||||
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
|
||||
Error* error = nullptr) = 0;
|
||||
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
|
||||
Error* error = nullptr) = 0;
|
||||
|
||||
/// Debug messaging.
|
||||
virtual void PushDebugGroup(const char* name) = 0;
|
||||
@@ -753,6 +771,7 @@ public:
|
||||
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
|
||||
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
|
||||
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
|
||||
virtual void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) = 0;
|
||||
|
||||
/// Returns false if the window was completely occluded.
|
||||
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;
|
||||
|
||||
@@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
|
||||
}
|
||||
}
|
||||
|
||||
// Compute pipelines are not implemented for the OpenGL backend: logs an error and returns an
// empty pointer. Neither argument is inspected and `error` is left untouched.
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline([[maybe_unused]] const GPUPipeline::ComputeConfig& config,
                                                          [[maybe_unused]] Error* error)
{
  ERROR_LOG("Compute shaders are not yet supported.");
  return std::unique_ptr<GPUPipeline>();
}
|
||||
|
||||
void OpenGLDevice::PushDebugGroup(const char* name)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
@@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
|
||||
|
||||
m_features.geometry_shaders =
|
||||
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
|
||||
m_features.compute_shaders = false;
|
||||
|
||||
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
|
||||
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
|
||||
@@ -1078,6 +1085,11 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
||||
Panic("Barriers are not supported");
|
||||
}
|
||||
|
||||
void OpenGLDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
|
||||
{
|
||||
Panic("Compute shaders are not supported");
|
||||
}
|
||||
|
||||
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
|
||||
u32* map_base_vertex)
|
||||
{
|
||||
|
||||
@@ -77,6 +77,7 @@ public:
|
||||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
@@ -100,6 +101,7 @@ public:
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
|
||||
|
||||
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
|
||||
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;
|
||||
|
||||
@@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
|
||||
m_smap_constants = {};
|
||||
}
|
||||
|
||||
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
|
||||
bool clear /*= true*/)
|
||||
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
|
||||
Error* error)
|
||||
{
|
||||
VkPipeline pipeline;
|
||||
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
|
||||
if (res != VK_SUCCESS)
|
||||
{
|
||||
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
|
||||
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
|
||||
@@ -197,7 +197,7 @@ public:
|
||||
|
||||
void Clear();
|
||||
|
||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
|
||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
|
||||
|
||||
void SetShader(VkShaderModule module, const char* entry_point);
|
||||
|
||||
|
||||
@@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
|
||||
WARNING_LOG("Emulating texture buffers with SSBOs.");
|
||||
|
||||
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
|
||||
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
|
||||
|
||||
m_features.partial_msaa_resolve = true;
|
||||
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
|
||||
@@ -2735,7 +2736,8 @@ void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size)
|
||||
{
|
||||
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
s_stats.buffer_streamed += data_size;
|
||||
vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0,
|
||||
vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(),
|
||||
IsCurrentPipelineCompute() ? VK_SHADER_STAGE_COMPUTE_BIT : UNIFORM_PUSH_CONSTANTS_STAGES, 0,
|
||||
data_size, data);
|
||||
}
|
||||
|
||||
@@ -2802,7 +2804,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
}
|
||||
|
||||
{
|
||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
|
||||
@@ -2822,7 +2825,8 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (m_optional_extensions.vk_khr_push_descriptor)
|
||||
dslb.SetPushFlag();
|
||||
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
|
||||
@@ -2837,14 +2841,13 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
|
||||
}
|
||||
|
||||
if (m_features.raster_order_views)
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
{
|
||||
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
}
|
||||
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
|
||||
|
||||
for (u32 type = 0; type < 3; type++)
|
||||
{
|
||||
@@ -2860,7 +2863,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
|
||||
@@ -2873,7 +2876,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
@@ -2887,7 +2890,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
@@ -2901,7 +2904,7 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
|
||||
@@ -2915,13 +2918,24 @@ bool VulkanDevice::CreatePipelineLayouts()
|
||||
if (feedback_loop)
|
||||
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
|
||||
else if (rov)
|
||||
plb.AddDescriptorSet(m_rov_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
VkPipelineLayout& pl =
|
||||
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
|
||||
plb.AddDescriptorSet(m_single_texture_ds_layout);
|
||||
plb.AddDescriptorSet(m_image_ds_layout);
|
||||
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
|
||||
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -2942,7 +2956,7 @@ void VulkanDevice::DestroyPipelineLayouts()
|
||||
l = VK_NULL_HANDLE;
|
||||
}
|
||||
};
|
||||
destroy_dsl(m_rov_ds_layout);
|
||||
destroy_dsl(m_image_ds_layout);
|
||||
destroy_dsl(m_feedback_loop_ds_layout);
|
||||
destroy_dsl(m_multi_texture_ds_layout);
|
||||
destroy_dsl(m_single_texture_buffer_ds_layout);
|
||||
@@ -3459,13 +3473,13 @@ void VulkanDevice::SetPipeline(GPUPipeline* pipeline)
|
||||
|
||||
m_current_pipeline = static_cast<VulkanPipeline*>(pipeline);
|
||||
|
||||
vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
|
||||
|
||||
if (m_current_pipeline_layout != m_current_pipeline->GetLayout())
|
||||
{
|
||||
m_current_pipeline_layout = m_current_pipeline->GetLayout();
|
||||
m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT;
|
||||
}
|
||||
|
||||
vkCmdBindPipeline(m_current_command_buffer, GetCurrentVkPipelineBindPoint(), m_current_pipeline->GetPipeline());
|
||||
}
|
||||
|
||||
void VulkanDevice::UnbindPipeline(VulkanPipeline* pl)
|
||||
@@ -3503,12 +3517,24 @@ VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline
|
||||
PipelineLayoutType::Normal);
|
||||
}
|
||||
|
||||
// Returns true when m_current_pipeline_layout compares greater than or equal to
// GPUPipeline::Layout::ComputeSingleTextureAndPushConstants.
// NOTE(review): this relies on enumerator order - presumably every compute layout is declared
// at or after that enumerator and no graphics layout follows it; verify against the enum.
bool VulkanDevice::IsCurrentPipelineCompute() const
|
||||
{
|
||||
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
|
||||
}
|
||||
|
||||
// Looks up the VkPipelineLayout for the current pipeline in m_pipeline_layouts.
// The second index is always m_current_pipeline_layout.
// NOTE(review): this span shows two forms of the first index back to back (it looks like a diff
// rendering of a before/after pair). In the second form, compute pipelines use row 0 and all
// other pipelines use GetPipelineLayoutType(m_current_render_pass_flags).
VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
|
||||
{
|
||||
return m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
|
||||
return m_pipeline_layouts[IsCurrentPipelineCompute() ?
|
||||
0 :
|
||||
static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
|
||||
[static_cast<size_t>(m_current_pipeline_layout)];
|
||||
}
|
||||
|
||||
// Selects the Vulkan bind point matching the current pipeline layout:
// VK_PIPELINE_BIND_POINT_COMPUTE when IsCurrentPipelineCompute() is true,
// VK_PIPELINE_BIND_POINT_GRAPHICS otherwise.
VkPipelineBindPoint VulkanDevice::GetCurrentVkPipelineBindPoint() const
|
||||
{
|
||||
return IsCurrentPipelineCompute() ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
|
||||
}
|
||||
|
||||
void VulkanDevice::SetInitialPipelineState()
|
||||
{
|
||||
DebugAssert(m_current_pipeline);
|
||||
@@ -3520,7 +3546,7 @@ void VulkanDevice::SetInitialPipelineState()
|
||||
vkCmdBindIndexBuffer(cmdbuf, m_index_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||
|
||||
m_current_pipeline_layout = m_current_pipeline->GetLayout();
|
||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
|
||||
vkCmdBindPipeline(cmdbuf, GetCurrentVkPipelineBindPoint(), m_current_pipeline->GetPipeline());
|
||||
|
||||
const VkViewport vp = {static_cast<float>(m_current_viewport.left),
|
||||
static_cast<float>(m_current_viewport.top),
|
||||
@@ -3674,12 +3700,56 @@ void VulkanDevice::PreDrawCheck()
|
||||
}
|
||||
}
|
||||
|
||||
// Prepares device state before a compute dispatch is recorded (called from Dispatch()).
// Steps, in order:
//   1. Transition every bound texture used by the current layout to ShaderReadOnly.
//   2. For each current render target: commit a pending clear, mark it dirty, transition it
//      to ReadWriteImage and stamp it with the current fence counter.
//   3. Apply the initial pipeline state if DIRTY_FLAG_INITIAL is set.
//   4. Flush the remaining dirty flags through UpdateDescriptorSets(); if that fails, submit
//      the command buffer and start over.
// NOTE(review): nothing in this function ends an active render pass before the dispatch -
// presumably that is handled elsewhere (e.g. by the layout transitions); confirm.
void VulkanDevice::PreDispatchCheck()
|
||||
{
|
||||
// All textures should be in shader read only optimal already, but just in case..
|
||||
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
|
||||
for (u32 i = 0; i < num_textures; i++)
|
||||
{
|
||||
if (m_current_textures[i])
|
||||
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
|
||||
}
|
||||
|
||||
// Binding as image, but we still need to clear it.
|
||||
for (u32 i = 0; i < m_num_current_render_targets; i++)
|
||||
{
|
||||
VulkanTexture* rt = m_current_render_targets[i];
|
||||
if (rt->GetState() == GPUTexture::State::Cleared)
|
||||
rt->CommitClear(m_current_command_buffer);
|
||||
rt->SetState(GPUTexture::State::Dirty);
|
||||
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
|
||||
rt->SetUseFenceCounter(GetCurrentFenceCounter());
|
||||
}
|
||||
|
||||
// If this is a new command buffer, bind the pipeline and such.
|
||||
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
|
||||
SetInitialPipelineState();
|
||||
|
||||
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
|
||||
// With no render pass flags set, DIRTY_FLAG_INPUT_ATTACHMENT is excluded from this update
|
||||
// and, if it was set, stays pending in m_dirty_flags; every other dirty bit is consumed.
|
||||
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
|
||||
const u32 dirty = m_dirty_flags & update_mask;
|
||||
m_dirty_flags = m_dirty_flags & ~update_mask;
|
||||
|
||||
if (dirty != 0)
|
||||
{
|
||||
if (!UpdateDescriptorSets(dirty))
|
||||
{
|
||||
// Descriptor set update failed: submit the current command buffer and redo the whole
|
||||
// check from the top.
|
||||
// NOTE(review): presumably SubmitCommandBuffer() re-marks state dirty so that the retry
|
||||
// rebinds everything - confirm.
|
||||
SubmitCommandBuffer(false, "out of descriptor sets");
|
||||
PreDispatchCheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
{
|
||||
[[maybe_unused]] bool new_dynamic_offsets = false;
|
||||
|
||||
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
||||
constexpr bool is_compute = (layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
|
||||
constexpr VkPipelineBindPoint vk_bind_point =
|
||||
(is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
|
||||
std::array<VkDescriptorSet, 3> ds;
|
||||
u32 first_ds = 0;
|
||||
u32 num_ds = 0;
|
||||
@@ -3700,7 +3770,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
}
|
||||
|
||||
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
|
||||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
|
||||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
|
||||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
|
||||
{
|
||||
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
|
||||
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
|
||||
@@ -3727,7 +3798,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
}
|
||||
|
||||
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
|
||||
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
|
||||
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
|
||||
if (num_ds == 0)
|
||||
return true;
|
||||
}
|
||||
@@ -3757,7 +3828,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
{
|
||||
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
|
||||
{
|
||||
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
|
||||
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
|
||||
if (ids == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
@@ -3792,8 +3863,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
|
||||
}
|
||||
|
||||
DebugAssert(num_ds > 0);
|
||||
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
|
||||
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
|
||||
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
|
||||
static_cast<u32>(new_dynamic_offsets),
|
||||
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
|
||||
|
||||
return true;
|
||||
@@ -3818,6 +3889,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
|
||||
case GPUPipeline::Layout::MultiTextureAndPushConstants:
|
||||
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
|
||||
|
||||
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
|
||||
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
|
||||
|
||||
default:
|
||||
UnreachableCode();
|
||||
}
|
||||
@@ -3911,3 +3985,10 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
|
||||
DefaultCaseIsUnreachable();
|
||||
}
|
||||
}
|
||||
|
||||
// Records a compute dispatch with the given thread group counts on the current command buffer.
// PreDispatchCheck() runs first to prepare textures, render targets and bindings.
// The dispatch is counted in the s_stats.num_draws statistic.
void VulkanDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
|
||||
{
|
||||
PreDispatchCheck();
|
||||
s_stats.num_draws++;
|
||||
vkCmdDispatch(GetCurrentCommandBuffer(), thread_groups_x, thread_groups_y, thread_groups_z);
|
||||
}
|
||||
|
||||
@@ -113,6 +113,7 @@ public:
|
||||
std::string_view source, const char* entry_point,
|
||||
DynamicHeapArray<u8>* out_binary, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
|
||||
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
|
||||
|
||||
void PushDebugGroup(const char* name) override;
|
||||
void PopDebugGroup() override;
|
||||
@@ -136,6 +137,7 @@ public:
|
||||
void Draw(u32 vertex_count, u32 base_vertex) override;
|
||||
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
|
||||
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
|
||||
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
|
||||
|
||||
bool SetGPUTimingEnabled(bool enabled) override;
|
||||
float GetAndResetAccumulatedGPUTime() override;
|
||||
@@ -370,9 +372,12 @@ private:
|
||||
|
||||
/// Applies any changed state.
|
||||
static PipelineLayoutType GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags);
|
||||
bool IsCurrentPipelineCompute() const;
|
||||
VkPipelineLayout GetCurrentVkPipelineLayout() const;
|
||||
VkPipelineBindPoint GetCurrentVkPipelineBindPoint() const;
|
||||
void SetInitialPipelineState();
|
||||
void PreDrawCheck();
|
||||
void PreDispatchCheck();
|
||||
|
||||
template<GPUPipeline::Layout layout>
|
||||
bool UpdateDescriptorSetsForLayout(u32 dirty);
|
||||
@@ -435,7 +440,7 @@ private:
|
||||
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
|
||||
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
|
||||
static_cast<size_t>(PipelineLayoutType::MaxCount)>
|
||||
m_pipeline_layouts = {};
|
||||
|
||||
@@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
|
||||
return std::unique_ptr<GPUPipeline>(
|
||||
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
|
||||
}
|
||||
|
||||
// Compute-pipeline overload of CreatePipeline() for the Vulkan backend.
// Builds a VkPipeline from config.compute_shader and the pipeline layout selected by
// config.layout, then wraps it in a VulkanPipeline.
// Returns an empty pointer when the builder fails to create the pipeline; `error` is
// forwarded to the builder.
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
|
||||
{
|
||||
Vulkan::ComputePipelineBuilder cpb;
|
||||
// The shader entry point name is fixed to "main".
|
||||
cpb.SetShader(static_cast<const VulkanShader*>(config.compute_shader)->GetModule(), "main");
|
||||
// Compute layouts are always taken from row 0 of m_pipeline_layouts.
|
||||
cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast<size_t>(config.layout)]);
|
||||
|
||||
const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error);
|
||||
if (!pipeline)
|
||||
return {};
|
||||
|
||||
// Third argument is 0 and no render pass flags are set - presumably the vertices-per-primitive
|
||||
// slot, which has no meaning for compute; confirm against the VulkanPipeline constructor.
|
||||
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user