Compare commits

2 Commits

Author SHA1 Message Date
Stenzek
de77b764a7 GPU/TextureCache: Add texture scaling feature 2024-11-20 21:47:46 +10:00
Stenzek
c21ea3c85b GPUDevice: Add compute shader support 2024-11-20 21:28:54 +10:00
30 changed files with 1402 additions and 155 deletions

View File

@@ -0,0 +1,124 @@
#version 460 core
// EPX.glc
// Copyright 2020 Morgan McGuire & Mara Gagiu,
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
// Implementation of Eric Johnston and Andrea Mazzoleni's
// EPX aka Scale2X algorithm based on https://www.scale2x.it/algorithm
#define ABGR8 uint
UNIFORM_BLOCK_LAYOUT uniform UBOBlock {
ivec2 src_size;
ivec2 dst_size;
};
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
IMAGE_LAYOUT(0, rgba8) uniform restrict writeonly image2D dst_image;
ABGR8 src(int x, int y) {
return packUnorm4x8(texelFetch(samp0, ivec2(x, y), 0));
}
void dst(int x, int y, ABGR8 value) {
imageStore(dst_image, ivec2(x, y), unpackUnorm4x8(value));
}
uint luma(ABGR8 C) {
uint alpha = (C & 0xFF000000u) >> 24;
return (((C & 0x00FF0000u) >> 16) + ((C & 0x0000FF00u) >> 8) + (C & 0x000000FFu) + 1u) * (256u - alpha);
}
bool all_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
return ((B ^ A0) | (B ^ A1)) == 0u;
}
bool all_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
return ((B ^ A0) | (B ^ A1) | (B ^ A2)) == 0u;
}
bool all_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
return ((B ^ A0) | (B ^ A1) | (B ^ A2) | (B ^ A3)) == 0u;
}
bool any_eq3(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2) {
return B == A0 || B == A1 || B == A2;
}
bool none_eq2(ABGR8 B, ABGR8 A0, ABGR8 A1) {
return (B != A0) && (B != A1);
}
bool none_eq4(ABGR8 B, ABGR8 A0, ABGR8 A1, ABGR8 A2, ABGR8 A3) {
return B != A0 && B != A1 && B != A2 && B != A3;
}
layout(local_size_x = 8, local_size_y = 8) in;
void main () {
// EPX first falls back to Nearest Neighbour
int srcX = int(gl_GlobalInvocationID.x);
int srcY = int(gl_GlobalInvocationID.y);
if (srcX >= src_size.x || srcY >= src_size.y)
return;
ABGR8 A = src(srcX - 1, srcY - 1), B = src(srcX, srcY - 1), C = src(srcX + 1, srcY - 1);
ABGR8 D = src(srcX - 1, srcY + 0), E = src(srcX, srcY + 0), F = src(srcX + 1, srcY + 0);
ABGR8 G = src(srcX - 1, srcY + 1), H = src(srcX, srcY + 1), I = src(srcX + 1, srcY + 1);
ABGR8 J = E, K = E, L = E, M = E;
if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) {
ABGR8 P = src(srcX, srcY - 2), S = src(srcX, srcY + 2);
ABGR8 Q = src(srcX - 2, srcY), R = src(srcX + 2, srcY);
ABGR8 Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H);
// 1:1 slope rules
if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D;
if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B;
if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H;
if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F;
// Intersection rules
if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(srcX + 3, srcY))) K = M = F;
if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(srcX - 3, srcY))) J = L = D;
if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(srcX, srcY + 3))) L = M = H;
if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(srcX, srcY - 3))) J = K = B;
if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B;
if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H;
if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F;
if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D;
// 2:1 slope rules
if (H != B) {
if (H != A && H != E && H != C) {
if (all_eq3(H, G, F, R) && none_eq2(H, D, src(srcX + 2, srcY - 1))) L = M;
if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(srcX - 2, srcY - 1))) M = L;
}
if (B != I && B != G && B != E) {
if (all_eq3(B, A, F, R) && none_eq2(B, D, src(srcX + 2, srcY + 1))) J = K;
if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(srcX - 2, srcY + 1))) K = J;
}
} // H !== B
if (F != D) {
if (D != I && D != E && D != C) {
if (all_eq3(D, A, H, S) && none_eq2(D, B, src(srcX + 1, srcY + 2))) J = L;
if (all_eq3(D, G, B, P) && none_eq2(D, H, src(srcX + 1, srcY - 2))) L = J;
}
if (F != E && F != A && F != G) {
if (all_eq3(F, C, H, S) && none_eq2(F, B, src(srcX - 1, srcY + 2))) K = M;
if (all_eq3(F, I, B, P) && none_eq2(F, H, src(srcX - 1, srcY - 2))) M = K;
}
} // F !== D
} // not constant
// Write four pixels at once
dst(srcX * 2, srcY * 2, J);
dst(srcX * 2 + 1, srcY * 2, K);
dst(srcX * 2, srcY * 2 + 1, L);
dst(srcX * 2 + 1, srcY * 2 + 1, M);
}

View File

@@ -0,0 +1,55 @@
#version 460 core
// EPX.glc
// Copyright 2020 Morgan McGuire & Mara Gagiu,
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
// Implementation of Eric Johnston and Andrea Mazzoleni's
// EPX aka Scale2X algorithm based on https://www.scale2x.it/algorithm
#define ABGR8 uint
UNIFORM_BLOCK_LAYOUT uniform UBOBlock {
ivec2 src_size;
ivec2 dst_size;
};
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
IMAGE_LAYOUT(0, rgba8) uniform restrict writeonly image2D dst_image;
ABGR8 src(int x, int y) {
return packUnorm4x8(texelFetch(samp0, ivec2(x, y), 0));
}
void dst(int x, int y, ABGR8 value) {
imageStore(dst_image, ivec2(x, y), unpackUnorm4x8(value));
}
layout(local_size_x = 8, local_size_y = 8) in;
void main () {
// EPX first falls back to Nearest Neighbour
int srcX = int(gl_GlobalInvocationID.x);
int srcY = int(gl_GlobalInvocationID.y);
if (srcX >= src_size.x || srcY >= src_size.y)
return;
ABGR8 E = src(srcX, srcY);
ABGR8 J = E, K = E, L = E, M = E;
ABGR8 B = src(srcX + 0, srcY - 1);
ABGR8 D = src(srcX - 1, srcY + 0);
ABGR8 F = src(srcX + 1, srcY + 0);
ABGR8 H = src(srcX + 0, srcY + 1);
if (D == B && B != F && D != H) J = D;
if (B == F && D != F && H != F) K = F;
if (H == D && F != D && B != D) L = D;
if (H == F && D != H && B != F) M = F;
// Write four pixels at once
dst(srcX * 2, srcY * 2, J);
dst(srcX * 2 + 1, srcY * 2, K);
dst(srcX * 2, srcY * 2 + 1, L);
dst(srcX * 2 + 1, srcY * 2 + 1, M);
}

View File

@@ -0,0 +1,248 @@
#version 460 core
layout(location = 0) in VertexData {
vec2 v_tex0;
};
layout(location = 0) out vec4 dest;
TEXTURE_LAYOUT(0) uniform sampler2D samp0;
vec4 SrcGet(vec2 uv)
{
return texelFetch(samp0, ivec2(uv), 0);
}
// XBR.pix
// Copyright 2020 Morgan McGuire & Mara Gagiu,
// provided under the Open Source MIT license https://opensource.org/licenses/MIT
#define XBR_Y_WEIGHT 48.0
#define XBR_EQ_THRESHOLD 15.0
#define XBR_LV1_COEFFICIENT 0.5
#define XBR_LV2_COEFFICIENT 2.0
// END PARAMETERS //
// XBR GLSL implementation source:
// https://github.com/libretro/glsl-shaders/blob/master/xbr/shaders/xbr-lv2.glsl
/*
Hyllian's xBR-lv2 Shader
Copyright (C) 2011-2015 Hyllian - sergiogdb@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Incorporates some of the ideas from SABR shader. Thanks to Joshua Street.
*/
// Uncomment just one of the three params below to choose the corner detection
#define CORNER_A
//#define CORNER_B
//#define CORNER_C
//#define CORNER_D
#ifndef CORNER_A
#define SMOOTH_TIPS
#endif
#define XBR_SCALE 2.0
#define lv2_cf XBR_LV2_COEFFICIENT
//=================================================================================
// XBR Helper Functions
//=================================================================================
const float coef = 2.0;
const vec3 rgbw = vec3(14.352, 28.176, 5.472);
const vec4 eq_threshold = vec4(15.0, 15.0, 15.0, 15.0);
const vec4 delta = vec4(1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE, 1.0/XBR_SCALE);
const vec4 delta_l = vec4(0.5/XBR_SCALE, 1.0/XBR_SCALE, 0.5/XBR_SCALE, 1.0/XBR_SCALE);
const vec4 delta_u = delta_l.yxwz;
const vec4 Ao = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 Bo = vec4( 1.0, 1.0, -1.0,-1.0 );
const vec4 Co = vec4( 1.5, 0.5, -0.5, 0.5 );
const vec4 Ax = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 Bx = vec4( 0.5, 2.0, -0.5,-2.0 );
const vec4 Cx = vec4( 1.0, 1.0, -0.5, 0.0 );
const vec4 Ay = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 By = vec4( 2.0, 0.5, -2.0,-0.5 );
const vec4 Cy = vec4( 2.0, 0.0, -1.0, 0.5 );
const vec4 Ci = vec4(0.25, 0.25, 0.25, 0.25);
// Difference between vector components.
vec4 df(vec4 A, vec4 B)
{
return vec4(abs(A-B));
}
// Compare two vectors and return their components are different.
vec4 diff(vec4 A, vec4 B)
{
return vec4(notEqual(A, B));
}
// Determine if two vector components are equal based on a threshold.
vec4 eq(vec4 A, vec4 B)
{
return (step(df(A, B), vec4(XBR_EQ_THRESHOLD)));
}
// Determine if two vector components are NOT equal based on a threshold.
vec4 neq(vec4 A, vec4 B)
{
return (vec4(1.0, 1.0, 1.0, 1.0) - eq(A, B));
}
// Weighted distance.
vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h)
{
return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));
}
float c_df(vec3 c1, vec3 c2)
{
vec3 df = abs(c1 - c2);
return df.r + df.g + df.b;
}
vec4 XBR()
{
vec4 proxy_dest = vec4(0, 0, 0, 1);
ivec2 tex_fetch_coords = ivec2(gl_FragCoord.xy / 2.0);
ivec2 tex_coords = ivec2(gl_FragCoord.xy);
vec4 edri, edr, edr_l, edr_u, px; // px = pixel, edr = edge detection rule
vec4 irlv0, irlv1, irlv2l, irlv2u, block_3d;
vec4 fx, fx_l, fx_u; // inequations of straight lines.
vec2 fp = fract(gl_FragCoord.xy / 2.0);
vec3 A1 = SrcGet(tex_fetch_coords + ivec2(-1, -2)).xyz;
vec3 B1 = SrcGet(tex_fetch_coords + ivec2( 0, -2)).xyz;
vec3 C1 = SrcGet(tex_fetch_coords + ivec2(+1, -2)).xyz;
vec3 A = SrcGet(tex_fetch_coords + ivec2(-1, -1)).xyz;
vec3 B = SrcGet(tex_fetch_coords + ivec2( 0, -1)).xyz;
vec3 C = SrcGet(tex_fetch_coords + ivec2(+1, -1)).xyz;
vec3 D = SrcGet(tex_fetch_coords + ivec2(-1, 0)).xyz;
vec4 Eo = SrcGet(tex_fetch_coords);
vec3 E = Eo.xyz;
vec3 F = SrcGet(tex_fetch_coords + ivec2(+1, 0)).xyz;
vec3 G = SrcGet(tex_fetch_coords + ivec2(-1, +1)).xyz;
vec3 H = SrcGet(tex_fetch_coords + ivec2( 0, +1)).xyz;
vec3 I = SrcGet(tex_fetch_coords + ivec2(+1, +1)).xyz;
vec3 G5 = SrcGet(tex_fetch_coords + ivec2(-1, +2)).xyz;
vec3 H5 = SrcGet(tex_fetch_coords + ivec2( 0, +2) ).xyz;
vec3 I5 = SrcGet(tex_fetch_coords + ivec2(+1, +2)).xyz;
vec3 A0 = SrcGet(tex_fetch_coords + ivec2(-2, -1)).xyz;
vec3 D0 = SrcGet(tex_fetch_coords + ivec2(-2, 0)).xyz;
vec3 G0 = SrcGet(tex_fetch_coords + ivec2(-2, +1)).xyz;
vec3 C4 = SrcGet(tex_fetch_coords + ivec2(+2, -1)).xyz;
vec3 F4 = SrcGet(tex_fetch_coords + ivec2(+2, 0)).xyz;
vec3 I4 = SrcGet(tex_fetch_coords + ivec2(+2, +1)).xyz;
vec4 b = vec4(dot(B ,rgbw), dot(D ,rgbw), dot(H ,rgbw), dot(F ,rgbw));
vec4 c = vec4(dot(C ,rgbw), dot(A ,rgbw), dot(G ,rgbw), dot(I ,rgbw));
vec4 d = b.yzwx;
vec4 e = vec4(dot(E,rgbw));
vec4 f = b.wxyz;
vec4 g = c.zwxy;
vec4 h = b.zwxy;
vec4 i = c.wxyz;
vec4 i4 = vec4(dot(I4,rgbw), dot(C1,rgbw), dot(A0,rgbw), dot(G5,rgbw));
vec4 i5 = vec4(dot(I5,rgbw), dot(C4,rgbw), dot(A1,rgbw), dot(G0,rgbw));
vec4 h5 = vec4(dot(H5,rgbw), dot(F4,rgbw), dot(B1,rgbw), dot(D0,rgbw));
vec4 f4 = h5.yzwx;
// These inequations define the line below which interpolation occurs.
fx = (Ao*fp.y+Bo*fp.x);
fx_l = (Ax*fp.y+Bx*fp.x);
fx_u = (Ay*fp.y+By*fp.x);
irlv1 = irlv0 = diff(e,f) * diff(e,h);
#ifdef CORNER_B
// E1/K case (X odd, Y even)
irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );
#endif
#ifdef CORNER_D
// E3/M case (X odd, Y odd)
vec4 c1 = i4.yzwx;
vec4 g0 = i5.wxyz;
irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));
#endif
#ifdef CORNER_C
irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );
#endif
irlv2l = diff(e,g) * diff(d,g);
irlv2u = diff(e,c) * diff(b,c);
vec4 fx45i = clamp((fx + delta -Co - Ci)/(2.0*delta ), 0.0, 1.0);
vec4 fx45 = clamp((fx + delta -Co )/(2.0*delta ), 0.0, 1.0);
vec4 fx30 = clamp((fx_l + delta_l -Cx )/(2.0*delta_l), 0.0, 1.0);
vec4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0);
vec4 wd1 = wd( e, c, g, i, h5, f4, h, f);
vec4 wd2 = wd( h, d, i5, f, i4, b, e, i);
edri = step(wd1, wd2) * irlv0;
edr = step(wd1 + vec4(0.1, 0.1, 0.1, 0.1), wd2) * step(vec4(0.5, 0.5, 0.5, 0.5), irlv1);
edr_l = step( lv2_cf*df(f,g), df(h,c) ) * irlv2l * edr;
edr_u = step( lv2_cf*df(h,c), df(f,g) ) * irlv2u * edr;
fx45 = edr * fx45;
fx30 = edr_l * fx30;
fx60 = edr_u * fx60;
fx45i = edri * fx45i;
px = step(df(e,f), df(e,h));
#ifdef SMOOTH_TIPS
//vec4 maximos = max(max(fx30, fx60), max(fx45, fx45i));
#endif
#ifndef SMOOTH_TIPS
vec4 maximos = max(max(fx30, fx60), fx45);
#endif
vec3 res1 = E;
res1 = mix(res1, mix(H, F, px.x), maximos.x);
res1 = mix(res1, mix(B, D, px.z), maximos.z);
vec3 res2 = E;
res2 = mix(res2, mix(F, B, px.y), maximos.y);
res2 = mix(res2, mix(D, H, px.w), maximos.w);
vec3 res = mix(res1, res2, step(c_df(E, res1), c_df(E, res2)));
proxy_dest.rgb = res;
proxy_dest.a = Eo.a;
return proxy_dest;
}
void main () {
dest = XBR();
}

View File

@@ -0,0 +1,14 @@
#version 460 core
layout(location = 0) out VertexData {
vec2 v_tex0;
};
void main()
{
v_tex0 = vec2(float((gl_VertexIndex << 1) & 2), float(gl_VertexIndex & 2u));
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
gl_Position.y = -gl_Position.y;
#endif
}

View File

@@ -4591,6 +4591,11 @@ void FullscreenUI::DrawGraphicsSettingsPage()
"Hacks", "UseOldMDECRoutines", false);
const bool texture_cache_enabled = GetEffectiveBoolSetting(bsi, "GPU", "EnableTextureCache", false);
DrawEnumSetting(bsi, FSUI_ICONSTR(ICON_FA_EXPAND_ALT, "Texture Scaling"),
FSUI_CSTR("Applies a texture scaling filter to textures as a pre-processing step."), "GPU",
"TextureScaling", GPUTextureScaling::Disabled, &Settings::ParseGPUTextureScalingName,
&Settings::GetGPUTextureScalingName, &Settings::GetGPUTextureScalingDisplayName,
GPUTextureScaling::MaxCount, texture_cache_enabled);
DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_FILE_IMPORT, "Enable Texture Replacements"),
FSUI_CSTR("Enables loading of replacement textures. Not compatible with all games."),
"TextureReplacements", "EnableTextureReplacements", false, texture_cache_enabled);
@@ -7686,6 +7691,7 @@ TRANSLATE_NOOP("FullscreenUI", "Allow Booting Without SBI File");
TRANSLATE_NOOP("FullscreenUI", "Allows loading protected games without subchannel information.");
TRANSLATE_NOOP("FullscreenUI", "An error occurred while deleting empty game settings:\n{}");
TRANSLATE_NOOP("FullscreenUI", "An error occurred while saving game settings:\n{}");
TRANSLATE_NOOP("FullscreenUI", "Applies a texture scaling filter to textures as a pre-processing step.");
TRANSLATE_NOOP("FullscreenUI", "Apply Image Patches");
TRANSLATE_NOOP("FullscreenUI", "Are you sure you want to clear the current post-processing chain? All configuration will be lost.");
TRANSLATE_NOOP("FullscreenUI", "Aspect Ratio");
@@ -7969,7 +7975,10 @@ TRANSLATE_NOOP("FullscreenUI", "Log To File");
TRANSLATE_NOOP("FullscreenUI", "Log To System Console");
TRANSLATE_NOOP("FullscreenUI", "Logging");
TRANSLATE_NOOP("FullscreenUI", "Logging Settings");
TRANSLATE_NOOP("FullscreenUI", "Logging in to RetroAchievements...");
TRANSLATE_NOOP("FullscreenUI", "Login");
TRANSLATE_NOOP("FullscreenUI", "Login Error");
TRANSLATE_NOOP("FullscreenUI", "Login Failed.\nError: {}\nPlease check your username and password, and try again.");
TRANSLATE_NOOP("FullscreenUI", "Login token generated on {}");
TRANSLATE_NOOP("FullscreenUI", "Logout");
TRANSLATE_NOOP("FullscreenUI", "Logs BIOS calls to printf(). Not all games contain debugging messages.");
@@ -8026,6 +8035,7 @@ TRANSLATE_NOOP("FullscreenUI", "PGXP (Precision Geometry Transform Pipeline)");
TRANSLATE_NOOP("FullscreenUI", "PGXP Depth Buffer");
TRANSLATE_NOOP("FullscreenUI", "PGXP Geometry Correction");
TRANSLATE_NOOP("FullscreenUI", "Parent Directory");
TRANSLATE_NOOP("FullscreenUI", "Password: ");
TRANSLATE_NOOP("FullscreenUI", "Patches");
TRANSLATE_NOOP("FullscreenUI", "Patches the BIOS to skip the boot animation. Safe to enable.");
TRANSLATE_NOOP("FullscreenUI", "Path");
@@ -8041,6 +8051,7 @@ TRANSLATE_NOOP("FullscreenUI", "Performance enhancement - jumps directly between
TRANSLATE_NOOP("FullscreenUI", "Perspective Correct Colors");
TRANSLATE_NOOP("FullscreenUI", "Perspective Correct Textures");
TRANSLATE_NOOP("FullscreenUI", "Plays sound effects for events such as achievement unlocks and leaderboard submissions.");
TRANSLATE_NOOP("FullscreenUI", "Please enter your user name and password for retroachievements.org below. Your password will not be saved in DuckStation, an access token will be generated and used instead.");
TRANSLATE_NOOP("FullscreenUI", "Port {} Controller Type");
TRANSLATE_NOOP("FullscreenUI", "Post-Processing Settings");
TRANSLATE_NOOP("FullscreenUI", "Post-processing chain cleared.");
@@ -8088,6 +8099,7 @@ TRANSLATE_NOOP("FullscreenUI", "Resolution change will be applied after restarti
TRANSLATE_NOOP("FullscreenUI", "Restores the state of the system prior to the last state loaded.");
TRANSLATE_NOOP("FullscreenUI", "Resume Game");
TRANSLATE_NOOP("FullscreenUI", "Resume Last Session");
TRANSLATE_NOOP("FullscreenUI", "RetroAchievements Login");
TRANSLATE_NOOP("FullscreenUI", "Return To Game");
TRANSLATE_NOOP("FullscreenUI", "Return to desktop mode, or exit the application.");
TRANSLATE_NOOP("FullscreenUI", "Return to the previous menu.");
@@ -8222,6 +8234,7 @@ TRANSLATE_NOOP("FullscreenUI", "Temporarily disables all enhancements, useful wh
TRANSLATE_NOOP("FullscreenUI", "Test Unofficial Achievements");
TRANSLATE_NOOP("FullscreenUI", "Texture Filtering");
TRANSLATE_NOOP("FullscreenUI", "Texture Replacements");
TRANSLATE_NOOP("FullscreenUI", "Texture Scaling");
TRANSLATE_NOOP("FullscreenUI", "Textures Directory");
TRANSLATE_NOOP("FullscreenUI", "The SDL input source supports most controllers.");
TRANSLATE_NOOP("FullscreenUI", "The XInput source provides support for XBox 360/XBox One/XBox Series controllers.");
@@ -8259,6 +8272,7 @@ TRANSLATE_NOOP("FullscreenUI", "Use Light Theme");
TRANSLATE_NOOP("FullscreenUI", "Use Old MDEC Routines");
TRANSLATE_NOOP("FullscreenUI", "Use Single Card For Multi-Disc Games");
TRANSLATE_NOOP("FullscreenUI", "Use Software Renderer For Readbacks");
TRANSLATE_NOOP("FullscreenUI", "User Name: ");
TRANSLATE_NOOP("FullscreenUI", "Username: {}");
TRANSLATE_NOOP("FullscreenUI", "Uses PGXP for all instructions, not just memory operations.");
TRANSLATE_NOOP("FullscreenUI", "Uses a blit presentation model instead of flipping. This may be needed on some systems.");

View File

@@ -240,7 +240,8 @@ static bool ShouldTrackVRAMWrites();
static bool IsDumpingVRAMWriteTextures();
static void UpdateVRAMTrackingState();
static bool CompilePipelines();
static bool CompileReplacementPipelines();
static bool CompileTextureScalingPipeline();
static void DestroyPipelines();
static const Source* ReturnSource(Source* source, const GSVector4i uv_rect, PaletteRecordFlags flags);
@@ -284,6 +285,7 @@ static void DecodeTexture4(const u16* page, const u16* palette, u32 width, u32 h
static void DecodeTexture8(const u16* page, const u16* palette, u32 width, u32 height, u32* dest, u32 dest_stride);
static void DecodeTexture16(const u16* page, u32 width, u32 height, u32* dest, u32 dest_stride);
static void DecodeTexture(u8 page, GPUTexturePaletteReg palette, GPUTextureMode mode, GPUTexture* texture);
static std::unique_ptr<GPUTexture> ScaleTexture(std::unique_ptr<GPUTexture> texture);
static std::optional<TextureReplacementType> GetTextureReplacementTypeFromFileTitle(const std::string_view file_title);
static bool HasValidReplacementExtension(const std::string_view path);
@@ -501,6 +503,20 @@ ALWAYS_INLINE static float RectDistance(const GSVector4i& lhs, const GSVector4i&
}
namespace {
enum TextureScaler
{
None,
XBR,
};
struct TextureScalerInfo
{
u32 scale;
u32 compute_local_size;
const char* vertex_shader_path;
const char* fragment_shader_path;
ALWAYS_INLINE bool IsComputeShader() const { return (compute_local_size > 0); }
};
struct GPUTextureCacheState
{
Settings::TextureReplacementSettings::Configuration config;
@@ -509,6 +525,7 @@ struct GPUTextureCacheState
VRAMWrite* last_vram_write = nullptr;
bool track_vram_writes = false;
const TextureScalerInfo* texture_scaler;
HashCache hash_cache;
/// List of candidates for purging when the hash cache gets too large.
@@ -517,6 +534,8 @@ struct GPUTextureCacheState
/// List of VRAM writes collected when saving state.
std::vector<VRAMWrite*> temp_vram_write_list;
std::unique_ptr<GPUPipeline> texture_scaler_pipeline;
std::unique_ptr<GPUTexture> replacement_texture_render_target;
std::unique_ptr<GPUPipeline> replacement_draw_pipeline; // copies alpha as-is
std::unique_ptr<GPUPipeline> replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent)
@@ -540,6 +559,15 @@ struct GPUTextureCacheState
ALIGN_TO_CACHE_LINE GPUTextureCacheState s_state;
static constexpr const TextureScalerInfo s_texture_scalers[] = {
{2, 0, "shaders/system/texscale.vert", "shaders/system/texscale-hq2x.frag"}, // HQ2x
{3, 0, "shaders/system/texscale.vert", "shaders/system/texscale-hq3x.frag"}, // HQ3x
{4, 0, "shaders/system/texscale.vert", "shaders/system/texscale-hq4x.frag"}, // HQ4x
{2, 8, nullptr, "shaders/system/texscale-mmpx.comp"}, // MMPX
{2, 8, "shaders/system/texscale.vert", "shaders/system/texscale-scale2x.comp"}, // Scale2x
{2, 0, "shaders/system/texscale.vert", "shaders/system/texscale-xbr.frag"}, // XBR
};
} // namespace GPUTextureCache
bool GPUTextureCache::ShouldTrackVRAMWrites()
@@ -562,11 +590,18 @@ bool GPUTextureCache::IsDumpingVRAMWriteTextures()
bool GPUTextureCache::Initialize()
{
s_state.texture_scaler = (g_settings.gpu_texture_scaling == GPUTextureScaling::Disabled) ?
nullptr :
&s_texture_scalers[static_cast<size_t>(g_settings.gpu_texture_scaling) - 1];
LoadLocalConfiguration(false, false);
UpdateVRAMTrackingState();
if (!CompilePipelines())
if (!CompileReplacementPipelines())
return false;
if (s_state.texture_scaler && !CompileTextureScalingPipeline()) [[unlikely]]
s_state.texture_scaler = nullptr;
return true;
}
@@ -582,11 +617,16 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old
Invalidate();
DestroyPipelines();
if (!CompilePipelines()) [[unlikely]]
if (!CompileReplacementPipelines()) [[unlikely]]
Panic("Failed to compile pipelines on TC settings change");
}
}
const TextureScalerInfo* old_scaler = s_state.texture_scaler;
s_state.texture_scaler = (!use_texture_cache || g_settings.gpu_texture_scaling == GPUTextureScaling::Disabled) ?
nullptr :
&s_texture_scalers[static_cast<size_t>(g_settings.gpu_texture_scaling) - 1];
// Reload textures if configuration changes.
const bool old_replacement_scale_linear_filter = s_state.config.replacement_scale_linear_filter;
if (LoadLocalConfiguration(false, false) ||
@@ -599,13 +639,21 @@ void GPUTextureCache::UpdateSettings(bool use_texture_cache, const Settings& old
{
if (s_state.config.replacement_scale_linear_filter != old_replacement_scale_linear_filter)
{
if (!CompilePipelines()) [[unlikely]]
if (!CompileReplacementPipelines()) [[unlikely]]
Panic("Failed to compile pipelines on TC replacement settings change");
}
}
ReloadTextureReplacements(false);
}
if (use_texture_cache && s_state.texture_scaler != old_scaler)
{
if (!CompileTextureScalingPipeline()) [[unlikely]]
s_state.texture_scaler = nullptr;
Invalidate();
}
}
bool GPUTextureCache::DoState(StateWrapper& sw, bool skip)
@@ -756,7 +804,7 @@ void GPUTextureCache::Shutdown()
s_state.game_id = {};
}
bool GPUTextureCache::CompilePipelines()
bool GPUTextureCache::CompileReplacementPipelines()
{
if (!g_settings.texture_replacements.enable_texture_replacements)
return true;
@@ -807,6 +855,7 @@ bool GPUTextureCache::CompilePipelines()
void GPUTextureCache::DestroyPipelines()
{
s_state.texture_scaler_pipeline.reset();
s_state.replacement_draw_pipeline.reset();
s_state.replacement_semitransparent_draw_pipeline.reset();
}
@@ -2057,6 +2106,8 @@ GPUTextureCache::HashCacheEntry* GPUTextureCache::LookupHashCache(SourceKey key,
}
DecodeTexture(key.page, key.palette, key.mode, entry.texture.get());
if (s_state.texture_scaler)
entry.texture = ScaleTexture(std::move(entry.texture));
if (g_settings.texture_replacements.enable_texture_replacements)
ApplyTextureReplacements(key, tex_hash, pal_hash, &entry);
@@ -3340,4 +3391,197 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash,
entry->texture = std::move(replacement_tex);
g_gpu->RestoreDeviceContext();
}
}
bool GPUTextureCache::CompileTextureScalingPipeline()
{
s_state.texture_scaler_pipeline.reset();
const TextureScalerInfo* const info = s_state.texture_scaler;
if (!info)
return true;
static constexpr auto add_defines = [](std::string& source) {
std::string::size_type pos = source.find("#version ");
if (pos == std::string::npos)
return;
pos = source.find('\n', pos);
if (pos == std::string::npos)
return;
const RenderAPI render_api = g_gpu_device->GetRenderAPI();
const bool vulkan = (render_api == RenderAPI::Vulkan);
const std::string macros =
fmt::format("#define API_D3D11 {}\n"
"#define API_D3D12 {}\n"
"#define API_OPENGL {}\n"
"#define API_OPENGL_ES {}\n"
"#define API_VULKAN {}\n"
"#define API_METAL {}\n"
"#define UNIFORM_BLOCK_LAYOUT layout(push_constant)\n"
"#define TEXTURE_LAYOUT(index) layout(set = {}, binding = index)\n"
"#define IMAGE_LAYOUT(index, format) layout(set = {}, binding = index, format)\n",
BoolToUInt32(render_api == RenderAPI::D3D11), BoolToUInt32(render_api == RenderAPI::D3D12),
BoolToUInt32(render_api == RenderAPI::OpenGL), BoolToUInt32(render_api == RenderAPI::OpenGLES),
BoolToUInt32(render_api == RenderAPI::Vulkan), BoolToUInt32(render_api == RenderAPI::Metal),
vulkan ? 0 : 1, vulkan ? 1 : 2);
source.insert(pos + 1, macros);
};
Error error;
std::optional<std::string> source;
if (!info->IsComputeShader())
{
source = Host::ReadResourceFileToString(info->vertex_shader_path, true, &error);
if (!source.has_value())
{
ERROR_LOG("Failed to read scaling vertex shader '{}': {}", info->vertex_shader_path, error.GetDescription());
return false;
}
add_defines(source.value());
std::unique_ptr<GPUShader> vertex_shader =
g_gpu_device->CreateShader(GPUShaderStage::Vertex, GPUShaderLanguage::GLSLVK, source.value(), &error);
if (!vertex_shader)
{
ERROR_LOG("Failed to compile scaling vertex shader '{}': {}", info->vertex_shader_path, error.GetDescription());
return false;
}
source = Host::ReadResourceFileToString(info->fragment_shader_path, true, &error);
if (!source.has_value())
{
ERROR_LOG("Failed to read scaling fragment shader '{}': {}", info->fragment_shader_path, error.GetDescription());
return false;
}
add_defines(source.value());
std::unique_ptr<GPUShader> fragment_shader =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, GPUShaderLanguage::GLSLVK, source.value(), &error);
if (!fragment_shader)
{
ERROR_LOG("Failed to compile scaling fragment shader '{}': {}", info->fragment_shader_path,
error.GetDescription());
return false;
}
GPUPipeline::GraphicsConfig config;
config.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
config.primitive = GPUPipeline::Primitive::Triangles;
config.input_layout = {};
config.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
config.depth = GPUPipeline::DepthState::GetNoTestsState();
config.blend = GPUPipeline::BlendState::GetNoBlendingState();
config.vertex_shader = vertex_shader.get();
config.fragment_shader = fragment_shader.get();
config.geometry_shader = nullptr;
config.SetTargetFormats(GPUTexture::Format::RGBA8);
config.samples = 1;
config.per_sample_shading = false;
config.render_pass_flags = GPUPipeline::NoRenderPassFlags;
s_state.texture_scaler_pipeline = g_gpu_device->CreatePipeline(config, &error);
if (!s_state.texture_scaler_pipeline)
{
ERROR_LOG("Failed to compile scaling pipeline {}", error.GetDescription());
return false;
}
}
else
{
source = Host::ReadResourceFileToString(info->fragment_shader_path, true, &error);
if (!source.has_value())
{
ERROR_LOG("Failed to read scaling compute shader '{}': {}", info->fragment_shader_path, error.GetDescription());
return false;
}
add_defines(source.value());
std::unique_ptr<GPUShader> compute_shader =
g_gpu_device->CreateShader(GPUShaderStage::Compute, GPUShaderLanguage::GLSLVK, source.value(), &error);
if (!compute_shader)
{
ERROR_LOG("Failed to compile scaling compute shader '{}': {}", info->fragment_shader_path,
error.GetDescription());
return false;
}
GPUPipeline::ComputeConfig config;
config.layout = GPUPipeline::Layout::ComputeSingleTextureAndPushConstants;
config.compute_shader = compute_shader.get();
s_state.texture_scaler_pipeline = g_gpu_device->CreatePipeline(config, &error);
if (!s_state.texture_scaler_pipeline)
{
ERROR_LOG("Failed to compile scaling pipeline {}", error.GetDescription());
return false;
}
}
return true;
}
std::unique_ptr<GPUTexture> GPUTextureCache::ScaleTexture(std::unique_ptr<GPUTexture> texture)
{
const TextureScalerInfo* const info = s_state.texture_scaler;
// TODO: rounds
const u32 new_width = texture->GetWidth() * info->scale;
const u32 new_height = texture->GetHeight() * info->scale;
const GPUTexture::Type rt_type =
info->IsComputeShader() ? GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget;
auto rt = g_gpu_device->FetchAutoRecycleTexture(new_width, new_height, 1, 1, 1, rt_type, texture->GetFormat());
if (!rt) [[unlikely]]
{
WARNING_LOG("Failed to create {}x{} RT for scaling", new_width, new_height);
return texture;
}
g_gpu_device->SetPipeline(s_state.texture_scaler_pipeline.get());
g_gpu_device->SetRenderTarget(rt.get(), nullptr,
info->IsComputeShader() ? GPUPipeline::BindRenderTargetsAsImages :
GPUPipeline::NoRenderPassFlags);
g_gpu_device->SetTextureSampler(0, texture.get(), g_gpu_device->GetNearestSampler());
if (!info->IsComputeShader())
{
g_gpu_device->InvalidateRenderTarget(rt.get());
g_gpu_device->SetViewportAndScissor(rt->GetRect());
g_gpu_device->Draw(3, 0);
}
else
{
struct ComputeUBO
{
u32 src_size[2];
u32 dst_size[2];
};
const ComputeUBO uniforms = {.src_size = {texture->GetWidth(), texture->GetHeight()},
.dst_size = {new_width, new_height}};
const auto& [dispatch_x, dispatch_y, dispatch_z] = GPUDevice::GetDispatchCount(
texture->GetWidth(), texture->GetHeight(), 1, info->compute_local_size, info->compute_local_size, 1);
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Dispatch(dispatch_x, dispatch_y, dispatch_z);
}
std::unique_ptr<GPUTexture> new_texture =
g_gpu_device->CreateTexture(new_width, new_height, 1, 1, 1, GPUTexture::Type::Texture, rt->GetFormat());
if (!new_texture)
{
WARNING_LOG("Failed to create {}x{} texture for scaling", new_width, new_height);
return texture;
}
rt->MakeReadyForSampling();
g_gpu_device->CopyTextureRegion(new_texture.get(), 0, 0, 0, 0, rt.get(), 0, 0, 0, 0, new_width, new_height);
g_gpu_device->RecycleTexture(std::move(texture));
g_gpu->RestoreDeviceContext();
return new_texture;
}

View File

@@ -217,6 +217,10 @@ void Settings::Load(const SettingsInterface& si, const SettingsInterface& contro
ParseTextureFilterName(
si.GetStringValue("GPU", "SpriteTextureFilter", GetTextureFilterName(gpu_texture_filter)).c_str())
.value_or(gpu_texture_filter);
gpu_texture_scaling =
ParseGPUTextureScalingName(
si.GetStringValue("GPU", "TextureScaling", GetGPUTextureScalingName(gpu_texture_scaling)).c_str())
.value_or(gpu_texture_scaling);
gpu_line_detect_mode =
ParseLineDetectModeName(
si.GetStringValue("GPU", "LineDetectMode", GetLineDetectModeName(DEFAULT_GPU_LINE_DETECT_MODE)).c_str())
@@ -542,6 +546,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const
si.SetStringValue(
"GPU", "SpriteTextureFilter",
(gpu_sprite_texture_filter != gpu_texture_filter) ? GetTextureFilterName(gpu_sprite_texture_filter) : "");
si.SetStringValue("GPU", "TextureScaling", GetGPUTextureScalingName(gpu_texture_scaling));
si.SetStringValue("GPU", "LineDetectMode", GetLineDetectModeName(gpu_line_detect_mode));
si.SetStringValue("GPU", "DownsampleMode", GetDownsampleModeName(gpu_downsample_mode));
si.SetUIntValue("GPU", "DownsampleScale", gpu_downsample_scale);
@@ -992,6 +997,10 @@ void Settings::FixIncompatibleSettings(bool display_osd_messages)
}
}
// scaling depends on TC
g_settings.gpu_texture_scaling =
g_settings.gpu_texture_cache ? g_settings.gpu_texture_scaling : GPUTextureScaling::Disabled;
// if challenge mode is enabled, disable things like rewind since they use save states
if (Achievements::IsHardcoreModeActive())
{
@@ -1580,6 +1589,46 @@ const char* Settings::GetGPUDumpCompressionModeDisplayName(GPUDumpCompressionMod
"GPUDumpCompressionMode");
}
static constexpr const std::array s_texture_scaling_names = {
"Disabled", "HQ2x", "HQ3x", "HQ4x", "MMPX", "Scale2x", "xBR",
};
static constexpr const std::array s_texture_scaling_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Disabled", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ2x", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ3x", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "HQ4x", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "MMPX", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Scale2x", "GPUTextureScaling"),
TRANSLATE_DISAMBIG_NOOP("Settings", "xBR", "GPUTextureScaling"),
};
static_assert(s_texture_scaling_names.size() == static_cast<size_t>(GPUTextureScaling::MaxCount));
static_assert(s_texture_scaling_display_names.size() == static_cast<size_t>(GPUTextureScaling::MaxCount));
std::optional<GPUTextureScaling> Settings::ParseGPUTextureScalingName(const char* str)
{
int index = 0;
for (const char* name : s_texture_scaling_names)
{
if (StringUtil::Strcasecmp(name, str) == 0)
return static_cast<GPUTextureScaling>(index);
index++;
}
return std::nullopt;
}
const char* Settings::GetGPUTextureScalingName(GPUTextureScaling scaler)
{
return s_texture_scaling_names[static_cast<size_t>(scaler)];
}
const char* Settings::GetGPUTextureScalingDisplayName(GPUTextureScaling scaler)
{
return Host::TranslateToCString("Settings", s_texture_scaling_display_names[static_cast<size_t>(scaler)],
"GPUTextureScaling");
}
static constexpr const std::array s_display_deinterlacing_mode_names = {
"Disabled", "Weave", "Blend", "Adaptive", "Progressive",
};

View File

@@ -127,6 +127,7 @@ struct Settings
ForceVideoTimingMode gpu_force_video_timing = DEFAULT_FORCE_VIDEO_TIMING_MODE;
GPUTextureFilter gpu_texture_filter = DEFAULT_GPU_TEXTURE_FILTER;
GPUTextureFilter gpu_sprite_texture_filter = DEFAULT_GPU_TEXTURE_FILTER;
GPUTextureScaling gpu_texture_scaling = GPUTextureScaling::Disabled;
GPULineDetectMode gpu_line_detect_mode = DEFAULT_GPU_LINE_DETECT_MODE;
GPUDownsampleMode gpu_downsample_mode = DEFAULT_GPU_DOWNSAMPLE_MODE;
u8 gpu_downsample_scale = 1;
@@ -419,6 +420,10 @@ struct Settings
static const char* GetGPUDumpCompressionModeName(GPUDumpCompressionMode mode);
static const char* GetGPUDumpCompressionModeDisplayName(GPUDumpCompressionMode mode);
static std::optional<GPUTextureScaling> ParseGPUTextureScalingName(const char* str);
static const char* GetGPUTextureScalingName(GPUTextureScaling scaler);
static const char* GetGPUTextureScalingDisplayName(GPUTextureScaling scaler);
static std::optional<DisplayDeinterlacingMode> ParseDisplayDeinterlacingMode(const char* str);
static const char* GetDisplayDeinterlacingModeName(DisplayDeinterlacingMode mode);
static const char* GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacingMode mode);

View File

@@ -4360,6 +4360,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings)
g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale ||
g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode ||
g_settings.gpu_texture_cache != old_settings.gpu_texture_cache ||
(g_settings.gpu_texture_cache && g_settings.gpu_texture_scaling != old_settings.gpu_texture_scaling) ||
g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode ||
g_settings.display_24bit_chroma_smoothing != old_settings.display_24bit_chroma_smoothing ||
g_settings.display_crop_mode != old_settings.display_crop_mode ||
@@ -4648,7 +4649,7 @@ void System::WarnAboutUnsafeSettings()
if (g_settings.gpu_texture_cache)
{
append(
ICON_FA_PAINT_ROLLER,
ICON_EMOJI_WARNING,
TRANSLATE_SV("System",
"Texture cache is enabled. This feature is experimental, some games may not render correctly."));
}

View File

@@ -138,6 +138,18 @@ enum class GPUDumpCompressionMode : u8
MaxCount
};
enum class GPUTextureScaling : u8
{
Disabled,
HQ2X,
HQ3X,
HQ4X,
MMPX,
Scale2X,
XBR,
MaxCount
};
enum class DisplayCropMode : u8
{
None,
@@ -298,6 +310,6 @@ enum class ForceVideoTimingMode : u8
Disabled,
NTSC,
PAL,
Count,
};

View File

@@ -245,6 +245,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureCache, "GPU", "EnableTextureCache", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.useOldMDECRoutines, "Hacks", "UseOldMDECRoutines", false);
SettingWidgetBinder::BindWidgetToEnumSetting(
sif, m_ui.textureScaling, "GPU", "TextureScaling", &Settings::ParseGPUTextureScalingName,
&Settings::GetGPUTextureScalingName, &Settings::GetGPUTextureScalingDisplayName, GPUTextureScaling::Disabled,
GPUTextureScaling::MaxCount);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.enableTextureReplacements, "TextureReplacements",
"EnableTextureReplacements", false);
@@ -582,6 +586,8 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget*
dialog->registerWidgetHelp(m_ui.useOldMDECRoutines, tr("Use Old MDEC Routines"), tr("Unchecked"),
tr("Enables the older, less accurate MDEC decoding routines. May be required for old "
"replacement backgrounds to match/load."));
dialog->registerWidgetHelp(m_ui.textureScaling, tr("Texture Scaling"), tr("Disabled"),
tr("Applies a texture scaling filter to textures as a pre-processing step."));
dialog->registerWidgetHelp(m_ui.enableTextureReplacements, tr("Enable Texture Replacements"), tr("Unchecked"),
tr("Enables loading of replacement textures. Not compatible with all games."));
@@ -1146,6 +1152,7 @@ void GraphicsSettingsWidget::onMediaCaptureAudioEnabledChanged()
void GraphicsSettingsWidget::onEnableTextureCacheChanged()
{
const bool tc_enabled = m_dialog->getEffectiveBoolValue("GPU", "EnableTextureCache", false);
m_ui.textureScaling->setEnabled(tc_enabled);
m_ui.enableTextureReplacements->setEnabled(tc_enabled);
m_ui.enableTextureDumping->setEnabled(tc_enabled);
onEnableTextureDumpingChanged();

View File

@@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>584</width>
<height>477</height>
<height>474</height>
</rect>
</property>
<property name="windowTitle">
@@ -1088,30 +1088,34 @@
<property name="title">
<string>General Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout_9">
<item row="1" column="0">
<widget class="QCheckBox" name="enableTextureCache">
<property name="text">
<string>Enable Texture Cache</string>
</property>
</widget>
</item>
<layout class="QFormLayout" name="formLayout_13">
<item row="0" column="0" colspan="2">
<widget class="QLabel" name="label_4">
<layout class="QGridLayout" name="gridLayout_11">
<item row="0" column="0">
<widget class="QCheckBox" name="enableTextureCache">
<property name="text">
<string>Enable Texture Cache (Experimental)</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="useOldMDECRoutines">
<property name="text">
<string>Use Old MDEC Routines</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>The texture cache is currently experimental, and may cause rendering errors in some games.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
<string>Texture Scaling:</string>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QCheckBox" name="useOldMDECRoutines">
<property name="text">
<string>Use Old MDEC Routines</string>
</property>
</widget>
<widget class="QComboBox" name="textureScaling"/>
</item>
</layout>
</widget>
@@ -1246,7 +1250,7 @@
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
<height>0</height>
</size>
</property>
</spacer>

View File

@@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders =
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.explicit_present = false;
@@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += data_size;
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
DebugAssert(res.index_aligned == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
}
void* D3D11Device::MapUniformBuffer(u32 size)
@@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += size;
BindUniformBuffer(pos, req_size);
}
void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
{
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = pos / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
const UINT first_constant = offset / 16u;
const UINT num_constants = size / 16u;
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
}
else
{
DebugAssert(pos == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
DebugAssert(offset == 0);
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
else
{
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
}
@@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
if (!m_current_compute_shader)
{
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
}
else
{
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
}
}
else
{
@@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
{
m_current_textures[slot] = T;
m_context->PSSetShaderResources(slot, 1, &T);
if (m_current_compute_shader)
m_context->CSSetShaderResources(slot, 1, &T);
}
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
m_context->PSSetSamplers(slot, 1, &S);
if (m_current_compute_shader)
m_context->CSSetSamplers(slot, 1, &S);
}
}
@@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
if (m_current_textures[slot] != B)
{
m_current_textures[slot] = B;
// Compute doesn't support texture buffers, yet...
m_context->PSSetShaderResources(slot, 1, &B);
}
}
@@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)
void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->Draw(vertex_count, base_vertex);
}
void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->DrawIndexed(index_count, base_index, base_vertex);
}
@@ -1129,3 +1151,10 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
void D3D11Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
DebugAssert(m_current_compute_shader);
s_stats.num_draws++;
m_context->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@@ -75,6 +75,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -98,6 +99,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@@ -140,6 +142,8 @@ private:
bool CreateBuffers();
void DestroyBuffers();
void BindUniformBuffer(u32 offset, u32 size);
void UnbindComputePipeline();
bool IsRenderTargetBound(const D3D11Texture* tex) const;
@@ -180,6 +184,7 @@ private:
ID3D11VertexShader* m_current_vertex_shader = nullptr;
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
ID3D11PixelShader* m_current_pixel_shader = nullptr;
ID3D11ComputeShader* m_current_compute_shader = nullptr;
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
ID3D11DepthStencilState* m_current_depth_state = nullptr;
ID3D11BlendState* m_current_blend_state = nullptr;

View File

@@ -3,6 +3,7 @@
#include "d3d11_pipeline.h"
#include "d3d11_device.h"
#include "d3d11_texture.h"
#include "d3d_common.h"
#include "common/assert.h"
@@ -121,10 +122,10 @@ std::unique_ptr<GPUShader> D3D11Device::CreateShaderFromSource(GPUShaderStage st
D3D11Pipeline::D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds,
ComPtr<ID3D11BlendState> bs, ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11PixelShader> ps,
ComPtr<ID3D11GeometryShader> gs, ComPtr<ID3D11DeviceChild> ps_or_cs,
D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor)
: m_rs(std::move(rs)), m_ds(std::move(ds)), m_bs(std::move(bs)), m_il(std::move(il)), m_vs(std::move(vs)),
m_gs(std::move(gs)), m_ps(std::move(ps)), m_topology(topology), m_vertex_stride(vertex_stride),
m_gs(std::move(gs)), m_ps_or_cs(std::move(ps_or_cs)), m_topology(topology), m_vertex_stride(vertex_stride),
m_blend_factor(blend_factor), m_blend_factor_float(GPUDevice::RGBA8ToFloat(blend_factor))
{
}
@@ -215,7 +216,8 @@ size_t D3D11Device::BlendStateMapHash::operator()(const BlendStateMapKey& key) c
return h;
}
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts, Error* error)
D3D11Device::ComPtr<ID3D11BlendState> D3D11Device::GetBlendState(const GPUPipeline::BlendState& bs, u32 num_rts,
Error* error)
{
ComPtr<ID3D11BlendState> dbs;
@@ -365,69 +367,124 @@ std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::Grap
primitives[static_cast<u8>(config.primitive)], vertex_stride, config.blend.constant));
}
std::unique_ptr<GPUPipeline> D3D11Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
if (!config.compute_shader) [[unlikely]]
{
Error::SetStringView(error, "Missing compute shader.");
return {};
}
return std::unique_ptr<GPUPipeline>(
new D3D11Pipeline(nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
static_cast<const D3D11Shader*>(config.compute_shader)->GetComputeShader(),
D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}
void D3D11Device::SetPipeline(GPUPipeline* pipeline)
{
if (m_current_pipeline == pipeline)
return;
const bool was_compute = m_current_pipeline && m_current_pipeline->IsComputePipeline();
D3D11Pipeline* const PL = static_cast<D3D11Pipeline*>(pipeline);
m_current_pipeline = PL;
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
if (!PL->IsComputePipeline())
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (was_compute)
UnbindComputePipeline();
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11InputLayout* il = PL->GetInputLayout(); m_current_input_layout != il)
{
m_current_input_layout = il;
m_context->IASetInputLayout(il);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (const u32 vertex_stride = PL->GetVertexStride(); m_current_vertex_stride != vertex_stride)
{
const UINT offset = 0;
m_current_vertex_stride = PL->GetVertexStride();
m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &m_current_vertex_stride, &offset);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (D3D_PRIMITIVE_TOPOLOGY topology = PL->GetPrimitiveTopology(); m_current_primitive_topology != topology)
{
m_current_primitive_topology = topology;
m_context->IASetPrimitiveTopology(topology);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11VertexShader* vs = PL->GetVertexShader(); m_current_vertex_shader != vs)
{
m_current_vertex_shader = vs;
m_context->VSSetShader(vs, nullptr, 0);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11GeometryShader* gs = PL->GetGeometryShader(); m_current_geometry_shader != gs)
{
m_current_geometry_shader = gs;
m_context->GSSetShader(gs, nullptr, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11PixelShader* ps = PL->GetPixelShader(); m_current_pixel_shader != ps)
{
m_current_pixel_shader = ps;
m_context->PSSetShader(ps, nullptr, 0);
}
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11RasterizerState* rs = PL->GetRasterizerState(); m_current_rasterizer_state != rs)
{
m_current_rasterizer_state = rs;
m_context->RSSetState(rs);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
if (ID3D11DepthStencilState* ds = PL->GetDepthStencilState(); m_current_depth_state != ds)
{
m_current_depth_state = ds;
m_context->OMSetDepthStencilState(ds, 0);
}
if (ID3D11BlendState* bs = PL->GetBlendState();
m_current_blend_state != bs || m_current_blend_factor != PL->GetBlendFactor())
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
}
}
else
{
m_current_blend_state = bs;
m_current_blend_factor = PL->GetBlendFactor();
m_context->OMSetBlendState(bs, RGBA8ToFloat(m_current_blend_factor).data(), 0xFFFFFFFFu);
if (ID3D11ComputeShader* cs = m_current_pipeline->GetComputeShader(); cs != m_current_compute_shader)
{
m_current_compute_shader = cs;
m_context->CSSetShader(cs, nullptr, 0);
}
if (!was_compute)
{
// need to bind all SRVs/samplers
u32 count;
for (count = 0; count < MAX_TEXTURE_SAMPLERS; count++)
{
if (!m_current_textures[count])
break;
}
if (count > 0)
{
m_context->CSSetShaderResources(0, count, m_current_textures.data());
m_context->CSSetSamplers(0, count, m_current_samplers.data());
}
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
ID3D11UnorderedAccessView* uavs[MAX_TEXTURE_SAMPLERS];
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();
m_context->OMSetRenderTargets(0, nullptr, nullptr);
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs, nullptr);
}
}
}
}
@@ -436,6 +493,23 @@ void D3D11Device::UnbindPipeline(D3D11Pipeline* pl)
if (m_current_pipeline != pl)
return;
if (pl->IsComputePipeline())
UnbindComputePipeline();
// Let the runtime deal with the dead objects...
m_current_pipeline = nullptr;
}
void D3D11Device::UnbindComputePipeline()
{
m_current_compute_shader = nullptr;
ID3D11ShaderResourceView* null_srvs[MAX_TEXTURE_SAMPLERS] = {};
ID3D11SamplerState* null_samplers[MAX_TEXTURE_SAMPLERS] = {};
ID3D11UnorderedAccessView* null_uavs[MAX_RENDER_TARGETS] = {};
m_context->CSSetShader(nullptr, nullptr, 0);
m_context->CSSetShaderResources(0, MAX_TEXTURE_SAMPLERS, null_srvs);
m_context->CSSetSamplers(0, MAX_TEXTURE_SAMPLERS, null_samplers);
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, null_uavs, nullptr);
}

View File

@@ -51,13 +51,18 @@ public:
void SetDebugName(std::string_view name) override;
ALWAYS_INLINE bool IsComputePipeline() const { return !m_vs; }
ALWAYS_INLINE ID3D11RasterizerState* GetRasterizerState() const { return m_rs.Get(); }
ALWAYS_INLINE ID3D11DepthStencilState* GetDepthStencilState() const { return m_ds.Get(); }
ALWAYS_INLINE ID3D11BlendState* GetBlendState() const { return m_bs.Get(); }
ALWAYS_INLINE ID3D11InputLayout* GetInputLayout() const { return m_il.Get(); }
ALWAYS_INLINE ID3D11VertexShader* GetVertexShader() const { return m_vs.Get(); }
ALWAYS_INLINE ID3D11GeometryShader* GetGeometryShader() const { return m_gs.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return m_ps.Get(); }
ALWAYS_INLINE ID3D11PixelShader* GetPixelShader() const { return static_cast<ID3D11PixelShader*>(m_ps_or_cs.Get()); }
ALWAYS_INLINE ID3D11ComputeShader* GetComputeShader() const
{
return static_cast<ID3D11ComputeShader*>(m_ps_or_cs.Get());
}
ALWAYS_INLINE D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_topology; }
ALWAYS_INLINE u32 GetVertexStride() const { return m_vertex_stride; }
ALWAYS_INLINE u32 GetBlendFactor() const { return m_blend_factor; }
@@ -66,7 +71,8 @@ public:
private:
D3D11Pipeline(ComPtr<ID3D11RasterizerState> rs, ComPtr<ID3D11DepthStencilState> ds, ComPtr<ID3D11BlendState> bs,
ComPtr<ID3D11InputLayout> il, ComPtr<ID3D11VertexShader> vs, ComPtr<ID3D11GeometryShader> gs,
ComPtr<ID3D11PixelShader> ps, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride, u32 blend_factor);
ComPtr<ID3D11DeviceChild> ps_or_cs, D3D11_PRIMITIVE_TOPOLOGY topology, u32 vertex_stride,
u32 blend_factor);
ComPtr<ID3D11RasterizerState> m_rs;
ComPtr<ID3D11DepthStencilState> m_ds;
@@ -74,7 +80,7 @@ private:
ComPtr<ID3D11InputLayout> m_il;
ComPtr<ID3D11VertexShader> m_vs;
ComPtr<ID3D11GeometryShader> m_gs;
ComPtr<ID3D11PixelShader> m_ps;
ComPtr<ID3D11DeviceChild> m_ps_or_cs;
D3D11_PRIMITIVE_TOPOLOGY m_topology;
u32 m_vertex_stride;
u32 m_blend_factor;

View File

@@ -115,6 +115,8 @@ public:
ComputePipelineBuilder();
~ComputePipelineBuilder() = default;
ALWAYS_INLINE const D3D12_COMPUTE_PIPELINE_STATE_DESC* GetDesc() const { return &m_desc; }
void Clear();
Microsoft::WRL::ComPtr<ID3D12PipelineState> Create(ID3D12Device* device, Error* error, bool clear);

View File

@@ -1298,6 +1298,7 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = false;
m_features.explicit_present = true;
@@ -1552,6 +1553,7 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
1, // SingleTextureBufferAndPushConstants
0, // MultiTextureAndUBO
2, // MultiTextureAndPushConstants
2, // ComputeSingleTextureAndPushConstants
};
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
@@ -1565,7 +1567,11 @@ void D3D12Device::PushUniformBuffer(const void* data, u32 data_size)
const u32 push_param =
push_parameters[static_cast<u8>(m_current_pipeline_layout)] + BoolToUInt8(IsUsingROVRootSignature());
GetCommandList()->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
cmdlist->SetGraphicsRoot32BitConstants(push_param, data_size / 4u, data, 0);
else
cmdlist->SetComputeRoot32BitConstants(push_param, data_size / 4u, data, 0);
}
void* D3D12Device::MapUniformBuffer(u32 size)
@@ -1687,6 +1693,18 @@ bool D3D12Device::CreateRootSignatures(Error* error)
}
}
{
auto& rs = m_root_signatures[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, MAX_TEXTURE_SAMPLERS, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 0, MAX_IMAGE_RENDER_TARGETS, D3D12_SHADER_VISIBILITY_ALL);
rsb.Add32BitConstants(0, UNIFORM_PUSH_CONSTANTS_SIZE / sizeof(u32), D3D12_SHADER_VISIBILITY_ALL);
if (!(rs = rsb.Create(error, true)))
return false;
D3D12::SetObjectName(rs.Get(), "Compute Single Texture Pipeline Layout");
}
return true;
}
@@ -1810,6 +1828,7 @@ void D3D12Device::BeginRenderPass()
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
if (m_current_depth_target)
@@ -2174,15 +2193,88 @@ void D3D12Device::PreDrawCheck()
BeginRenderPass();
}
void D3D12Device::PreDispatchCheck()
{
if (InRenderPass())
EndRenderPass();
// Transition images.
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
if (m_num_current_render_targets > 0 && (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages))
{
// Still need to clear the RTs.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
D3D12Texture* const rt = m_current_render_targets[i];
rt->TransitionToState(cmdlist, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rt->SetUseFenceValue(GetCurrentFenceValue());
rt->CommitClear(cmdlist);
rt->SetState(GPUTexture::State::Dirty);
}
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
SetInitialPipelineState();
// TODO: Flushing cmdbuffer because of descriptor OOM will lose push constants.
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 dirty = std::exchange(m_dirty_flags, 0);
if (dirty != 0)
{
if (dirty & DIRTY_FLAG_PIPELINE_LAYOUT)
{
UpdateRootSignature();
if (!UpdateRootParameters(dirty))
{
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
else if (dirty & (DIRTY_FLAG_CONSTANT_BUFFER | DIRTY_FLAG_TEXTURES | DIRTY_FLAG_SAMPLERS | DIRTY_FLAG_RT_UAVS))
{
if (!UpdateRootParameters(dirty))
{
SubmitCommandList(false, "out of descriptors");
PreDispatchCheck();
return;
}
}
}
}
bool D3D12Device::IsUsingROVRootSignature() const
{
return ((m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages) != 0);
}
bool D3D12Device::IsUsingComputeRootSignature() const
{
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
}
void D3D12Device::UpdateRootSignature()
{
GetCommandList()->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
if (!IsUsingComputeRootSignature())
{
cmdlist->SetGraphicsRootSignature(
m_root_signatures[BoolToUInt8(IsUsingROVRootSignature())][static_cast<u8>(m_current_pipeline_layout)].Get());
}
else
{
cmdlist->SetComputeRootSignature(m_root_signatures[0][static_cast<u8>(m_current_pipeline_layout)].Get());
}
}
template<GPUPipeline::Layout layout>
@@ -2223,7 +2315,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(0, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(0, gpu_handle);
}
if (dirty & DIRTY_FLAG_SAMPLERS && num_textures > 0)
@@ -2241,7 +2336,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
return false;
}
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(1, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(1, gpu_handle);
}
if (dirty & DIRTY_FLAG_TEXTURES && layout == GPUPipeline::Layout::SingleTextureBufferAndPushConstants)
@@ -2283,7 +2381,10 @@ bool D3D12Device::UpdateParametersForLayout(u32 dirty)
1 :
((layout == GPUPipeline::Layout::SingleTextureAndUBO || layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 3 :
2);
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
if constexpr (layout < GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
cmdlist->SetGraphicsRootDescriptorTable(rov_param, gpu_handle);
else
cmdlist->SetComputeRootDescriptorTable(rov_param, gpu_handle);
}
return true;
@@ -2308,6 +2409,9 @@ bool D3D12Device::UpdateRootParameters(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateParametersForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@@ -2331,3 +2435,10 @@ void D3D12Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}
void D3D12Device::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
PreDispatchCheck();
s_stats.num_draws++;
GetCommandList()->Dispatch(thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@@ -96,6 +96,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -119,6 +120,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@@ -275,8 +277,10 @@ private:
ID3D12RootSignature* GetCurrentRootSignature() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
bool IsUsingROVRootSignature() const;
bool IsUsingComputeRootSignature() const;
void UpdateRootSignature();
template<GPUPipeline::Layout layout>
bool UpdateParametersForLayout(u32 dirty);

View File

@@ -107,6 +107,18 @@ std::string D3D12Pipeline::GetPipelineName(const GraphicsConfig& config)
return SHA1Digest::DigestToString(digest);
}
std::string D3D12Pipeline::GetPipelineName(const ComputeConfig& config)
{
SHA1Digest hash;
hash.Update(&config.layout, sizeof(config.layout));
if (const D3D12Shader* shader = static_cast<const D3D12Shader*>(config.compute_shader))
hash.Update(shader->GetBytecodeData(), shader->GetBytecodeSize());
u8 digest[SHA1Digest::DIGEST_SIZE];
hash.Final(digest);
return SHA1Digest::DigestToString(digest);
}
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error)
{
static constexpr std::array<D3D12_PRIMITIVE_TOPOLOGY, static_cast<u32>(GPUPipeline::Primitive::MaxCount)> primitives =
@@ -274,3 +286,46 @@ std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::Grap
pipeline, config.layout, primitives[static_cast<u8>(config.primitive)],
config.input_layout.vertex_attributes.empty() ? 0 : config.input_layout.vertex_stride, config.blend.constant));
}
std::unique_ptr<GPUPipeline> D3D12Device::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
D3D12::ComputePipelineBuilder cpb;
cpb.SetRootSignature(m_root_signatures[0][static_cast<u8>(config.layout)].Get());
cpb.SetShader(static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeData(),
static_cast<const D3D12Shader*>(config.compute_shader)->GetBytecodeSize());
ComPtr<ID3D12PipelineState> pipeline;
if (m_pipeline_library)
{
const std::wstring name = StringUtil::UTF8StringToWideString(D3D12Pipeline::GetPipelineName(config));
HRESULT hr =
m_pipeline_library->LoadComputePipeline(name.c_str(), cpb.GetDesc(), IID_PPV_ARGS(pipeline.GetAddressOf()));
if (FAILED(hr))
{
// E_INVALIDARG = not found.
if (hr != E_INVALIDARG)
ERROR_LOG("LoadComputePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
// Need to create it normally.
pipeline = cpb.Create(m_device.Get(), error, false);
// Store if it wasn't an OOM or something else.
if (pipeline && hr == E_INVALIDARG)
{
hr = m_pipeline_library->StorePipeline(name.c_str(), pipeline.Get());
if (FAILED(hr))
ERROR_LOG("StorePipeline() failed with HRESULT {:08X}", static_cast<unsigned>(hr));
}
}
}
else
{
pipeline = cpb.Create(m_device.Get(), error, false);
}
if (!pipeline)
return {};
return std::unique_ptr<GPUPipeline>(
new D3D12Pipeline(pipeline, config.layout, D3D_PRIMITIVE_TOPOLOGY_UNDEFINED, 0, 0));
}

View File

@@ -51,6 +51,7 @@ public:
void SetDebugName(std::string_view name) override;
static std::string GetPipelineName(const GraphicsConfig& config);
static std::string GetPipelineName(const ComputeConfig& config);
private:
D3D12Pipeline(Microsoft::WRL::ComPtr<ID3D12PipelineState> pipeline, Layout layout, D3D12_PRIMITIVE_TOPOLOGY topology,

View File

@@ -1579,11 +1579,13 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
// Need to know if there's UBOs for mapping.
const spvc_reflected_resource *ubos, *textures;
size_t ubos_count, textures_count;
size_t ubos_count, textures_count, images_count;
if ((sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_UNIFORM_BUFFER, &ubos,
&ubos_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_SAMPLED_IMAGE,
&textures, &textures_count)) != SPVC_SUCCESS)
&textures, &textures_count)) != SPVC_SUCCESS ||
(sres = dyn_libs::spvc_resources_get_resource_list_for_type(resources, SPVC_RESOURCE_TYPE_STORAGE_IMAGE,
&textures, &images_count)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_resources_get_resource_list_for_type() failed: {}", static_cast<int>(sres));
return {};
@@ -1592,6 +1594,7 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
[[maybe_unused]] const SpvExecutionModel execmodel = dyn_libs::spvc_compiler_get_execution_model(scompiler);
[[maybe_unused]] static constexpr u32 UBO_DESCRIPTOR_SET = 0;
[[maybe_unused]] static constexpr u32 TEXTURE_DESCRIPTOR_SET = 1;
[[maybe_unused]] static constexpr u32 IMAGE_DESCRIPTOR_SET = 2;
switch (target_language)
{
@@ -1659,6 +1662,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
}
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < images_count; i++)
{
const spvc_hlsl_resource_binding rb = {.stage = execmodel,
.desc_set = IMAGE_DESCRIPTOR_SET,
.binding = i,
.cbv = {},
.uav = {.register_space = 0, .register_binding = i},
.srv = {},
.sampler = {}};
if ((sres = dyn_libs::spvc_compiler_hlsl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_hlsl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}
}
}
break;
#endif
@@ -1727,12 +1749,25 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
if (stage == GPUShaderStage::Fragment)
const spvc_msl_resource_binding pc_rb = {.stage = execmodel,
.desc_set = SPVC_MSL_PUSH_CONSTANT_DESC_SET,
.binding = SPVC_MSL_PUSH_CONSTANT_BINDING,
.msl_buffer = 0,
.msl_texture = 0,
.msl_sampler = 0};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &pc_rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for push constant failed: {}",
static_cast<int>(sres));
return {};
}
if (stage == GPUShaderStage::Fragment || stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
{
const spvc_msl_resource_binding rb = {.stage = SpvExecutionModelFragment,
.desc_set = 1,
const spvc_msl_resource_binding rb = {.stage = execmodel,
.desc_set = TEXTURE_DESCRIPTOR_SET,
.binding = i,
.msl_buffer = i,
.msl_texture = i,
@@ -1744,16 +1779,31 @@ bool GPUDevice::TranslateVulkanSpvToLanguage(const std::span<const u8> spirv, GP
return {};
}
}
}
if (!m_features.framebuffer_fetch)
if (stage == GPUShaderStage::Fragment && !m_features.framebuffer_fetch)
{
const spvc_msl_resource_binding rb = {
.stage = execmodel, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
return {};
}
}
if (stage == GPUShaderStage::Compute)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
const spvc_msl_resource_binding rb = {
.stage = SpvExecutionModelFragment, .desc_set = 2, .binding = 0, .msl_texture = MAX_TEXTURE_SAMPLERS};
.stage = execmodel, .desc_set = 2, .binding = i, .msl_buffer = i, .msl_texture = i, .msl_sampler = i};
if ((sres = dyn_libs::spvc_compiler_msl_add_resource_binding(scompiler, &rb)) != SPVC_SUCCESS)
{
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() for FB failed: {}",
static_cast<int>(sres));
Error::SetStringFmt(error, "spvc_compiler_msl_add_resource_binding() failed: {}", static_cast<int>(sres));
return {};
}
}

View File

@@ -160,6 +160,9 @@ public:
// Multiple textures, 128 byte UBO via push constants.
MultiTextureAndPushConstants,
// 128 byte UBO via push constants, 1 texture, compute shader.
ComputeSingleTextureAndPushConstants,
MaxCount
};
@@ -416,6 +419,12 @@ public:
u32 GetRenderTargetCount() const;
};
struct ComputeConfig
{
Layout layout;
GPUShader* compute_shader;
};
GPUPipeline();
virtual ~GPUPipeline();
@@ -501,9 +510,10 @@ public:
FEATURE_MASK_FRAMEBUFFER_FETCH = (1 << 2),
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 3),
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 4),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 5),
FEATURE_MASK_MEMORY_IMPORT = (1 << 6),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 7),
FEATURE_MASK_COMPUTE_SHADERS = (1 << 5),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6),
FEATURE_MASK_MEMORY_IMPORT = (1 << 7),
FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8),
};
enum class DrawBarrier : u32
@@ -532,6 +542,7 @@ public:
bool texture_buffers_emulated_with_ssbo : 1;
bool feedback_loops : 1;
bool geometry_shaders : 1;
bool compute_shaders : 1;
bool partial_msaa_resolve : 1;
bool memory_import : 1;
bool explicit_present : 1;
@@ -625,11 +636,20 @@ public:
0, // SingleTextureBufferAndPushConstants
MAX_TEXTURE_SAMPLERS, // MultiTextureAndUBO
MAX_TEXTURE_SAMPLERS, // MultiTextureAndPushConstants
1, // ComputeSingleTextureAndPushConstants
};
return counts[static_cast<u8>(layout)];
}
/// Returns the number of thread groups to dispatch for a given total count and local size.
static constexpr std::tuple<u32, u32, u32> GetDispatchCount(u32 count_x, u32 count_y, u32 count_z, u32 local_size_x,
u32 local_size_y, u32 local_size_z)
{
return std::make_tuple((count_x + (local_size_x - 1)) / local_size_x, (count_y + (local_size_y - 1)) / local_size_y,
(count_z + (local_size_z - 1)) / local_size_z);
}
ALWAYS_INLINE const Features& GetFeatures() const { return m_features; }
ALWAYS_INLINE RenderAPI GetRenderAPI() const { return m_render_api; }
ALWAYS_INLINE u32 GetRenderAPIVersion() const { return m_render_api_version; }
@@ -638,10 +658,6 @@ public:
ALWAYS_INLINE GPUSwapChain* GetMainSwapChain() const { return m_main_swap_chain.get(); }
ALWAYS_INLINE bool HasMainSwapChain() const { return static_cast<bool>(m_main_swap_chain); }
// ALWAYS_INLINE u32 GetMainSwapChainWidth() const { return m_main_swap_chain->GetWidth(); }
// ALWAYS_INLINE u32 GetMainSwapChainHeight() const { return m_main_swap_chain->GetHeight(); }
// ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
// ALWAYS_INLINE GPUTexture::Format GetWindowFormat() const { return m_window_info.surface_format; }
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
ALWAYS_INLINE GPUSampler* GetNearestSampler() const { return m_nearest_sampler.get(); }
@@ -712,6 +728,8 @@ public:
Error* error = nullptr, const char* entry_point = "main");
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config,
Error* error = nullptr) = 0;
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config,
Error* error = nullptr) = 0;
/// Debug messaging.
virtual void PushDebugGroup(const char* name) = 0;
@@ -753,6 +771,7 @@ public:
virtual void Draw(u32 vertex_count, u32 base_vertex) = 0;
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) = 0;
virtual void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) = 0;
virtual void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) = 0;
/// Returns false if the window was completely occluded.
virtual PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color = DEFAULT_CLEAR_COLOR) = 0;

View File

@@ -207,6 +207,12 @@ void OpenGLDevice::InvalidateRenderTarget(GPUTexture* t)
}
}
std::unique_ptr<GPUPipeline> OpenGLDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
ERROR_LOG("Compute shaders are not yet supported.");
return {};
}
void OpenGLDevice::PushDebugGroup(const char* name)
{
#ifdef _DEBUG
@@ -488,6 +494,7 @@ bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
m_features.geometry_shaders =
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && (GLAD_GL_VERSION_3_2 || GLAD_GL_ES_VERSION_3_2);
m_features.compute_shaders = false;
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
@@ -1078,6 +1085,11 @@ void OpenGLDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
Panic("Barriers are not supported");
}
void OpenGLDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
Panic("Compute shaders are not supported");
}
void OpenGLDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
u32* map_base_vertex)
{

View File

@@ -77,6 +77,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -100,6 +101,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
PresentResult BeginPresent(GPUSwapChain* swap_chain, u32 clear_color) override;
void EndPresent(GPUSwapChain* swap_chain, bool explicit_present, u64 present_time) override;

View File

@@ -627,14 +627,15 @@ void Vulkan::ComputePipelineBuilder::Clear()
m_smap_constants = {};
}
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache /*= VK_NULL_HANDLE*/,
bool clear /*= true*/)
VkPipeline Vulkan::ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear,
Error* error)
{
VkPipeline pipeline;
VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &pipeline);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
SetErrorObject(error, "vkCreateComputePipelines() failed: ", res);
return VK_NULL_HANDLE;
}

View File

@@ -197,7 +197,7 @@ public:
void Clear();
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear, Error* error);
void SetShader(VkShaderModule module, const char* entry_point);

View File

@@ -2447,6 +2447,7 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe
WARNING_LOG("Emulating texture buffers with SSBOs.");
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && vk_features.geometryShader;
m_features.compute_shaders = !(disabled_features & FEATURE_MASK_COMPUTE_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
@@ -2735,7 +2736,8 @@ void VulkanDevice::PushUniformBuffer(const void* data, u32 data_size)
{
DebugAssert(data_size < UNIFORM_PUSH_CONSTANTS_SIZE);
s_stats.buffer_streamed += data_size;
vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(), UNIFORM_PUSH_CONSTANTS_STAGES, 0,
vkCmdPushConstants(GetCurrentCommandBuffer(), GetCurrentVkPipelineLayout(),
IsCurrentPipelineCompute() ? VK_SHADER_STAGE_COMPUTE_BIT : UNIFORM_PUSH_CONSTANTS_STAGES, 0,
data_size, data);
}
@@ -2802,7 +2804,8 @@ bool VulkanDevice::CreatePipelineLayouts()
}
{
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_single_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_single_texture_ds_layout, "Single Texture Descriptor Set Layout");
@@ -2822,7 +2825,8 @@ bool VulkanDevice::CreatePipelineLayouts()
if (m_optional_extensions.vk_khr_push_descriptor)
dslb.SetPushFlag();
for (u32 i = 0; i < MAX_TEXTURE_SAMPLERS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
if ((m_multi_texture_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_multi_texture_ds_layout, "Multi Texture Descriptor Set Layout");
@@ -2837,14 +2841,13 @@ bool VulkanDevice::CreatePipelineLayouts()
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "Feedback Loop Descriptor Set Layout");
}
if (m_features.raster_order_views)
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
{
for (u32 i = 0; i < MAX_IMAGE_RENDER_TARGETS; i++)
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_rov_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_feedback_loop_ds_layout, "ROV Descriptor Set Layout");
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
}
if ((m_image_ds_layout = dslb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, m_image_ds_layout, "ROV Descriptor Set Layout");
for (u32 type = 0; type < 3; type++)
{
@@ -2860,7 +2863,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Single Texture + UBO Pipeline Layout");
@@ -2873,7 +2876,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@@ -2887,7 +2890,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(UNIFORM_PUSH_CONSTANTS_STAGES, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
@@ -2901,7 +2904,7 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture + UBO Pipeline Layout");
@@ -2915,13 +2918,24 @@ bool VulkanDevice::CreatePipelineLayouts()
if (feedback_loop)
plb.AddDescriptorSet(m_feedback_loop_ds_layout);
else if (rov)
plb.AddDescriptorSet(m_rov_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Multi Texture Pipeline Layout");
}
}
{
VkPipelineLayout& pl =
m_pipeline_layouts[0][static_cast<u8>(GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)];
plb.AddDescriptorSet(m_single_texture_ds_layout);
plb.AddDescriptorSet(m_image_ds_layout);
plb.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, UNIFORM_PUSH_CONSTANTS_SIZE);
if ((pl = plb.Create(m_device)) == VK_NULL_HANDLE)
return false;
Vulkan::SetObjectName(m_device, pl, "Compute Single Texture Pipeline Layout");
}
return true;
}
@@ -2942,7 +2956,7 @@ void VulkanDevice::DestroyPipelineLayouts()
l = VK_NULL_HANDLE;
}
};
destroy_dsl(m_rov_ds_layout);
destroy_dsl(m_image_ds_layout);
destroy_dsl(m_feedback_loop_ds_layout);
destroy_dsl(m_multi_texture_ds_layout);
destroy_dsl(m_single_texture_buffer_ds_layout);
@@ -3459,13 +3473,13 @@ void VulkanDevice::SetPipeline(GPUPipeline* pipeline)
m_current_pipeline = static_cast<VulkanPipeline*>(pipeline);
vkCmdBindPipeline(m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
if (m_current_pipeline_layout != m_current_pipeline->GetLayout())
{
m_current_pipeline_layout = m_current_pipeline->GetLayout();
m_dirty_flags |= DIRTY_FLAG_PIPELINE_LAYOUT;
}
vkCmdBindPipeline(m_current_command_buffer, GetCurrentVkPipelineBindPoint(), m_current_pipeline->GetPipeline());
}
void VulkanDevice::UnbindPipeline(VulkanPipeline* pl)
@@ -3503,12 +3517,24 @@ VulkanDevice::PipelineLayoutType VulkanDevice::GetPipelineLayoutType(GPUPipeline
PipelineLayoutType::Normal);
}
bool VulkanDevice::IsCurrentPipelineCompute() const
{
return (m_current_pipeline_layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
}
VkPipelineLayout VulkanDevice::GetCurrentVkPipelineLayout() const
{
return m_pipeline_layouts[static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
return m_pipeline_layouts[IsCurrentPipelineCompute() ?
0 :
static_cast<size_t>(GetPipelineLayoutType(m_current_render_pass_flags))]
[static_cast<size_t>(m_current_pipeline_layout)];
}
VkPipelineBindPoint VulkanDevice::GetCurrentVkPipelineBindPoint() const
{
return IsCurrentPipelineCompute() ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}
void VulkanDevice::SetInitialPipelineState()
{
DebugAssert(m_current_pipeline);
@@ -3520,7 +3546,7 @@ void VulkanDevice::SetInitialPipelineState()
vkCmdBindIndexBuffer(cmdbuf, m_index_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
m_current_pipeline_layout = m_current_pipeline->GetLayout();
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_current_pipeline->GetPipeline());
vkCmdBindPipeline(cmdbuf, GetCurrentVkPipelineBindPoint(), m_current_pipeline->GetPipeline());
const VkViewport vp = {static_cast<float>(m_current_viewport.left),
static_cast<float>(m_current_viewport.top),
@@ -3674,12 +3700,56 @@ void VulkanDevice::PreDrawCheck()
}
}
void VulkanDevice::PreDispatchCheck()
{
// All textures should be in shader read only optimal already, but just in case..
const u32 num_textures = GetActiveTexturesForLayout(m_current_pipeline_layout);
for (u32 i = 0; i < num_textures; i++)
{
if (m_current_textures[i])
m_current_textures[i]->TransitionToLayout(VulkanTexture::Layout::ShaderReadOnly);
}
// Binding as image, but we still need to clear it.
for (u32 i = 0; i < m_num_current_render_targets; i++)
{
VulkanTexture* rt = m_current_render_targets[i];
if (rt->GetState() == GPUTexture::State::Cleared)
rt->CommitClear(m_current_command_buffer);
rt->SetState(GPUTexture::State::Dirty);
rt->TransitionToLayout(VulkanTexture::Layout::ReadWriteImage);
rt->SetUseFenceCounter(GetCurrentFenceCounter());
}
// If this is a new command buffer, bind the pipeline and such.
if (m_dirty_flags & DIRTY_FLAG_INITIAL)
SetInitialPipelineState();
DebugAssert(!(m_dirty_flags & DIRTY_FLAG_INITIAL));
const u32 update_mask = (m_current_render_pass_flags ? ~0u : ~DIRTY_FLAG_INPUT_ATTACHMENT);
const u32 dirty = m_dirty_flags & update_mask;
m_dirty_flags = m_dirty_flags & ~update_mask;
if (dirty != 0)
{
if (!UpdateDescriptorSets(dirty))
{
SubmitCommandBuffer(false, "out of descriptor sets");
PreDispatchCheck();
return;
}
}
}
template<GPUPipeline::Layout layout>
bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
[[maybe_unused]] bool new_dynamic_offsets = false;
VkPipelineLayout const vk_pipeline_layout = GetCurrentVkPipelineLayout();
constexpr bool is_compute = (layout >= GPUPipeline::Layout::ComputeSingleTextureAndPushConstants);
constexpr VkPipelineBindPoint vk_bind_point =
(is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
const VkPipelineLayout vk_pipeline_layout = GetCurrentVkPipelineLayout();
std::array<VkDescriptorSet, 3> ds;
u32 first_ds = 0;
u32 num_ds = 0;
@@ -3700,7 +3770,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
if constexpr (layout == GPUPipeline::Layout::SingleTextureAndUBO ||
layout == GPUPipeline::Layout::SingleTextureAndPushConstants)
layout == GPUPipeline::Layout::SingleTextureAndPushConstants ||
layout == GPUPipeline::Layout::ComputeSingleTextureAndPushConstants)
{
VulkanTexture* const tex = m_current_textures[0] ? m_current_textures[0] : m_null_texture.get();
DebugAssert(tex && m_current_samplers[0] != VK_NULL_HANDLE);
@@ -3727,7 +3798,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
const u32 set = (layout == GPUPipeline::Layout::MultiTextureAndUBO) ? 1 : 0;
dsub.PushUpdate(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, set);
dsub.PushUpdate(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, set);
if (num_ds == 0)
return true;
}
@@ -3757,7 +3828,7 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
{
if (m_current_render_pass_flags & GPUPipeline::BindRenderTargetsAsImages)
{
VkDescriptorSet ids = AllocateDescriptorSet(m_rov_ds_layout);
VkDescriptorSet ids = AllocateDescriptorSet(m_image_ds_layout);
if (ids == VK_NULL_HANDLE)
return false;
@@ -3792,8 +3863,8 @@ bool VulkanDevice::UpdateDescriptorSetsForLayout(u32 dirty)
}
DebugAssert(num_ds > 0);
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, vk_pipeline_layout, first_ds,
num_ds, ds.data(), static_cast<u32>(new_dynamic_offsets),
vkCmdBindDescriptorSets(GetCurrentCommandBuffer(), vk_bind_point, vk_pipeline_layout, first_ds, num_ds, ds.data(),
static_cast<u32>(new_dynamic_offsets),
new_dynamic_offsets ? &m_uniform_buffer_position : nullptr);
return true;
@@ -3818,6 +3889,9 @@ bool VulkanDevice::UpdateDescriptorSets(u32 dirty)
case GPUPipeline::Layout::MultiTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::MultiTextureAndPushConstants>(dirty);
case GPUPipeline::Layout::ComputeSingleTextureAndPushConstants:
return UpdateDescriptorSetsForLayout<GPUPipeline::Layout::ComputeSingleTextureAndPushConstants>(dirty);
default:
UnreachableCode();
}
@@ -3911,3 +3985,10 @@ void VulkanDevice::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 b
DefaultCaseIsUnreachable();
}
}
void VulkanDevice::Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z)
{
PreDispatchCheck();
s_stats.num_draws++;
vkCmdDispatch(GetCurrentCommandBuffer(), thread_groups_x, thread_groups_y, thread_groups_z);
}

View File

@@ -113,6 +113,7 @@ public:
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;
void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -136,6 +137,7 @@ public:
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 thread_groups_x, u32 thread_groups_y, u32 thread_groups_z) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@@ -370,9 +372,12 @@ private:
/// Applies any changed state.
static PipelineLayoutType GetPipelineLayoutType(GPUPipeline::RenderPassFlag flags);
bool IsCurrentPipelineCompute() const;
VkPipelineLayout GetCurrentVkPipelineLayout() const;
VkPipelineBindPoint GetCurrentVkPipelineBindPoint() const;
void SetInitialPipelineState();
void PreDrawCheck();
void PreDispatchCheck();
template<GPUPipeline::Layout layout>
bool UpdateDescriptorSetsForLayout(u32 dirty);
@@ -435,7 +440,7 @@ private:
VkDescriptorSetLayout m_single_texture_buffer_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_multi_texture_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_feedback_loop_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_rov_ds_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_image_ds_layout = VK_NULL_HANDLE;
DimensionalArray<VkPipelineLayout, static_cast<size_t>(GPUPipeline::Layout::MaxCount),
static_cast<size_t>(PipelineLayoutType::MaxCount)>
m_pipeline_layouts = {};

View File

@@ -275,3 +275,16 @@ std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::Gra
return std::unique_ptr<GPUPipeline>(
new VulkanPipeline(pipeline, config.layout, static_cast<u8>(vertices_per_primitive), config.render_pass_flags));
}
std::unique_ptr<GPUPipeline> VulkanDevice::CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error)
{
Vulkan::ComputePipelineBuilder cpb;
cpb.SetShader(static_cast<const VulkanShader*>(config.compute_shader)->GetModule(), "main");
cpb.SetPipelineLayout(m_pipeline_layouts[0][static_cast<size_t>(config.layout)]);
const VkPipeline pipeline = cpb.Create(m_device, m_pipeline_cache, false, error);
if (!pipeline)
return {};
return std::unique_ptr<GPUPipeline>(new VulkanPipeline(pipeline, config.layout, 0, GPUPipeline::NoRenderPassFlags));
}