Common: Add vector rectangle helper functions

This commit is contained in:
Stenzek
2025-12-13 21:03:37 +10:00
parent e06d880257
commit c9177bc398
4 changed files with 535 additions and 44 deletions

View File

@@ -1023,10 +1023,10 @@ TEST(GSVector4Test, BasicOps)
// sat(minmax vector) : x/z clamped to [min.x, min.z], y/w to [min.y, min.w]
GSVector4 range(0.0f, -1.0f, 2.0f, 1.0f);
auto sat_pair = v.sat(range);
EXPECT_FLOAT_EQ(sat_pair.x, 1.0f); // within [0,2]
EXPECT_FLOAT_EQ(sat_pair.y, -1.0f); // clamped to -1
EXPECT_FLOAT_EQ(sat_pair.z, 2.0f); // clamped to 2
EXPECT_FLOAT_EQ(sat_pair.w, -1.0f); // clamped to -1
EXPECT_FLOAT_EQ(sat_pair.x, 1.0f); // within [0,2]
EXPECT_FLOAT_EQ(sat_pair.y, -1.0f); // clamped to -1
EXPECT_FLOAT_EQ(sat_pair.z, 2.0f); // clamped to 2
EXPECT_FLOAT_EQ(sat_pair.w, -1.0f); // clamped to -1
}
TEST(GSVector4Test, BlendAndMask)
@@ -1189,10 +1189,10 @@ TEST(GSVector4Test, ReplaceNaN)
GSVector4 v(1.0f, std::numeric_limits<float>::quiet_NaN(), -5.0f, std::numeric_limits<float>::quiet_NaN());
GSVector4 repl(10.0f, 20.0f, 30.0f, 40.0f);
auto r = v.replace_nan(repl);
EXPECT_FLOAT_EQ(r.x, 1.0f); // kept
EXPECT_FLOAT_EQ(r.y, 20.0f); // replaced
EXPECT_FLOAT_EQ(r.z, -5.0f); // kept
EXPECT_FLOAT_EQ(r.w, 40.0f); // replaced
EXPECT_FLOAT_EQ(r.x, 1.0f); // kept
EXPECT_FLOAT_EQ(r.y, 20.0f); // replaced
EXPECT_FLOAT_EQ(r.z, -5.0f); // kept
EXPECT_FLOAT_EQ(r.w, 40.0f); // replaced
}
TEST(GSVector4Test, DoubleExtendedOps)
@@ -1221,14 +1221,14 @@ TEST(GSVector4Test, DoubleExtendedOps)
EXPECT_DOUBLE_EQ(d.max64(d2).F64[1], 10.0);
auto gt = d.gt64(d2);
EXPECT_EQ(gt.U64[0], 0ULL); // -4 > -2 ? no
EXPECT_EQ(gt.U64[1], 0ULL); // 9 > 10 ? no
EXPECT_EQ(gt.U64[0], 0ULL); // -4 > -2 ? no
EXPECT_EQ(gt.U64[1], 0ULL); // 9 > 10 ? no
auto lt = d.lt64(d2);
EXPECT_NE(lt.U64[0], 0ULL); // -4 < -2
EXPECT_NE(lt.U64[1], 0ULL); // 9 < 10
EXPECT_NE(lt.U64[0], 0ULL); // -4 < -2
EXPECT_NE(lt.U64[1], 0ULL); // 9 < 10
auto ge = d.ge64(d2);
EXPECT_EQ(ge.U64[0], 0ULL); // -4 >= -2 ? no
EXPECT_EQ(ge.U64[1], 0ULL); // 9 >= 10 ? no
EXPECT_EQ(ge.U64[0], 0ULL); // -4 >= -2 ? no
EXPECT_EQ(ge.U64[1], 0ULL); // 9 >= 10 ? no
auto le = d.le64(d2);
EXPECT_NE(le.U64[0], 0ULL);
EXPECT_NE(le.U64[1], 0ULL);
@@ -1274,3 +1274,242 @@ TEST(GSVectorTest, ConversionsGSVector2iGSVector2)
EXPECT_EQ(cast_result2.U32[0], vf2.U32[0]);
EXPECT_EQ(cast_result2.U32[1], vf2.U32[1]);
}
// width() tests
// width() of a well-formed rectangle is right minus left.
TEST(GSVectorTest, Width_ReturnsCorrectValue)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  EXPECT_FLOAT_EQ(first.width(), 40.0f);  // 50 - 10
  EXPECT_FLOAT_EQ(second.width(), 70.0f); // 100 - 30
}
// Rectangles whose left and right edges coincide have zero width.
TEST(GSVectorTest, Width_EmptyRect_ReturnsZero)
{
  const GSVector4 allZero(0.0f, 0.0f, 0.0f, 0.0f);
  const GSVector4 collapsed(25.0f, 35.0f, 25.0f, 35.0f);

  EXPECT_FLOAT_EQ(allZero.width(), 0.0f);
  EXPECT_FLOAT_EQ(collapsed.width(), 0.0f);
}
// A rectangle whose right edge precedes its left edge reports a negative width.
TEST(GSVectorTest, Width_InvalidRect_ReturnsNegative)
{
  const GSVector4 flipped(50.0f, 80.0f, 10.0f, 20.0f); // right < left, bottom < top

  EXPECT_FLOAT_EQ(flipped.width(), -40.0f); // 10 - 50
}
// height() tests
// height() of a well-formed rectangle is bottom minus top.
TEST(GSVectorTest, Height_ReturnsCorrectValue)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  EXPECT_FLOAT_EQ(first.height(), 60.0f);  // 80 - 20
  EXPECT_FLOAT_EQ(second.height(), 80.0f); // 120 - 40
}
// Rectangles whose top and bottom edges coincide have zero height.
TEST(GSVectorTest, Height_EmptyRect_ReturnsZero)
{
  const GSVector4 allZero(0.0f, 0.0f, 0.0f, 0.0f);
  const GSVector4 collapsed(25.0f, 35.0f, 25.0f, 35.0f);

  EXPECT_FLOAT_EQ(allZero.height(), 0.0f);
  EXPECT_FLOAT_EQ(collapsed.height(), 0.0f);
}
// A rectangle whose bottom edge precedes its top edge reports a negative height.
TEST(GSVectorTest, Height_InvalidRect_ReturnsNegative)
{
  const GSVector4 flipped(50.0f, 80.0f, 10.0f, 20.0f); // right < left, bottom < top

  EXPECT_FLOAT_EQ(flipped.height(), -60.0f); // 20 - 80
}
// rsize() tests
// rsize() packs (width, height) into a two-component vector.
TEST(GSVectorTest, Rsize_ReturnsCorrectSize)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  const GSVector2 firstSize = first.rsize();
  EXPECT_FLOAT_EQ(firstSize.x, 40.0f); // 50 - 10
  EXPECT_FLOAT_EQ(firstSize.y, 60.0f); // 80 - 20

  const GSVector2 secondSize = second.rsize();
  EXPECT_FLOAT_EQ(secondSize.x, 70.0f); // 100 - 30
  EXPECT_FLOAT_EQ(secondSize.y, 80.0f); // 120 - 40
}
// An all-zero rectangle has a (0, 0) size.
TEST(GSVectorTest, Rsize_EmptyRect_ReturnsZeroSize)
{
  const GSVector4 allZero(0.0f, 0.0f, 0.0f, 0.0f);
  const GSVector2 extent = allZero.rsize();

  EXPECT_FLOAT_EQ(extent.x, 0.0f);
  EXPECT_FLOAT_EQ(extent.y, 0.0f);
}
// rvalid() tests
// Rectangles with left < right and top < bottom are valid.
TEST(GSVectorTest, Rvalid_ValidRect_ReturnsTrue)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  EXPECT_TRUE(first.rvalid());
  EXPECT_TRUE(second.rvalid());
}
// Zero-area rectangles (left == right and top == bottom) are treated as invalid.
TEST(GSVectorTest, Rvalid_EmptyRect_ReturnsFalse)
{
  const GSVector4 allZero(0.0f, 0.0f, 0.0f, 0.0f);
  const GSVector4 collapsed(25.0f, 35.0f, 25.0f, 35.0f);

  EXPECT_FALSE(allZero.rvalid());
  EXPECT_FALSE(collapsed.rvalid());
}
// A rectangle flipped on both axes is invalid.
TEST(GSVectorTest, Rvalid_InvalidRect_ReturnsFalse)
{
  const GSVector4 flipped(50.0f, 80.0f, 10.0f, 20.0f); // right < left, bottom < top

  EXPECT_FALSE(flipped.rvalid());
}
// One flipped axis is enough to make the rectangle invalid.
TEST(GSVectorTest, Rvalid_PartiallyInvalid_ReturnsFalse)
{
  const GSVector4 flippedHorizontally = GSVector4(50.0f, 20.0f, 10.0f, 80.0f); // right < left
  const GSVector4 flippedVertically = GSVector4(10.0f, 80.0f, 50.0f, 20.0f);   // bottom < top

  EXPECT_FALSE(flippedHorizontally.rvalid());
  EXPECT_FALSE(flippedVertically.rvalid());
}
// rempty() tests
// Zero-area rectangles are empty.
TEST(GSVectorTest, Rempty_EmptyRect_ReturnsTrue)
{
  const GSVector4 allZero(0.0f, 0.0f, 0.0f, 0.0f);
  const GSVector4 collapsed(25.0f, 35.0f, 25.0f, 35.0f);

  EXPECT_TRUE(allZero.rempty());
  EXPECT_TRUE(collapsed.rempty());
}
// Rectangles with positive width and height are not empty.
TEST(GSVectorTest, Rempty_NonEmptyRect_ReturnsFalse)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  EXPECT_FALSE(first.rempty());
  EXPECT_FALSE(second.rempty());
}
// Zero width alone makes the rectangle empty, even with positive height.
TEST(GSVectorTest, Rempty_ZeroWidthOnly_ReturnsTrue)
{
  const GSVector4 verticalLine = GSVector4(25.0f, 20.0f, 25.0f, 80.0f); // width = 0, height > 0

  EXPECT_TRUE(verticalLine.rempty());
}
// Zero height alone makes the rectangle empty, even with positive width.
TEST(GSVectorTest, Rempty_ZeroHeightOnly_ReturnsTrue)
{
  const GSVector4 horizontalLine = GSVector4(10.0f, 50.0f, 40.0f, 50.0f); // width > 0, height = 0

  EXPECT_TRUE(horizontalLine.rempty());
}
// Flipped (invalid) rectangles count as empty.
TEST(GSVectorTest, Rempty_InvalidRect_ReturnsTrue)
{
  const GSVector4 flipped(50.0f, 80.0f, 10.0f, 20.0f); // right < left, bottom < top

  EXPECT_TRUE(flipped.rempty());
}
// runion() tests
// The union takes the smaller left/top and the larger right/bottom of the two inputs.
TEST(GSVectorTest, Runion_OverlappingRects_ReturnsCorrectUnion)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  const GSVector4 merged = first.runion(second);

  EXPECT_FLOAT_EQ(merged.left, 10.0f);    // min(10, 30)
  EXPECT_FLOAT_EQ(merged.top, 20.0f);     // min(20, 40)
  EXPECT_FLOAT_EQ(merged.right, 100.0f);  // max(50, 100)
  EXPECT_FLOAT_EQ(merged.bottom, 120.0f); // max(80, 120)
}
// Disjoint rectangles produce the bounding box that encloses both.
TEST(GSVectorTest, Runion_NonOverlappingRects_ReturnsEnclosingRect)
{
  const GSVector4 nearOrigin = GSVector4(0.0f, 0.0f, 10.0f, 10.0f);
  const GSVector4 farAway = GSVector4(50.0f, 50.0f, 60.0f, 60.0f);

  const GSVector4 bounds = nearOrigin.runion(farAway);

  EXPECT_FLOAT_EQ(bounds.left, 0.0f);
  EXPECT_FLOAT_EQ(bounds.top, 0.0f);
  EXPECT_FLOAT_EQ(bounds.right, 60.0f);
  EXPECT_FLOAT_EQ(bounds.bottom, 60.0f);
}
// When one rectangle lies entirely inside the other, the union is the outer rectangle.
TEST(GSVectorTest, Runion_ContainedRect_ReturnsOuterRect)
{
  const GSVector4 enclosing = GSVector4(0.0f, 0.0f, 100.0f, 100.0f);
  const GSVector4 nested = GSVector4(25.0f, 25.0f, 75.0f, 75.0f);

  const GSVector4 merged = enclosing.runion(nested);

  EXPECT_FLOAT_EQ(merged.left, 0.0f);
  EXPECT_FLOAT_EQ(merged.top, 0.0f);
  EXPECT_FLOAT_EQ(merged.right, 100.0f);
  EXPECT_FLOAT_EQ(merged.bottom, 100.0f);
}
// NOTE: despite the "ReturnsOtherRect" suffix, the expectations below pin the plain min/max result:
// an empty rectangle at the origin still participates in the union and drags left/top to 0.
// The result is therefore (0, 0, 50, 80), not the non-empty input (10, 20, 50, 80).
TEST(GSVectorTest, Runion_WithEmptyRect_ReturnsOtherRect)
{
  const GSVector4 rect1 = GSVector4(10.0f, 20.0f, 50.0f, 80.0f); // left=10, top=20, right=50, bottom=80
  const GSVector4 emptyRect = GSVector4(0.0f, 0.0f, 0.0f, 0.0f);

  const GSVector4 result = rect1.runion(emptyRect);

  // Union with empty rect at origin
  EXPECT_FLOAT_EQ(result.left, 0.0f);    // min(10, 0)
  EXPECT_FLOAT_EQ(result.top, 0.0f);     // min(20, 0)
  EXPECT_FLOAT_EQ(result.right, 50.0f);  // max(50, 0)
  EXPECT_FLOAT_EQ(result.bottom, 80.0f); // max(80, 0)
}
// The union of a rectangle with itself is that same rectangle.
TEST(GSVectorTest, Runion_IdenticalRects_ReturnsSameRect)
{
  const GSVector4 rect(10.0f, 20.0f, 50.0f, 80.0f); // left=10, top=20, right=50, bottom=80

  const GSVector4 merged = rect.runion(rect);

  EXPECT_FLOAT_EQ(merged.left, rect.left);
  EXPECT_FLOAT_EQ(merged.top, rect.top);
  EXPECT_FLOAT_EQ(merged.right, rect.right);
  EXPECT_FLOAT_EQ(merged.bottom, rect.bottom);
}
// The min/max rule also holds when coordinates are negative or straddle zero.
TEST(GSVectorTest, Runion_NegativeCoordinates_ReturnsCorrectUnion)
{
  const GSVector4 allNegative = GSVector4(-50.0f, -40.0f, -10.0f, -5.0f);
  const GSVector4 straddling = GSVector4(-30.0f, -20.0f, 10.0f, 15.0f);

  const GSVector4 merged = allNegative.runion(straddling);

  EXPECT_FLOAT_EQ(merged.left, -50.0f);  // min(-50, -30)
  EXPECT_FLOAT_EQ(merged.top, -40.0f);   // min(-40, -20)
  EXPECT_FLOAT_EQ(merged.right, 10.0f);  // max(-10, 10)
  EXPECT_FLOAT_EQ(merged.bottom, 15.0f); // max(-5, 15)
}
// a.runion(b) and b.runion(a) must produce the same rectangle.
TEST(GSVectorTest, Runion_IsCommutative)
{
  const GSVector4 first(10.0f, 20.0f, 50.0f, 80.0f);    // left=10, top=20, right=50, bottom=80
  const GSVector4 second(30.0f, 40.0f, 100.0f, 120.0f); // left=30, top=40, right=100, bottom=120

  const GSVector4 forward = first.runion(second);
  const GSVector4 backward = second.runion(first);

  EXPECT_TRUE(forward.eq(backward));
}

View File

@@ -279,7 +279,7 @@ public:
{
constexpr int bit1 = ((mask & 2) * 3) << 1;
constexpr int bit0 = (mask & 1) * 3;
return blend16 < bit1 | bit0 > (v);
return blend16<bit1 | bit0>(v);
}
ALWAYS_INLINE GSVector2i blend(const GSVector2i& v, const GSVector2i& mask) const
@@ -487,10 +487,7 @@ public:
// Arithmetic right shift of both 32-bit lanes by i, expressed as a left shift by the negated count.
ALWAYS_INLINE GSVector2i sra32(s32 i) const { return GSVector2i(vshl_s32(v2s, vdup_n_s32(-i))); }

// Arithmetic right shift of each 32-bit lane by the corresponding lane of v (same negated left shift).
ALWAYS_INLINE GSVector2i srav32(const GSVector2i& v) const { return GSVector2i(vshl_s32(v2s, vneg_s32(v.v2s))); }
ALWAYS_INLINE GSVector2i add8(const GSVector2i& v) const
{
@@ -599,7 +596,10 @@ public:
return GSVector2i(vreinterpret_s32_u16(vcgt_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this > v, zero otherwise.
ALWAYS_INLINE GSVector2i gt32(const GSVector2i& v) const
{
  return GSVector2i(vreinterpret_s32_u32(vcgt_s32(v2s, v.v2s)));
}
ALWAYS_INLINE GSVector2i ge8(const GSVector2i& v) const
{
@@ -609,7 +609,10 @@ public:
{
return GSVector2i(vreinterpret_s32_u16(vcge_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this >= v, zero otherwise.
ALWAYS_INLINE GSVector2i ge32(const GSVector2i& v) const
{
  return GSVector2i(vreinterpret_s32_u32(vcge_s32(v2s, v.v2s)));
}
ALWAYS_INLINE GSVector2i lt8(const GSVector2i& v) const
{
@@ -621,7 +624,10 @@ public:
return GSVector2i(vreinterpret_s32_u16(vclt_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this < v, zero otherwise.
ALWAYS_INLINE GSVector2i lt32(const GSVector2i& v) const
{
  return GSVector2i(vreinterpret_s32_u32(vclt_s32(v2s, v.v2s)));
}
ALWAYS_INLINE GSVector2i le8(const GSVector2i& v) const
{
@@ -631,7 +637,10 @@ public:
{
return GSVector2i(vreinterpret_s32_u16(vcle_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this <= v, zero otherwise.
ALWAYS_INLINE GSVector2i le32(const GSVector2i& v) const
{
  return GSVector2i(vreinterpret_s32_u32(vcle_s32(v2s, v.v2s)));
}

// Bit-clear: keeps the bits of this vector that are NOT set in v (this & ~v).
ALWAYS_INLINE GSVector2i andnot(const GSVector2i& v) const { return GSVector2i(vbic_s32(v2s, v.v2s)); }
@@ -650,6 +659,8 @@ public:
return (vget_lane_u64(vreinterpret_u64_s32(v2s), 0) == UINT64_C(0xFFFFFFFFFFFFFFFF));
}
// True when at least one bit of the 64-bit vector is set.
ALWAYS_INLINE bool anytrue() const
{
  const auto bits = vget_lane_u64(vreinterpret_u64_s32(v2s), 0);
  return (bits != UINT64_C(0));
}

// True when every bit of the 64-bit vector is clear.
ALWAYS_INLINE bool allfalse() const
{
  const auto bits = vget_lane_u64(vreinterpret_u64_s32(v2s), 0);
  return (bits == UINT64_C(0));
}
template<int i>
@@ -901,6 +912,8 @@ public:
return (vget_lane_u64(vreinterpret_u64_f32(v2s), 0) == UINT64_C(0xFFFFFFFFFFFFFFFF));
}
// True when at least one bit of the 64-bit vector is set.
ALWAYS_INLINE bool anytrue() const
{
  const auto bits = vget_lane_u64(vreinterpret_u64_f32(v2s), 0);
  return (bits != UINT64_C(0));
}

// True when every bit of the 64-bit vector is clear.
ALWAYS_INLINE bool allfalse() const
{
  const auto bits = vget_lane_u64(vreinterpret_u64_f32(v2s), 0);
  return (bits == UINT64_C(0));
}

// Blends between v and *this using the self-comparison as the selector; a lane only fails to
// compare equal to itself when it holds NaN, so those are the lanes that come from v.
ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const
{
  const auto not_nan = (*this == *this);
  return v.blend32(*this, not_nan);
}
@@ -1825,10 +1838,7 @@ public:
// Arithmetic right shift of all four 32-bit lanes by i, expressed as a left shift by the negated count.
ALWAYS_INLINE GSVector4i sra32(s32 i) const { return GSVector4i(vshlq_s32(v4s, vdupq_n_s32(-i))); }

// Arithmetic right shift of each 32-bit lane by the corresponding lane of v (same negated left shift).
ALWAYS_INLINE GSVector4i srav32(const GSVector4i& v) const { return GSVector4i(vshlq_s32(v4s, vnegq_s32(v.v4s))); }
template<int i>
ALWAYS_INLINE GSVector4i sll64() const
@@ -2034,7 +2044,10 @@ public:
return GSVector4i(vreinterpretq_s32_u16(vcgtq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this > v, zero otherwise.
ALWAYS_INLINE GSVector4i gt32(const GSVector4i& v) const
{
  return GSVector4i(vreinterpretq_s32_u32(vcgtq_s32(v4s, v.v4s)));
}
ALWAYS_INLINE GSVector4i ge8(const GSVector4i& v) const
{
@@ -2044,7 +2057,10 @@ public:
{
return GSVector4i(vreinterpretq_s32_u16(vcgeq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this >= v, zero otherwise.
ALWAYS_INLINE GSVector4i ge32(const GSVector4i& v) const
{
  return GSVector4i(vreinterpretq_s32_u32(vcgeq_s32(v4s, v.v4s)));
}
ALWAYS_INLINE GSVector4i lt8(const GSVector4i& v) const
{
@@ -2056,7 +2072,10 @@ public:
return GSVector4i(vreinterpretq_s32_u16(vcltq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this < v, zero otherwise.
ALWAYS_INLINE GSVector4i lt32(const GSVector4i& v) const
{
  return GSVector4i(vreinterpretq_s32_u32(vcltq_s32(v4s, v.v4s)));
}
ALWAYS_INLINE GSVector4i le8(const GSVector4i& v) const
{
@@ -2066,7 +2085,10 @@ public:
{
return GSVector4i(vreinterpretq_s32_u16(vcleq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s))));
}
// Signed per-lane compare: each 32-bit lane becomes all-ones where this <= v, zero otherwise.
ALWAYS_INLINE GSVector4i le32(const GSVector4i& v) const
{
  return GSVector4i(vreinterpretq_s32_u32(vcleq_s32(v4s, v.v4s)));
}

// Bit-clear: keeps the bits of this vector that are NOT set in v (this & ~v).
ALWAYS_INLINE GSVector4i andnot(const GSVector4i& v) const { return GSVector4i(vbicq_s32(v4s, v.v4s)); }
@@ -2090,6 +2112,15 @@ public:
#endif
}
// True when at least one bit of the 128-bit vector is set.
ALWAYS_INLINE bool anytrue() const
{
#ifdef CPU_ARCH_ARM64
  // The unsigned horizontal maximum is non-zero exactly when some lane is non-zero.
  const auto widest = vmaxvq_u32(vreinterpretq_u32_s32(v4s));
  return (widest != UINT32_C(0));
#else
  // OR the two 64-bit halves together, then test the combined 64 bits.
  const int32x2_t folded = vorr_s32(vget_low_s32(v4s), vget_high_s32(v4s));
  return (vget_lane_u64(vreinterpret_u64_s32(folded), 0) != UINT64_C(0));
#endif
}
ALWAYS_INLINE bool allfalse() const
{
#ifdef CPU_ARCH_ARM64
@@ -2629,6 +2660,30 @@ public:
#endif
}
// Horizontal extent of the rectangle; negative when right < left.
ALWAYS_INLINE float width() const
{
  return (right - left);
}

// Vertical extent of the rectangle; negative when bottom < top.
ALWAYS_INLINE float height() const
{
  return (bottom - top);
}

// (width, height) computed as (z, w) - (x, y).
ALWAYS_INLINE GSVector2 rsize() const
{
  const auto extent = zw() - xy();
  return extent;
}
// Reports whether the rectangle (x=left, y=top, z=right, w=bottom) encloses no area.
ALWAYS_INLINE bool rempty() const
{
// !all((x, y) < (z, w)) i.e. !not_empty: the rect is non-empty only when BOTH x < z and y < w hold,
// in which case both 32-bit compare lanes are all-ones and the combined 64 bits equal ~0.
return (vget_lane_u64(vreinterpret_u64_f32(vclt_f32(vget_low_f32(v4s), vget_high_f32(v4s))), 0) !=
0xFFFFFFFFFFFFFFFFULL);
}
// Reports whether the rectangle (x=left, y=top, z=right, w=bottom) has left < right and top < bottom.
ALWAYS_INLINE bool rvalid() const
{
// !any((x, y) >= (z, w)): valid only when neither compare lane is set, so the combined 64 bits are 0.
return (vget_lane_u64(vreinterpret_u64_f32(vcge_f32(vget_low_f32(v4s), vget_high_f32(v4s))), 0) == 0);
}
// Bounding rectangle of this and v: lanes 0-1 (left, top) take the per-lane minimum,
// lanes 2-3 (right, bottom) take the per-lane maximum.
ALWAYS_INLINE GSVector4 runion(const GSVector4 v) const
{
  const GSVector4 lowest = min(v);
  const GSVector4 highest = max(v);
  return lowest.blend32<0xc>(highest);
}
// Intersection of this rectangle with a, obtained by saturating this rectangle against a.
ALWAYS_INLINE GSVector4 rintersect(const GSVector4& a) const
{
  return sat(a);
}

// True when the intersection with v is a valid (non-degenerate) rectangle.
ALWAYS_INLINE bool rintersects(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.rvalid();
}

// True when intersecting with v yields v itself.
ALWAYS_INLINE bool rcontains(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.eq(v);
}

// Per-lane clamp: raise to at least a, then cap at b.
ALWAYS_INLINE GSVector4 sat(const GSVector4& a, const GSVector4& b) const
{
  const GSVector4 raised = max(a);
  return raised.min(b);
}
ALWAYS_INLINE GSVector4 sat(const GSVector4& a) const
@@ -2749,6 +2804,17 @@ public:
#endif
}
// True when at least one bit of the 128-bit vector is set.
ALWAYS_INLINE bool anytrue() const
{
  const uint32x4_t bits = vreinterpretq_u32_f32(v4s);
#ifdef CPU_ARCH_ARM64
  // The unsigned horizontal maximum is non-zero exactly when some lane is non-zero.
  return (vmaxvq_u32(bits) != UINT32_C(0));
#else
  // OR the two 64-bit halves together, then test the combined 64 bits.
  const uint32x2_t folded = vorr_u32(vget_low_u32(bits), vget_high_u32(bits));
  return (vget_lane_u64(vreinterpret_u64_u32(folded), 0) != UINT64_C(0));
#endif
}
ALWAYS_INLINE bool allfalse() const
{
#ifdef CPU_ARCH_ARM64
@@ -2821,6 +2887,19 @@ public:
return GSVector4(vcombine_f32(vld1_f32((const float*)p), vcreate_f32(0)));
}
// Loads 8 bytes from p into the upper half of the vector and zeroes the lower half.
// On 32-bit ARM, the unaligned variant reads the data as bytes instead of as one 64-bit element.
// The vector handed to the GSVector4 constructor is always reinterpreted to float32x4_t so that no
// implicit vector conversion is needed, and p keeps its const qualification.
template<bool aligned>
ALWAYS_INLINE static GSVector4 loadh(const void* p)
{
#ifdef CPU_ARCH_ARM32
  if constexpr (!aligned)
    return GSVector4(vreinterpretq_f32_s8(vcombine_s8(vdup_n_s8(0), vld1_s8(static_cast<const int8_t*>(p)))));
#endif

  return GSVector4(vreinterpretq_f32_s64(vcombine_s64(vdup_n_s64(0), vld1_s64(static_cast<const int64_t*>(p)))));
}
// Places v in the upper two lanes and zeroes the lower two.
ALWAYS_INLINE static GSVector4 loadh(const GSVector2& v)
{
  const float32x2_t zero_half = vcreate_f32(0);
  return GSVector4(vcombine_f32(zero_half, v.v2s));
}

// Places f in lane 0 and zeroes the remaining lanes.
ALWAYS_INLINE static GSVector4 load(float f)
{
  const float32x4_t zeros = vmovq_n_f32(0.0f);
  return GSVector4(vsetq_lane_f32(f, zeros, 0));
}
template<bool aligned>
@@ -3009,6 +3088,18 @@ public:
return GSVector4(vreinterpretq_f32_u32(vcleq_f32(v1.v4s, v2.v4s)));
}
// True when all four lanes compare equal to the matching lanes of v (floating-point compare).
ALWAYS_INLINE bool eq(const GSVector4& v) const
{
#ifdef CPU_ARCH_ARM64
  // Equal lanes are all-ones, so the horizontal minimum is non-zero only if every lane matched.
  const uint32x4_t res = vceqq_f32(v4s, v.v4s);
  return (vminvq_u32(res) != 0);
#else
  // Invert so differing lanes are all-ones, fold the halves together and require no bit set.
  const uint32x4_t res = vmvnq_u32(vceqq_f32(v4s, v.v4s));
  const uint32x2_t paired = vorr_u32(vget_low_u32(res), vget_high_u32(res));
  // vget_lane_u64 takes a uint64x1_t, so reinterpret explicitly rather than relying on lax conversions.
  return (vget_lane_u64(vreinterpret_u64_u32(paired), 0) == 0);
#endif
}
ALWAYS_INLINE GSVector4 mul64(const GSVector4& v) const
{
#ifdef CPU_ARCH_ARM64

View File

@@ -417,7 +417,7 @@ public:
}
// True when all 64 bits are set.
ALWAYS_INLINE bool alltrue() const
{
  return (U64[0] == 0xFFFFFFFFFFFFFFFFULL);
}

// True when at least one bit is set.
ALWAYS_INLINE bool anytrue() const
{
  return (U64[0] != 0);
}

// True when no bit is set.
ALWAYS_INLINE bool allfalse() const
{
  return (U64[0] == 0);
}
template<s32 i>
@@ -638,7 +638,7 @@ public:
// Packs the top bit of each 32-bit lane into bits 0 and 1 of the result.
ALWAYS_INLINE int mask() const
{
  const auto lane0_bit = U32[0] >> 31;
  const auto lane1_bit = (U32[1] >> 30) & 2;
  return lane0_bit | lane1_bit;
}

// True when all 64 bits are set.
ALWAYS_INLINE bool alltrue() const
{
  return (U64[0] == 0xFFFFFFFFFFFFFFFFULL);
}

// True when at least one bit is set.
ALWAYS_INLINE bool anytrue() const
{
  return (U64[0] != 0);
}

// True when no bit is set.
ALWAYS_INLINE bool allfalse() const
{
  return (U64[0] == 0);
}

// Blends between v and *this using the self-comparison as the selector; a lane only fails to
// compare equal to itself when it holds NaN, so those are the lanes that come from v.
ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const
{
  const auto not_nan = (*this == *this);
  return v.blend32(*this, not_nan);
}
@@ -1462,7 +1462,7 @@ public:
}
// True when all 128 bits are set (both 64-bit halves are all-ones).
ALWAYS_INLINE bool alltrue() const
{
  const auto common_bits = U64[0] & U64[1];
  return (common_bits == 0xFFFFFFFFFFFFFFFFULL);
}

// True when at least one bit in either half is set.
ALWAYS_INLINE bool anytrue() const
{
  const auto any_bits = U64[0] | U64[1];
  return (any_bits != 0);
}

// True when no bit in either half is set.
ALWAYS_INLINE bool allfalse() const
{
  const auto any_bits = U64[0] | U64[1];
  return (any_bits == 0);
}
template<s32 i>
@@ -1862,6 +1862,22 @@ public:
// Smallest of the four components, folded from w towards x.
ALWAYS_INLINE float minv() const
{
  const float zw_min = std::min(z, w);
  const float yzw_min = std::min(y, zw_min);
  return std::min(x, yzw_min);
}

// Largest of the four components, folded from w towards x.
ALWAYS_INLINE float maxv() const
{
  const float zw_max = std::max(z, w);
  const float yzw_max = std::max(y, zw_max);
  return std::max(x, yzw_max);
}

// Horizontal extent of the rectangle; negative when right < left.
ALWAYS_INLINE float width() const
{
  return (right - left);
}

// Vertical extent of the rectangle; negative when bottom < top.
ALWAYS_INLINE float height() const
{
  return (bottom - top);
}

// (width, height) as a two-component vector.
ALWAYS_INLINE GSVector2 rsize() const
{
  const float w_extent = width();
  const float h_extent = height();
  return GSVector2(w_extent, h_extent);
}
// Empty unless both x < z and y < w (mask bits 0 and 1 both set).
ALWAYS_INLINE bool rempty() const
{
  const auto less_bits = (*this < zwzw()).mask();
  return (less_bits != 0x3);
}

// Valid only when no lane of (*this >= zwzw()) is set.
ALWAYS_INLINE bool rvalid() const
{
  const auto ge_bits = (*this >= zwzw()).mask();
  return (ge_bits == 0);
}
// Bounding rectangle of this and v: minimum of the left/top pair, maximum of the right/bottom pair.
GSVector4 runion(const GSVector4& v) const
{
  const float min_left = std::min(x, v.x);
  const float min_top = std::min(y, v.y);
  const float max_right = std::max(z, v.z);
  const float max_bottom = std::max(w, v.w);
  return GSVector4(min_left, min_top, max_right, max_bottom);
}
// Intersection of this rectangle with v, obtained by saturating this rectangle against v.
ALWAYS_INLINE GSVector4 rintersect(const GSVector4& v) const
{
  return sat(v);
}

// True when the intersection with v is a valid (non-degenerate) rectangle.
ALWAYS_INLINE bool rintersects(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.rvalid();
}

// True when intersecting with v yields v itself.
ALWAYS_INLINE bool rcontains(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.eq(v);
}
GSVector4 sat(const GSVector4& min, const GSVector4& max) const
{
return GSVector4(std::clamp(x, min.x, max.x), std::clamp(y, min.y, max.y), std::clamp(z, min.z, max.z),
@@ -1940,7 +1956,7 @@ public:
}
// True when all 128 bits are set (both 64-bit halves are all-ones).
ALWAYS_INLINE bool alltrue() const
{
  const auto common_bits = U64[0] & U64[1];
  return (common_bits == 0xFFFFFFFFFFFFFFFFULL);
}

// True when at least one bit in either half is set.
ALWAYS_INLINE bool anytrue() const
{
  const auto any_bits = U64[0] | U64[1];
  return (any_bits != 0);
}

// True when no bit in either half is set.
ALWAYS_INLINE bool allfalse() const
{
  const auto any_bits = U64[0] | U64[1];
  return (any_bits == 0);
}

// Blends between v and *this using the self-comparison as the selector; a lane only fails to
// compare equal to itself when it holds NaN, so those are the lanes that come from v.
ALWAYS_INLINE GSVector4 replace_nan(const GSVector4& v) const
{
  const auto not_nan = (*this == *this);
  return v.blend32(*this, not_nan);
}
@@ -2001,6 +2017,17 @@ public:
return ret;
}
// Copies 8 bytes from p into the upper 64 bits and zeroes the lower 64 bits.
// The aligned flag is not consulted here; memcpy places no alignment requirement on p.
template<bool aligned>
ALWAYS_INLINE static GSVector4 loadh(const void* p)
{
  GSVector4 result;
  std::memcpy(&result.U64[1], p, sizeof(result.U64[1]));
  result.U64[0] = 0;
  return result;
}

// Places the 8 bytes of v in the upper half and zeroes the lower half.
ALWAYS_INLINE static GSVector4 loadh(const GSVector2& v)
{
  const void* const src = &v;
  return loadh<true>(src);
}
template<bool aligned>
ALWAYS_INLINE static GSVector4 load(const void* p)
{
@@ -2219,6 +2246,8 @@ public:
return ret;
}
// True when the four floats are byte-for-byte identical to those of v.
// This is a bit-pattern comparison (memcmp), not a floating-point one.
ALWAYS_INLINE bool eq(const GSVector4& v) const
{
  const int difference = std::memcmp(F32, v.F32, sizeof(F32));
  return (difference == 0);
}
ALWAYS_INLINE GSVector4 mul64(const GSVector4& v_) const
{
GSVector4 ret;

View File

@@ -329,7 +329,7 @@ public:
#else
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
constexpr s32 bit0 = (mask & 1) * 3;
return blend16 < bit1 | bit0 > (v);
return blend16<bit1 | bit0>(v);
#endif
}
@@ -527,6 +527,7 @@ public:
// Byte sign-bit mask restricted to the low 8 bytes (the 64 bits this type uses).
ALWAYS_INLINE s32 mask() const
{
  const auto all_bytes = _mm_movemask_epi8(m);
  return (all_bytes & 0xff);
}

// True when the top bit of every one of the 8 bytes is set.
ALWAYS_INLINE bool alltrue() const
{
  return (mask() == 0xff);
}

// True when the top bit of at least one byte is set.
ALWAYS_INLINE bool anytrue() const
{
  return (mask() != 0x00);
}

// True when no byte has its top bit set.
ALWAYS_INLINE bool allfalse() const
{
  return (mask() == 0x00);
}
template<s32 i>
@@ -790,7 +791,7 @@ public:
// Float sign-bit mask restricted to the two lanes this type uses.
ALWAYS_INLINE int mask() const
{
  const auto all_lanes = _mm_movemask_ps(m);
  return (all_lanes & 0x3);
}

// True when both lanes have their sign bit set.
ALWAYS_INLINE bool alltrue() const
{
  return (mask() == 0x3);
}

// True when at least one lane has its sign bit set.
ALWAYS_INLINE bool anytrue() const
{
  return (mask() != 0x0);
}

// True when neither lane has its sign bit set.
ALWAYS_INLINE bool allfalse() const
{
  return (mask() == 0x0);
}

// Blends between v and *this using the self-comparison as the selector; a lane only fails to
// compare equal to itself when it holds NaN, so those are the lanes that come from v.
ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const
{
  const auto not_nan = (*this == *this);
  return v.blend32(*this, not_nan);
}
@@ -1349,7 +1350,7 @@ public:
constexpr s32 bit2 = ((mask & 4) * 3) << 2;
constexpr s32 bit1 = ((mask & 2) * 3) << 1;
constexpr s32 bit0 = (mask & 1) * 3;
return blend16 < bit3 | bit2 | bit1 | bit0 > (v);
return blend16<bit3 | bit2 | bit1 | bit0>(v);
#endif
}
@@ -1630,12 +1631,21 @@ public:
// True when mask() reports all 16 bits set.
ALWAYS_INLINE bool alltrue() const
{
  return (mask() == 0xffff);
}

// True when the vector is not entirely zero (SSE4.1), or when mask() is non-zero (fallback).
ALWAYS_INLINE bool anytrue() const
{
#ifndef CPU_ARCH_SSE41
  return (mask() != 0);
#else
  // testz returns 1 only when (m & m) == 0, i.e. when every bit is clear.
  return (_mm_testz_si128(m, m) == 0);
#endif
}
// True when the vector is entirely zero (SSE4.1), or when mask() is zero (fallback).
ALWAYS_INLINE bool allfalse() const
{
#ifdef CPU_ARCH_SSE41
  // testz returns 1 only when (m & m) == 0, i.e. when every bit is clear.
  return (_mm_testz_si128(m, m) != 0);
#else
  return (mask() == 0);
#endif
}
@@ -2102,6 +2112,19 @@ public:
return _mm_cvtss_f32(v);
}
// Horizontal extent of the rectangle; negative when right < left.
ALWAYS_INLINE float width() const
{
  return (right - left);
}

// Vertical extent of the rectangle; negative when bottom < top.
ALWAYS_INLINE float height() const
{
  return (bottom - top);
}

// (width, height): subtract (x, y) from (z, w) across the full vector and keep the low pair.
ALWAYS_INLINE GSVector2 rsize() const
{
  const auto extents = zwzw() - xyxy();
  return extents.xy();
}
// Empty unless both x < z and y < w (mask bits 0 and 1 both set).
ALWAYS_INLINE bool rempty() const
{
  const auto less_bits = (*this < zwzw()).mask();
  return (less_bits != 0x3);
}

// Valid only when no lane of (*this >= zwzw()) is set.
ALWAYS_INLINE bool rvalid() const
{
  const auto ge_bits = (*this >= zwzw()).mask();
  return (ge_bits == 0);
}
// Bounding rectangle of this and v: lanes 0-1 (left, top) take the per-lane minimum,
// lanes 2-3 (right, bottom) take the per-lane maximum.
ALWAYS_INLINE GSVector4 runion(const GSVector4 v) const
{
  const GSVector4 lowest = min(v);
  const GSVector4 highest = max(v);
  return lowest.blend32<0xc>(highest);
}
// Intersection of this rectangle with v, obtained by saturating this rectangle against v.
ALWAYS_INLINE GSVector4 rintersect(const GSVector4& v) const
{
  return sat(v);
}

// True when the intersection with v is a valid (non-degenerate) rectangle.
ALWAYS_INLINE bool rintersects(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.rvalid();
}

// True when intersecting with v yields v itself.
ALWAYS_INLINE bool rcontains(const GSVector4& v) const
{
  const GSVector4 overlap = rintersect(v);
  return overlap.eq(v);
}
ALWAYS_INLINE GSVector4 sat(const GSVector4& min, const GSVector4& max) const
{
return GSVector4(_mm_min_ps(_mm_max_ps(m, min), max));
@@ -2123,7 +2146,85 @@ public:
// Compile-time lane blend: bit i of mask set means result lane i comes from v, otherwise from *this.
// With SSE4.1 this is a single blend instruction. Without it, every one of the 16 masks is built
// from move_ss/move_sd/shuffle_ps combinations.
// Reading aid: _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)) yields (a[i0], a[i1], b[i2], b[i3]).
template<int mask>
ALWAYS_INLINE GSVector4 blend32(const GSVector4& v) const
{
#ifdef CPU_ARCH_SSE41
return GSVector4(_mm_blend_ps(m, v, mask));
#else
// horrible, just horrible
static_assert(mask >= 0 && mask < 16);
__m128 ret;
if constexpr (mask == 0)
{
// nothing taken from v
ret = m;
}
else if constexpr (mask == 1)
{
// (v0, m1, m2, m3)
ret = _mm_move_ss(m, v.m);
}
else if constexpr (mask == 2)
{
// inner = (m0, m0, v1, v1); result = (m0, v1, m2, m3)
ret = _mm_shuffle_ps(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(1, 1, 0, 0)), m, _MM_SHUFFLE(3, 2, 2, 0));
}
else if constexpr (mask == 3)
{
// low 64 bits from v: (v0, v1, m2, m3)
ret = _mm_castpd_ps(_mm_move_sd(_mm_castps_pd(m), _mm_castps_pd(v.m)));
}
else if constexpr (mask == 4)
{
// inner = (v2, v2, m3, m3); result = (m0, m1, v2, m3)
ret = _mm_shuffle_ps(m, _mm_shuffle_ps(v.m, m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 5)
{
// (v0, m1, v2, m3)
ret =
_mm_shuffle_ps(_mm_move_ss(m, v.m), _mm_shuffle_ps(v.m, m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 6)
{
// (m0, v1, v2, m3)
ret = _mm_shuffle_ps(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(1, 1, 0, 0)),
_mm_shuffle_ps(v.m, m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 2, 0));
}
else if constexpr (mask == 7)
{
// (v0, v1, v2, m3)
ret = _mm_shuffle_ps(_mm_castpd_ps(_mm_move_sd(_mm_castps_pd(m), _mm_castps_pd(v.m))),
_mm_shuffle_ps(v.m, m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 8)
{
// inner = (m2, m2, v3, v3); result = (m0, m1, m2, v3)
ret = _mm_shuffle_ps(m, _mm_shuffle_ps(m, v.m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 9)
{
// (v0, m1, m2, v3)
ret =
_mm_shuffle_ps(_mm_move_ss(m, v.m), _mm_shuffle_ps(m, v.m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 10)
{
// (m0, v1, m2, v3)
ret = _mm_shuffle_ps(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(1, 1, 0, 0)),
_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 2, 0));
}
else if constexpr (mask == 11)
{
// (v0, v1, m2, v3)
ret = _mm_shuffle_ps(_mm_castpd_ps(_mm_move_sd(_mm_castps_pd(m), _mm_castps_pd(v.m))),
_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(3, 3, 2, 2)), _MM_SHUFFLE(2, 0, 1, 0));
}
else if constexpr (mask == 12)
{
// high 64 bits from v: (m0, m1, v2, v3)
ret = _mm_shuffle_ps(m, v.m, _MM_SHUFFLE(3, 2, 1, 0));
}
else if constexpr (mask == 13)
{
// (v0, m1, v2, v3)
ret = _mm_shuffle_ps(_mm_move_ss(m, v.m), v.m, _MM_SHUFFLE(3, 2, 1, 0));
}
else if constexpr (mask == 14)
{
// (m0, v1, v2, v3)
ret = _mm_shuffle_ps(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(1, 1, 0, 0)), v.m, _MM_SHUFFLE(3, 2, 2, 0));
}
else if constexpr (mask == 15)
{
// everything taken from v
ret = v.m;
}
return GSVector4(ret);
#endif
}
ALWAYS_INLINE GSVector4 blend32(const GSVector4& v, const GSVector4& mask) const
@@ -2160,13 +2261,23 @@ public:
// True when all four lanes have their sign bit set.
ALWAYS_INLINE bool alltrue() const { return mask() == 0xf; }

// True when at least one lane is "set".
// - AVX2 builds test the sign bits with _mm_testz_ps.
// - SSE4.1 builds test every bit with _mm_testz_si128.
// - Builds without SSE4.1 cannot use either test instruction, so they fall back to the sign-bit
//   mask, matching what alltrue() inspects.
ALWAYS_INLINE bool anytrue() const
{
#if defined(CPU_ARCH_AVX2)
  return (_mm_testz_ps(m, m) == 0);
#elif defined(CPU_ARCH_SSE41)
  const __m128i ii = _mm_castps_si128(m);
  return (_mm_testz_si128(ii, ii) == 0);
#else
  return (mask() != 0);
#endif
}
// True when no lane is "set".
// - AVX2 builds test the sign bits with _mm_testz_ps.
// - SSE4.1 builds test every bit with _mm_testz_si128.
// - Builds without SSE4.1 cannot use either test instruction, so they fall back to the sign-bit mask.
ALWAYS_INLINE bool allfalse() const
{
#if defined(CPU_ARCH_AVX2)
  return (_mm_testz_ps(m, m) != 0);
#elif defined(CPU_ARCH_SSE41)
  const __m128i ii = _mm_castps_si128(m);
  return (_mm_testz_si128(ii, ii) != 0);
#else
  return (mask() == 0);
#endif
}
@@ -2252,6 +2363,17 @@ public:
return GSVector4(_mm_castpd_ps(_mm_load_sd(static_cast<const double*>(p))));
}
// Loads 8 bytes from p into the upper half of the vector and zeroes the lower half.
// The aligned flag is not consulted in this implementation.
template<bool aligned>
ALWAYS_INLINE static GSVector4 loadh(const void* p)
{
  const __m128d zeroed = _mm_setzero_pd();
  const __m128d upper_filled = _mm_loadh_pd(zeroed, static_cast<const double*>(p));
  return GSVector4(_mm_castpd_ps(upper_filled));
}

// Places the low 64 bits of v in the upper half and zeroes the lower half.
ALWAYS_INLINE static GSVector4 loadh(const GSVector2& v)
{
  const __m128d source = _mm_castps_pd(v.m);
  return GSVector4(_mm_unpacklo_pd(_mm_setzero_pd(), source));
}
template<bool aligned>
ALWAYS_INLINE static GSVector4 load(const void* p)
{
@@ -2424,6 +2546,16 @@ public:
return GSVector4(_mm_cmple_ps(v1, v2));
}
// True when this vector equals v in every lane.
// NOTE(review): the SSE4.1 branch compares bit patterns (XOR must be all-zero), while the fallback
// goes through operator== and alltrue(), presumably a per-lane floating-point compare. If so, the
// two branches disagree for NaN and for +0.0 vs -0.0. Confirm which behaviour callers rely on.
ALWAYS_INLINE bool eq(const GSVector4& v) const
{
#ifndef CPU_ARCH_SSE41
  return (*this == v).alltrue();
#else
  const __m128i differing_bits = _mm_castps_si128(_mm_xor_ps(m, v.m));
  return (_mm_testz_si128(differing_bits, differing_bits) != 0);
#endif
}
ALWAYS_INLINE GSVector4 mul64(const GSVector4& v_) const
{
return GSVector4(_mm_mul_pd(_mm_castps_pd(m), _mm_castps_pd(v_.m)));
@@ -2944,8 +3076,8 @@ public:
// One bit per byte of the 256-bit vector, taken from each byte's top bit.
ALWAYS_INLINE u32 mask() const { return static_cast<u32>(_mm256_movemask_epi8(m)); }

// True when the top bit of all 32 bytes is set.
ALWAYS_INLINE bool alltrue() const { return mask() == 0xFFFFFFFFu; }

// True when at least one bit of the vector is set (testz returns 1 only for an all-zero vector).
ALWAYS_INLINE bool anytrue() const { return (_mm256_testz_si256(m, m) == 0); }

// True when every bit of the vector is clear.
ALWAYS_INLINE bool allfalse() const { return (_mm256_testz_si256(m, m) != 0); }
template<s32 i>
ALWAYS_INLINE GSVector8i insert8(s32 a) const