mirror of
https://github.com/steev/efikamx.git
synced 2026-02-04 05:36:22 +00:00
x11-libs/pixman: Bump and patch to latest git head
Use 0.21.6 as a base, and patch in git for NEON optimizations
This commit is contained in:
committed by
steev
parent
cfef46b0ea
commit
2a67d77463
23
x11-libs/pixman/Manifest
Normal file
23
x11-libs/pixman/Manifest
Normal file
@@ -0,0 +1,23 @@
|
||||
AUX 0002-Fix-compilation-on-Win32.patch 1373 RMD160 27ab9d8e5ee15ca0ca2316c2088488f77cc04193 SHA1 b60a844b6f97405d5974838126ce1a581a5578fb SHA256 c167e98ac16db2f09d11e71b6acfc2436ea06ad5f5e91d829463e8a2428c8f1d
|
||||
AUX 0003-test-Fix-tests-for-compilation-on-Windows.patch 5857 RMD160 c03eda4e6678e85da3d3aecb6f8af77bbbcba396 SHA1 78bc36587fabe4e5d984c1535d60bac1f27665bd SHA256 c17670b7a3603e3591e5f3264441b01dd1861c24681cea9bb63c1a4896471f09
|
||||
AUX 0004-test-Add-Makefile-for-Win32.patch 2285 RMD160 956800336268328f68cbf80fcf0f1e1a8254ed41 SHA1 9bf6452ad0982af23ead0ebfff1c3a46ab8a1454 SHA256 23b281492ad50c090c3ae3d501f92a039edcd35b4019ba60566394a9b9c99a41
|
||||
AUX 0005-Do-not-include-unused-headers.patch 1138 RMD160 e73bcdb3d39a3fe29a8d61fec12facbe0c15bb1f SHA1 77e320b1f0702e6b31214a7057c759f0cdec37fd SHA256 036fad75930a7a5981d0fe58749c1d1c7b066931d1bbcb7695ad8f45208c66e3
|
||||
AUX 0006-test-Silence-MSVC-warnings.patch 1879 RMD160 1ee25d5477740736c3bbb1c925f14fa45b9baea9 SHA1 17b823ed9bca1423ce3e7df6384820cff5b2c4f7 SHA256 dd835cb47e6f54c7295e181c8cab32924f5b7aa79be630b1dffd4987b04535a2
|
||||
AUX 0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch 18432 RMD160 45c3975ea38fdb4fe9ed927c60a020cf65c30726 SHA1 45023ba64a3c48d73d8d43b70dd38fa885b7ba7f SHA256 b96ae6c8bea2a900dd013f134f5223bf415fdc9f492f3854ee2b095451276857
|
||||
AUX 0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch 3637 RMD160 a8b2d0ddc050a521c1510b0f34c465e6e17d8b1d SHA1 f6b92ea26d7773cc826d63c175742194523b8480 SHA256 aa1354d2395925d53108269dc7f45ca4c16509318af794704c458339541d1ea8
|
||||
AUX 0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch 5981 RMD160 577a6b80b87d4687798e86bd9fe777a536ca9d76 SHA1 ffe4dfd7b3464bf6271ae869483124f4b8df7fd7 SHA256 ab8e918705c5d8bc24944a9b34f1a6d941d6f88cd16db4476566cb4bbf535039
|
||||
AUX 0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch 9330 RMD160 8601746564959d01c01993bd359c4b4db0beec8f SHA1 76b52d88701def10885f9f0592dea3b19707f6c1 SHA256 2da797eebe471eabca3da195ea295faf462c3f38330a11eee18e7247f7370477
|
||||
AUX 0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch 4741 RMD160 9054134e9c656a955c595bf004e51fc5652687fd SHA1 7c361a399fa4a20ca8e413b2cf0f29847d519cb1 SHA256 9c8fb1eb06e054fb0fdfece9c33e0b311a3949ba3550c4a95f5943a914e7a770
|
||||
AUX 0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch 1080 RMD160 6ca0c5e9597a765f03ac719357d9add04acdecea SHA1 6f74002a839afb6e5d91a8565776edfe19a29f07 SHA256 af280e15b33683841a7df486c8bbd21c9268958865652bdaa6389fdd3909a457
|
||||
AUX 0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch 2486 RMD160 cdb29a25ba6726bda75db4c2a37b29d628957085 SHA1 12465119abc6ae4aba91fb4ffa5e21c7d3044ad4 SHA256 88bc1c5118b1550f5b59a16a511da2675697f79bed863368d284e4a1b260f833
|
||||
AUX 0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch 4606 RMD160 2f826d65e0e3b80ad0fe3371137df128336c257f SHA1 75b051949e226dea0ac55d9a2618b688808793e8 SHA256 85d417a2160944b774dad489432b637a5662e0416a3919f095a93607772278ac
|
||||
AUX 0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch 2712 RMD160 8d335c0f1585ad9882fdef63c14328f36a2a7eac SHA1 f0f024b9248a85bc59a503a0088c24a9b97b0646 SHA256 bd97cb792274b8d6d498f07479a314b2ef1d9059b11acc3979871d38abb30ceb
|
||||
AUX 0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch 7188 RMD160 4fa7ee0d71533a47d699b78e8af9e83d60dae450 SHA1 7fa1dea003735971baf7f199a240c8eec6917f30 SHA256 6b99d26015f8953bf43cb3a7495a02efec4b807ce23166ccd5faca711acc2475
|
||||
AUX 0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch 8195 RMD160 4cff07d6bd52966a57148064f4a7a2a9da73838f SHA1 fdd5b62e8f33d0f3f245d86fb1567e26c829a051 SHA256 b17b03a4f7516de8bd803320310b26b2c09b694730ae4eddb5ae56a092da03bb
|
||||
AUX 0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch 6752 RMD160 53aaedec3527bd2a280b4b503504233306f92505 SHA1 db2606645ad5eb1bb0ec42b14116ef3aed0b9a3f SHA256 671c3c9d910f4ed8631149b31795879d35399679b8136847f6dbc94dda885a6d
|
||||
AUX 0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch 2083 RMD160 e4ba51ef2842f4cf42acbfc85dc2ecb09fabe655 SHA1 ca597361fc9da1d4b74a875c145af1cb7e4abb34 SHA256 929326bc1eca3e45a9a42dca9890e9a5422621e0587470a0eb28ed088ff097b3
|
||||
AUX 0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch 2063 RMD160 222b88cd6453fb4adc7f4c14b7b9d89a2c0e5c6d SHA1 f14961b189217cdb125d6ec8c002e8237c7f804a SHA256 ce0e82c68cef5fd9b4f7557ebb8c903bda7b09b496b04f6b7b229f42d7cd4bf0
|
||||
AUX 0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch 2058 RMD160 316bc2158c3dda9edef498582b316020e2a5e3ed SHA1 54d123fdb551a0dede24f5d53fa98e359d5d5d13 SHA256 c540fd8abdb2a23d445f53f0ff35905b27e4104fd4d9c9d59cce2d68d4970a58
|
||||
AUX 0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch 5971 RMD160 5a6956cbf719d928d1e6d5a4cd07f0ec22a62c41 SHA1 f04f6938337145bde6410ad8983e8a7e3749d9fa SHA256 5320bdbf0d1ddd3b753dedae61d5785b9db27ccdaeb49880508138c0e6113f57
|
||||
DIST pixman-0.21.6.tar.bz2 457580 RMD160 6ad5979d123e0268426c08954fd7f6040f7a3859 SHA1 73198f8f9159e3ffc2294806f32fa2c8042b57e6 SHA256 35a9fc00fc55c022318a7ac48eb52de60360beec36008b0037f944f3d0d62e83
|
||||
EBUILD pixman-0.21.6.ebuild 3157 RMD160 9188bc6dac468b30d4888a5a507d2c3145cb36c9 SHA1 a2b33238bc243e1e99b298e65d76482b2d32e73a SHA256 060cb6f797fa67bee354865a599f240150fe80373bab99f164dfd27d10aa76f2
|
||||
42
x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch
Normal file
42
x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch
Normal file
@@ -0,0 +1,42 @@
|
||||
From 20ed723a5a42fb8636bc9a5f32974dec1b66a785 Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Canciani <ranma42@gmail.com>
|
||||
Date: Thu, 24 Feb 2011 10:44:04 +0100
|
||||
Subject: [PATCH 02/22] Fix compilation on Win32
|
||||
|
||||
Makefile.win32 contained a typo and was missing the dependency from
|
||||
the built sources.
|
||||
---
|
||||
pixman/Makefile.win32 | 6 ++++--
|
||||
1 files changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
|
||||
index 775fb5e..b5f9397 100644
|
||||
--- a/pixman/Makefile.win32
|
||||
+++ b/pixman/Makefile.win32
|
||||
@@ -56,6 +56,8 @@ SOURCES = \
|
||||
pixman-general.c \
|
||||
$(NULL)
|
||||
|
||||
+BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
|
||||
+
|
||||
# MMX compilation flags
|
||||
ifeq ($(MMX_VAR),on)
|
||||
CFLAGS += $(MMX_CFLAGS)
|
||||
@@ -122,7 +124,7 @@ endif
|
||||
endif
|
||||
|
||||
# pixman compilation and linking
|
||||
-$(CFG_VAR)/%.obj: %.c
|
||||
+$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
|
||||
@mkdir -p $(CFG_VAR)
|
||||
@$(CC) -c $(CFLAGS) -Fo"$@" $<
|
||||
|
||||
@@ -141,4 +143,4 @@ pixman-combine64.h: pixman-combine.h.template make-combine.pl
|
||||
|
||||
clean_r:
|
||||
@rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
|
||||
- @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0
|
||||
+ @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,232 @@
|
||||
From 11305b4ecdd36a17592c5c75de9157874853ab20 Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Canciani <ranma42@gmail.com>
|
||||
Date: Tue, 22 Feb 2011 21:46:37 +0100
|
||||
Subject: [PATCH 03/22] test: Fix tests for compilation on Windows
|
||||
|
||||
The Microsoft C compiler cannot handle subobject initialization and
|
||||
Win32 does not provide snprintf.
|
||||
|
||||
Work around these limitations by using normal struct initialization
|
||||
and using sprintf (a manual check shows that the buffer size is
|
||||
sufficient).
|
||||
---
|
||||
test/composite.c | 29 +++++++++++++--------------
|
||||
test/fetch-test.c | 52 ++++++++++++++++++++++----------------------------
|
||||
test/trap-crasher.c | 20 +++++++++---------
|
||||
3 files changed, 47 insertions(+), 54 deletions(-)
|
||||
|
||||
diff --git a/test/composite.c b/test/composite.c
|
||||
index e14f954..08c6689 100644
|
||||
--- a/test/composite.c
|
||||
+++ b/test/composite.c
|
||||
@@ -617,18 +617,18 @@ eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
|
||||
}
|
||||
|
||||
static char *
|
||||
-describe_image (image_t *info, char *buf, int buflen)
|
||||
+describe_image (image_t *info, char *buf)
|
||||
{
|
||||
if (info->size)
|
||||
{
|
||||
- snprintf (buf, buflen, "%s %dx%d%s",
|
||||
- info->format->name,
|
||||
- info->size, info->size,
|
||||
- info->repeat ? "R" :"");
|
||||
+ sprintf (buf, "%s %dx%d%s",
|
||||
+ info->format->name,
|
||||
+ info->size, info->size,
|
||||
+ info->repeat ? "R" :"");
|
||||
}
|
||||
else
|
||||
{
|
||||
- snprintf (buf, buflen, "solid");
|
||||
+ sprintf (buf, "solid");
|
||||
}
|
||||
|
||||
return buf;
|
||||
@@ -710,10 +710,9 @@ composite_test (image_t *dst,
|
||||
{
|
||||
char buf[40];
|
||||
|
||||
- snprintf (buf, sizeof (buf),
|
||||
- "%s %scomposite",
|
||||
- op->name,
|
||||
- component_alpha ? "CA " : "");
|
||||
+ sprintf (buf, "%s %scomposite",
|
||||
+ op->name,
|
||||
+ component_alpha ? "CA " : "");
|
||||
|
||||
printf ("%s test error of %.4f --\n"
|
||||
" R G B A\n"
|
||||
@@ -735,9 +734,9 @@ composite_test (image_t *dst,
|
||||
mask->color->b, mask->color->a,
|
||||
dst->color->r, dst->color->g,
|
||||
dst->color->b, dst->color->a);
|
||||
- printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
|
||||
- printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf)));
|
||||
- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
|
||||
+ printf ("src: %s, ", describe_image (src, buf));
|
||||
+ printf ("mask: %s, ", describe_image (mask, buf));
|
||||
+ printf ("dst: %s\n\n", describe_image (dst, buf));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -747,8 +746,8 @@ composite_test (image_t *dst,
|
||||
src->color->b, src->color->a,
|
||||
dst->color->r, dst->color->g,
|
||||
dst->color->b, dst->color->a);
|
||||
- printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
|
||||
- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
|
||||
+ printf ("src: %s, ", describe_image (src, buf));
|
||||
+ printf ("dst: %s\n\n", describe_image (dst, buf));
|
||||
}
|
||||
|
||||
success = FALSE;
|
||||
diff --git a/test/fetch-test.c b/test/fetch-test.c
|
||||
index 2ca16dd..314a072 100644
|
||||
--- a/test/fetch-test.c
|
||||
+++ b/test/fetch-test.c
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
static pixman_indexed_t mono_palette =
|
||||
{
|
||||
- .rgba = { 0x00000000, 0x00ffffff },
|
||||
+ 0, { 0x00000000, 0x00ffffff },
|
||||
};
|
||||
|
||||
|
||||
@@ -24,57 +24,53 @@ typedef struct {
|
||||
static testcase_t testcases[] =
|
||||
{
|
||||
{
|
||||
- .format = PIXMAN_a8r8g8b8,
|
||||
- .width = 2, .height = 2,
|
||||
- .stride = 8,
|
||||
- .src = { 0x00112233, 0x44556677,
|
||||
- 0x8899aabb, 0xccddeeff },
|
||||
- .dst = { 0x00112233, 0x44556677,
|
||||
- 0x8899aabb, 0xccddeeff },
|
||||
- .indexed = NULL,
|
||||
+ PIXMAN_a8r8g8b8,
|
||||
+ 2, 2,
|
||||
+ 8,
|
||||
+ { 0x00112233, 0x44556677,
|
||||
+ 0x8899aabb, 0xccddeeff },
|
||||
+ { 0x00112233, 0x44556677,
|
||||
+ 0x8899aabb, 0xccddeeff },
|
||||
+ NULL,
|
||||
},
|
||||
{
|
||||
- .format = PIXMAN_g1,
|
||||
- .width = 8, .height = 2,
|
||||
- .stride = 4,
|
||||
+ PIXMAN_g1,
|
||||
+ 8, 2,
|
||||
+ 4,
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
- .src =
|
||||
{
|
||||
0xaa000000,
|
||||
0x55000000
|
||||
},
|
||||
#else
|
||||
- .src =
|
||||
{
|
||||
0x00000055,
|
||||
0x000000aa
|
||||
},
|
||||
#endif
|
||||
- .dst =
|
||||
{
|
||||
0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
|
||||
0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
|
||||
},
|
||||
- .indexed = &mono_palette,
|
||||
+ &mono_palette,
|
||||
},
|
||||
#if 0
|
||||
{
|
||||
- .format = PIXMAN_g8,
|
||||
- .width = 4, .height = 2,
|
||||
- .stride = 4,
|
||||
- .src = { 0x01234567,
|
||||
- 0x89abcdef },
|
||||
- .dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
|
||||
- 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
|
||||
+ PIXMAN_g8,
|
||||
+ 4, 2,
|
||||
+ 4,
|
||||
+ { 0x01234567,
|
||||
+ 0x89abcdef },
|
||||
+ { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
|
||||
+ 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
|
||||
},
|
||||
#endif
|
||||
/* FIXME: make this work on big endian */
|
||||
{
|
||||
- .format = PIXMAN_yv12,
|
||||
- .width = 8, .height = 2,
|
||||
- .stride = 8,
|
||||
+ PIXMAN_yv12,
|
||||
+ 8, 2,
|
||||
+ 8,
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
- .src =
|
||||
{
|
||||
0x00ff00ff, 0x00ff00ff,
|
||||
0xff00ff00, 0xff00ff00,
|
||||
@@ -82,7 +78,6 @@ static testcase_t testcases[] =
|
||||
0x800080ff
|
||||
},
|
||||
#else
|
||||
- .src =
|
||||
{
|
||||
0xff00ff00, 0xff00ff00,
|
||||
0x00ff00ff, 0x00ff00ff,
|
||||
@@ -90,7 +85,6 @@ static testcase_t testcases[] =
|
||||
0xff800080
|
||||
},
|
||||
#endif
|
||||
- .dst =
|
||||
{
|
||||
0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
|
||||
0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
|
||||
diff --git a/test/trap-crasher.c b/test/trap-crasher.c
|
||||
index 42b82f6..7485e62 100644
|
||||
--- a/test/trap-crasher.c
|
||||
+++ b/test/trap-crasher.c
|
||||
@@ -7,21 +7,21 @@ main()
|
||||
pixman_image_t *dst;
|
||||
pixman_trapezoid_t traps[1] = {
|
||||
{
|
||||
- .top = 2147483646,
|
||||
- .bottom = 2147483647,
|
||||
- .left = {
|
||||
- .p1 = { .x = 0, .y = 0 },
|
||||
- .p2 = { .x = 0, .y = 2147483647 }
|
||||
+ 2147483646,
|
||||
+ 2147483647,
|
||||
+ {
|
||||
+ { 0, 0 },
|
||||
+ { 0, 2147483647 }
|
||||
},
|
||||
- .right = {
|
||||
- .p1 = { .x = 65536, .y = 0 },
|
||||
- .p2 = { .x = 0, .y = 2147483647 }
|
||||
+ {
|
||||
+ { 65536, 0 },
|
||||
+ { 0, 2147483647 }
|
||||
}
|
||||
},
|
||||
};
|
||||
-
|
||||
+
|
||||
dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
|
||||
-
|
||||
+
|
||||
pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
|
||||
return (0);
|
||||
}
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
92
x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch
Normal file
92
x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch
Normal file
@@ -0,0 +1,92 @@
|
||||
From 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Canciani <ranma42@gmail.com>
|
||||
Date: Tue, 22 Feb 2011 22:04:49 +0100
|
||||
Subject: [PATCH 04/22] test: Add Makefile for Win32
|
||||
|
||||
---
|
||||
test/Makefile.win32 | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 files changed, 73 insertions(+), 0 deletions(-)
|
||||
create mode 100644 test/Makefile.win32
|
||||
|
||||
diff --git a/test/Makefile.win32 b/test/Makefile.win32
|
||||
new file mode 100644
|
||||
index 0000000..c71afe1
|
||||
--- /dev/null
|
||||
+++ b/test/Makefile.win32
|
||||
@@ -0,0 +1,73 @@
|
||||
+CC = cl
|
||||
+LINK = link
|
||||
+
|
||||
+CFG_VAR = $(CFG)
|
||||
+ifeq ($(CFG_VAR),)
|
||||
+CFG_VAR=release
|
||||
+endif
|
||||
+
|
||||
+CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_BIND_TO_CURRENT_VCLIBS_VERSION -D_MT -I../pixman -I. -I../
|
||||
+TEST_LDADD = ../pixman/$(CFG_VAR)/pixman-1.lib
|
||||
+INCLUDES = -I../pixman -I$(top_builddir)/pixman
|
||||
+
|
||||
+# optimization flags
|
||||
+ifeq ($(CFG_VAR),debug)
|
||||
+CFLAGS += -Od -Zi
|
||||
+else
|
||||
+CFLAGS += -O2
|
||||
+endif
|
||||
+
|
||||
+SOURCES = \
|
||||
+ a1-trap-test.c \
|
||||
+ pdf-op-test.c \
|
||||
+ region-test.c \
|
||||
+ region-translate-test.c \
|
||||
+ fetch-test.c \
|
||||
+ oob-test.c \
|
||||
+ trap-crasher.c \
|
||||
+ alpha-loop.c \
|
||||
+ scaling-crash-test.c \
|
||||
+ gradient-crash-test.c \
|
||||
+ alphamap.c \
|
||||
+ stress-test.c \
|
||||
+ composite-traps-test.c \
|
||||
+ blitters-test.c \
|
||||
+ scaling-test.c \
|
||||
+ affine-test.c \
|
||||
+ composite.c \
|
||||
+ utils.c
|
||||
+
|
||||
+TESTS = \
|
||||
+ $(CFG_VAR)/a1-trap-test.exe \
|
||||
+ $(CFG_VAR)/pdf-op-test.exe \
|
||||
+ $(CFG_VAR)/region-test.exe \
|
||||
+ $(CFG_VAR)/region-translate-test.exe \
|
||||
+ $(CFG_VAR)/fetch-test.exe \
|
||||
+ $(CFG_VAR)/oob-test.exe \
|
||||
+ $(CFG_VAR)/trap-crasher.exe \
|
||||
+ $(CFG_VAR)/alpha-loop.exe \
|
||||
+ $(CFG_VAR)/scaling-crash-test.exe \
|
||||
+ $(CFG_VAR)/gradient-crash-test.exe \
|
||||
+ $(CFG_VAR)/alphamap.exe \
|
||||
+ $(CFG_VAR)/stress-test.exe \
|
||||
+ $(CFG_VAR)/composite-traps-test.exe \
|
||||
+ $(CFG_VAR)/blitters-test.exe \
|
||||
+ $(CFG_VAR)/scaling-test.exe \
|
||||
+ $(CFG_VAR)/affine-test.exe \
|
||||
+ $(CFG_VAR)/composite.exe
|
||||
+
|
||||
+
|
||||
+OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
|
||||
+
|
||||
+$(CFG_VAR)/%.obj: %.c
|
||||
+ @mkdir -p $(CFG_VAR)
|
||||
+ @$(CC) -c $(CFLAGS) -Fo"$@" $<
|
||||
+
|
||||
+$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj
|
||||
+ $(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD)
|
||||
+
|
||||
+all: $(OBJECTS) $(TESTS)
|
||||
+ @exit 0
|
||||
+
|
||||
+clean:
|
||||
+ @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb || exit 0
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
From 8868778ea1fdc8e70da76b3b00ea78106c5840d8 Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Canciani <ranma42@gmail.com>
|
||||
Date: Tue, 22 Feb 2011 22:43:48 +0100
|
||||
Subject: [PATCH 05/22] Do not include unused headers
|
||||
|
||||
pixman-combine32.h is included without being used both in
|
||||
pixman-image.c and in pixman-general.c.
|
||||
---
|
||||
pixman/pixman-general.c | 2 --
|
||||
pixman/pixman-image.c | 1 -
|
||||
2 files changed, 0 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
|
||||
index 16ea3a4..872fb7e 100644
|
||||
--- a/pixman/pixman-general.c
|
||||
+++ b/pixman/pixman-general.c
|
||||
@@ -36,8 +36,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pixman-private.h"
|
||||
-#include "pixman-combine32.h"
|
||||
-#include "pixman-private.h"
|
||||
|
||||
static void
|
||||
general_src_iter_init (pixman_implementation_t *imp,
|
||||
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
|
||||
index 9103ca6..84bacf8 100644
|
||||
--- a/pixman/pixman-image.c
|
||||
+++ b/pixman/pixman-image.c
|
||||
@@ -30,7 +30,6 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include "pixman-private.h"
|
||||
-#include "pixman-combine32.h"
|
||||
|
||||
pixman_bool_t
|
||||
_pixman_init_gradient (gradient_t * gradient,
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
63
x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch
Normal file
63
x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch
Normal file
@@ -0,0 +1,63 @@
|
||||
From 9ebde285fa990bfa1524f166fbfb1368c346b14a Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Canciani <ranma42@gmail.com>
|
||||
Date: Thu, 24 Feb 2011 12:53:39 +0100
|
||||
Subject: [PATCH 06/22] test: Silence MSVC warnings
|
||||
|
||||
MSVC does not notice non-returning functions (abort() / assert(0))
|
||||
and warns about paths which end with them in non-void functions:
|
||||
|
||||
c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
|
||||
warning C4715: 'reader' : not all control paths return a value
|
||||
c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
|
||||
warning C4715: 'real_reader' : not all control paths return a value
|
||||
c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
|
||||
warning C4715: 'calc_op' : not all control paths return a value
|
||||
|
||||
These warnings can be silenced by adding a return after the
|
||||
termination call.
|
||||
---
|
||||
test/composite.c | 1 +
|
||||
test/fetch-test.c | 1 +
|
||||
test/stress-test.c | 2 +-
|
||||
3 files changed, 3 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/test/composite.c b/test/composite.c
|
||||
index 08c6689..a86e5ed 100644
|
||||
--- a/test/composite.c
|
||||
+++ b/test/composite.c
|
||||
@@ -426,6 +426,7 @@ calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
|
||||
case PIXMAN_OP_HSL_LUMINOSITY:
|
||||
default:
|
||||
abort();
|
||||
+ return 0; /* silence MSVC */
|
||||
}
|
||||
#undef mult_chan
|
||||
}
|
||||
diff --git a/test/fetch-test.c b/test/fetch-test.c
|
||||
index 314a072..60bc765 100644
|
||||
--- a/test/fetch-test.c
|
||||
+++ b/test/fetch-test.c
|
||||
@@ -110,6 +110,7 @@ reader (const void *src, int size)
|
||||
return *(uint32_t *)src;
|
||||
default:
|
||||
assert(0);
|
||||
+ return 0; /* silence MSVC */
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/test/stress-test.c b/test/stress-test.c
|
||||
index bcbc1f8..166dc6d 100644
|
||||
--- a/test/stress-test.c
|
||||
+++ b/test/stress-test.c
|
||||
@@ -128,7 +128,7 @@ real_reader (const void *src, int size)
|
||||
return *(uint32_t *)src;
|
||||
default:
|
||||
assert (0);
|
||||
- break;
|
||||
+ return 0; /* silence MSVC */
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,466 @@
|
||||
From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Mon, 21 Feb 2011 01:29:02 +0200
|
||||
Subject: [PATCH 07/22] Main loop template for fast single pass bilinear scaling
|
||||
|
||||
Can be used for implementing SIMD optimized fast path
|
||||
functions which work with bilinear scaled source images.
|
||||
|
||||
Similar to the template for nearest scaling main loop, the
|
||||
following types of mask are supported:
|
||||
1. no mask
|
||||
2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
|
||||
3. solid mask
|
||||
|
||||
PAD repeat is fully supported. NONE repeat is partially
|
||||
supported (right now only works if source image has alpha
|
||||
channel or when alpha channel of the source image does not
|
||||
have any effect on the compositing operation).
|
||||
---
|
||||
pixman/pixman-fast-path.h | 432 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 files changed, 432 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
|
||||
index d081222..1885d47 100644
|
||||
--- a/pixman/pixman-fast-path.h
|
||||
+++ b/pixman/pixman-fast-path.h
|
||||
@@ -587,4 +587,436 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
|
||||
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
|
||||
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
|
||||
|
||||
+/*****************************************************************************/
|
||||
+
|
||||
+/*
|
||||
+ * Identify 5 zones in each scanline for bilinear scaling. Depending on
|
||||
+ * whether 2 pixels to be interpolated are fetched from the image itself,
|
||||
+ * from the padding area around it or from both image and padding area.
|
||||
+ */
|
||||
+static force_inline void
|
||||
+bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
|
||||
+ pixman_fixed_t vx,
|
||||
+ pixman_fixed_t unit_x,
|
||||
+ int32_t * left_pad,
|
||||
+ int32_t * left_tz,
|
||||
+ int32_t * width,
|
||||
+ int32_t * right_tz,
|
||||
+ int32_t * right_pad)
|
||||
+{
|
||||
+ int width1 = *width, left_pad1, right_pad1;
|
||||
+ int width2 = *width, left_pad2, right_pad2;
|
||||
+
|
||||
+ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
|
||||
+ &width1, &left_pad1, &right_pad1);
|
||||
+ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
|
||||
+ unit_x, &width2, &left_pad2, &right_pad2);
|
||||
+
|
||||
+ *left_pad = left_pad2;
|
||||
+ *left_tz = left_pad1 - left_pad2;
|
||||
+ *right_tz = right_pad2 - right_pad1;
|
||||
+ *right_pad = right_pad1;
|
||||
+ *width -= *left_pad + *left_tz + *right_tz + *right_pad;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Main loop template for single pass bilinear scaling. It needs to be
|
||||
+ * provided with 'scanline_func' which should do the compositing operation.
|
||||
+ * The needed function has the following prototype:
|
||||
+ *
|
||||
+ * scanline_func (dst_type_t * dst,
|
||||
+ * const mask_type_ * mask,
|
||||
+ * const src_type_t * src_top,
|
||||
+ * const src_type_t * src_bottom,
|
||||
+ * int32_t width,
|
||||
+ * int weight_top,
|
||||
+ * int weight_bottom,
|
||||
+ * pixman_fixed_t vx,
|
||||
+ * pixman_fixed_t unit_x,
|
||||
+ * pixman_fixed_t max_vx,
|
||||
+ * pixman_bool_t zero_src)
|
||||
+ *
|
||||
+ * Where:
|
||||
+ * dst - destination scanline buffer for storing results
|
||||
+ * mask - mask buffer (or single value for solid mask)
|
||||
+ * src_top, src_bottom - two source scanlines
|
||||
+ * width - number of pixels to process
|
||||
+ * weight_top - weight of the top row for interpolation
|
||||
+ * weight_bottom - weight of the bottom row for interpolation
|
||||
+ * vx - initial position for fetching the first pair of
|
||||
+ * pixels from the source buffer
|
||||
+ * unit_x - position increment needed to move to the next pair
|
||||
+ * of pixels
|
||||
+ * max_vx - image size as a fixed point value, can be used for
|
||||
+ * implementing NORMAL repeat (when it is supported)
|
||||
+ * zero_src - boolean hint variable, which is set to TRUE when
|
||||
+ * all source pixels are fetched from zero padding
|
||||
+ * zone for NONE repeat
|
||||
+ *
|
||||
+ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
|
||||
+ * but sometimes it may be less than that for NONE repeat when handling
|
||||
+ * fuzzy antialiased top or bottom image edges. Also both top and
|
||||
+ * bottom weight variables are guaranteed to have value in 0-255
|
||||
+ * range and can fit into unsigned byte or be used with 8-bit SIMD
|
||||
+ * multiplication instructions.
|
||||
+ */
|
||||
+#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
|
||||
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
|
||||
+static void \
|
||||
+fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
|
||||
+ pixman_op_t op, \
|
||||
+ pixman_image_t * src_image, \
|
||||
+ pixman_image_t * mask_image, \
|
||||
+ pixman_image_t * dst_image, \
|
||||
+ int32_t src_x, \
|
||||
+ int32_t src_y, \
|
||||
+ int32_t mask_x, \
|
||||
+ int32_t mask_y, \
|
||||
+ int32_t dst_x, \
|
||||
+ int32_t dst_y, \
|
||||
+ int32_t width, \
|
||||
+ int32_t height) \
|
||||
+{ \
|
||||
+ dst_type_t *dst_line; \
|
||||
+ mask_type_t *mask_line; \
|
||||
+ src_type_t *src_first_line; \
|
||||
+ int y1, y2; \
|
||||
+ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
|
||||
+ pixman_vector_t v; \
|
||||
+ pixman_fixed_t vx, vy; \
|
||||
+ pixman_fixed_t unit_x, unit_y; \
|
||||
+ int32_t left_pad, left_tz, right_tz, right_pad; \
|
||||
+ \
|
||||
+ dst_type_t *dst; \
|
||||
+ mask_type_t solid_mask; \
|
||||
+ const mask_type_t *mask = &solid_mask; \
|
||||
+ int src_stride, mask_stride, dst_stride; \
|
||||
+ \
|
||||
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
|
||||
+ if (have_mask) \
|
||||
+ { \
|
||||
+ if (mask_is_solid) \
|
||||
+ { \
|
||||
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
|
||||
+ mask_stride = 0; \
|
||||
+ } \
|
||||
+ else \
|
||||
+ { \
|
||||
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
|
||||
+ mask_stride, mask_line, 1); \
|
||||
+ } \
|
||||
+ } \
|
||||
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
|
||||
+ * transformed from destination space to source space */ \
|
||||
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
|
||||
+ \
|
||||
+ /* reference point is the center of the pixel */ \
|
||||
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
|
||||
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
|
||||
+ v.vector[2] = pixman_fixed_1; \
|
||||
+ \
|
||||
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
|
||||
+ return; \
|
||||
+ \
|
||||
+ unit_x = src_image->common.transform->matrix[0][0]; \
|
||||
+ unit_y = src_image->common.transform->matrix[1][1]; \
|
||||
+ \
|
||||
+ v.vector[0] -= pixman_fixed_1 / 2; \
|
||||
+ v.vector[1] -= pixman_fixed_1 / 2; \
|
||||
+ \
|
||||
+ vy = v.vector[1]; \
|
||||
+ \
|
||||
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
|
||||
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
|
||||
+ { \
|
||||
+ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
|
||||
+ &left_pad, &left_tz, &width, &right_tz, &right_pad); \
|
||||
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
|
||||
+ { \
|
||||
+ /* PAD repeat does not need special handling for 'transition zones' and */ \
|
||||
+ /* they can be combined with 'padding zones' safely */ \
|
||||
+ left_pad += left_tz; \
|
||||
+ right_pad += right_tz; \
|
||||
+ left_tz = right_tz = 0; \
|
||||
+ } \
|
||||
+ v.vector[0] += left_pad * unit_x; \
|
||||
+ } \
|
||||
+ \
|
||||
+ while (--height >= 0) \
|
||||
+ { \
|
||||
+ int weight1, weight2; \
|
||||
+ dst = dst_line; \
|
||||
+ dst_line += dst_stride; \
|
||||
+ vx = v.vector[0]; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ { \
|
||||
+ mask = mask_line; \
|
||||
+ mask_line += mask_stride; \
|
||||
+ } \
|
||||
+ \
|
||||
+ y1 = pixman_fixed_to_int (vy); \
|
||||
+ weight2 = (vy >> 8) & 0xff; \
|
||||
+ if (weight2) \
|
||||
+ { \
|
||||
+ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
|
||||
+ y2 = y1 + 1; \
|
||||
+ weight1 = 256 - weight2; \
|
||||
+ } \
|
||||
+ else \
|
||||
+ { \
|
||||
+ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
|
||||
+ y2 = y1; \
|
||||
+ weight1 = weight2 = 128; \
|
||||
+ } \
|
||||
+ vy += unit_y; \
|
||||
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
|
||||
+ { \
|
||||
+ src_type_t *src1, *src2; \
|
||||
+ src_type_t buf1[2]; \
|
||||
+ src_type_t buf2[2]; \
|
||||
+ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
|
||||
+ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
|
||||
+ src1 = src_first_line + src_stride * y1; \
|
||||
+ src2 = src_first_line + src_stride * y2; \
|
||||
+ \
|
||||
+ if (left_pad > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = buf1[1] = src1[0]; \
|
||||
+ buf2[0] = buf2[1] = src2[0]; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
|
||||
+ dst += left_pad; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += left_pad; \
|
||||
+ } \
|
||||
+ if (width > 0) \
|
||||
+ { \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
|
||||
+ dst += width; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += width; \
|
||||
+ } \
|
||||
+ if (right_pad > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
|
||||
+ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
|
||||
+ } \
|
||||
+ } \
|
||||
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
|
||||
+ { \
|
||||
+ src_type_t *src1, *src2; \
|
||||
+ src_type_t buf1[2]; \
|
||||
+ src_type_t buf2[2]; \
|
||||
+ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
|
||||
+ if (y1 < 0) \
|
||||
+ { \
|
||||
+ weight1 = 0; \
|
||||
+ y1 = 0; \
|
||||
+ } \
|
||||
+ if (y1 >= src_image->bits.height) \
|
||||
+ { \
|
||||
+ weight1 = 0; \
|
||||
+ y1 = src_image->bits.height - 1; \
|
||||
+ } \
|
||||
+ if (y2 < 0) \
|
||||
+ { \
|
||||
+ weight2 = 0; \
|
||||
+ y2 = 0; \
|
||||
+ } \
|
||||
+ if (y2 >= src_image->bits.height) \
|
||||
+ { \
|
||||
+ weight2 = 0; \
|
||||
+ y2 = src_image->bits.height - 1; \
|
||||
+ } \
|
||||
+ src1 = src_first_line + src_stride * y1; \
|
||||
+ src2 = src_first_line + src_stride * y2; \
|
||||
+ \
|
||||
+ if (left_pad > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = buf1[1] = 0; \
|
||||
+ buf2[0] = buf2[1] = 0; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
|
||||
+ dst += left_pad; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += left_pad; \
|
||||
+ } \
|
||||
+ if (left_tz > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = 0; \
|
||||
+ buf1[1] = src1[0]; \
|
||||
+ buf2[0] = 0; \
|
||||
+ buf2[1] = src2[0]; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, left_tz, weight1, weight2, \
|
||||
+ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
|
||||
+ dst += left_tz; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += left_tz; \
|
||||
+ vx += left_tz * unit_x; \
|
||||
+ } \
|
||||
+ if (width > 0) \
|
||||
+ { \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
|
||||
+ dst += width; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += width; \
|
||||
+ vx += width * unit_x; \
|
||||
+ } \
|
||||
+ if (right_tz > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = src1[src_image->bits.width - 1]; \
|
||||
+ buf1[1] = 0; \
|
||||
+ buf2[0] = src2[src_image->bits.width - 1]; \
|
||||
+ buf2[1] = 0; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, right_tz, weight1, weight2, \
|
||||
+ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
|
||||
+ dst += right_tz; \
|
||||
+ if (have_mask && !mask_is_solid) \
|
||||
+ mask += right_tz; \
|
||||
+ } \
|
||||
+ if (right_pad > 0) \
|
||||
+ { \
|
||||
+ buf1[0] = buf1[1] = 0; \
|
||||
+ buf2[0] = buf2[1] = 0; \
|
||||
+ scanline_func (dst, mask, \
|
||||
+ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
|
||||
+ } \
|
||||
+ } \
|
||||
+ else \
|
||||
+ { \
|
||||
+ scanline_func (dst, mask, src_first_line + src_stride * y1, \
|
||||
+ src_first_line + src_stride * y2, width, \
|
||||
+ weight1, weight2, vx, unit_x, max_vx, FALSE); \
|
||||
+ } \
|
||||
+ } \
|
||||
+}
|
||||
+
|
||||
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
|
||||
+#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
|
||||
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
|
||||
+ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
|
||||
+ dst_type_t, repeat_mode, have_mask, mask_is_solid)
|
||||
+
|
||||
+#define SCALED_BILINEAR_FLAGS \
|
||||
+ (FAST_PATH_SCALE_TRANSFORM | \
|
||||
+ FAST_PATH_NO_ALPHA_MAP | \
|
||||
+ FAST_PATH_BILINEAR_FILTER | \
|
||||
+ FAST_PATH_NO_ACCESSORS | \
|
||||
+ FAST_PATH_NARROW_FORMAT)
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_PAD_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_null, 0, \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_NONE_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_null, 0, \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
|
||||
+ PIXMAN_null, 0, \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_PAD_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_NONE_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
|
||||
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_PAD_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ (SCALED_BILINEAR_FLAGS | \
|
||||
+ FAST_PATH_NONE_REPEAT | \
|
||||
+ FAST_PATH_X_UNIT_POSITIVE), \
|
||||
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
|
||||
+ { PIXMAN_OP_ ## op, \
|
||||
+ PIXMAN_ ## s, \
|
||||
+ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
|
||||
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
|
||||
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
|
||||
+ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
|
||||
+ }
|
||||
+
|
||||
+/* Prefer the use of 'cover' variant, because it is faster */
|
||||
+#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
|
||||
+ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
|
||||
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
|
||||
+
|
||||
+#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
|
||||
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
|
||||
+ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
|
||||
+
|
||||
#endif
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
From 0df43b8ae5031dd83775d00b57b6bed809db0e89 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Mon, 21 Feb 2011 02:07:09 +0200
|
||||
Subject: [PATCH 08/22] test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
|
||||
|
||||
Individual correctness check for the new bilinear scaling related
|
||||
supplementary function. This test program uses a bit wider range
|
||||
of input arguments, not covered by other tests.
|
||||
---
|
||||
test/Makefile.am | 2 +
|
||||
test/scaling-helpers-test.c | 93 +++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 95 insertions(+), 0 deletions(-)
|
||||
create mode 100644 test/scaling-helpers-test.c
|
||||
|
||||
diff --git a/test/Makefile.am b/test/Makefile.am
|
||||
index 057e9ce..9dc7219 100644
|
||||
--- a/test/Makefile.am
|
||||
+++ b/test/Makefile.am
|
||||
@@ -13,6 +13,7 @@ TESTPROGRAMS = \
|
||||
trap-crasher \
|
||||
alpha-loop \
|
||||
scaling-crash-test \
|
||||
+ scaling-helpers-test \
|
||||
gradient-crash-test \
|
||||
alphamap \
|
||||
stress-test \
|
||||
@@ -33,6 +34,7 @@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
|
||||
composite_SOURCES = composite.c utils.c utils.h
|
||||
gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
|
||||
stress_test_SOURCES = stress-test.c utils.c utils.h
|
||||
+scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
|
||||
|
||||
# Benchmarks
|
||||
|
||||
diff --git a/test/scaling-helpers-test.c b/test/scaling-helpers-test.c
|
||||
new file mode 100644
|
||||
index 0000000..c186138
|
||||
--- /dev/null
|
||||
+++ b/test/scaling-helpers-test.c
|
||||
@@ -0,0 +1,93 @@
|
||||
+#include <config.h>
|
||||
+#include <stdint.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <stdio.h>
|
||||
+#include <assert.h>
|
||||
+#include "utils.h"
|
||||
+#include "pixman-fast-path.h"
|
||||
+
|
||||
+/* A trivial reference implementation for
|
||||
+ * 'bilinear_pad_repeat_get_scanline_bounds'
|
||||
+ */
|
||||
+static void
|
||||
+bilinear_pad_repeat_get_scanline_bounds_ref (int32_t source_image_width,
|
||||
+ pixman_fixed_t vx_,
|
||||
+ pixman_fixed_t unit_x,
|
||||
+ int32_t * left_pad,
|
||||
+ int32_t * left_tz,
|
||||
+ int32_t * width,
|
||||
+ int32_t * right_tz,
|
||||
+ int32_t * right_pad)
|
||||
+{
|
||||
+ int w = *width;
|
||||
+ *left_pad = 0;
|
||||
+ *left_tz = 0;
|
||||
+ *width = 0;
|
||||
+ *right_tz = 0;
|
||||
+ *right_pad = 0;
|
||||
+ int64_t vx = vx_;
|
||||
+ while (--w >= 0)
|
||||
+ {
|
||||
+ if (vx < 0)
|
||||
+ {
|
||||
+ if (vx + pixman_fixed_1 < 0)
|
||||
+ *left_pad += 1;
|
||||
+ else
|
||||
+ *left_tz += 1;
|
||||
+ }
|
||||
+ else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width))
|
||||
+ {
|
||||
+ if (vx >= pixman_int_to_fixed (source_image_width))
|
||||
+ *right_pad += 1;
|
||||
+ else
|
||||
+ *right_tz += 1;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ *width += 1;
|
||||
+ }
|
||||
+ vx += unit_x;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+main (void)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < 10000; i++)
|
||||
+ {
|
||||
+ int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1;
|
||||
+ int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2;
|
||||
+ pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16);
|
||||
+ int32_t width = lcg_rand_N(10000);
|
||||
+ int32_t source_image_width = lcg_rand_N(10000) + 1;
|
||||
+ pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1;
|
||||
+ width1 = width2 = width;
|
||||
+
|
||||
+ bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width,
|
||||
+ vx,
|
||||
+ unit_x,
|
||||
+ &left_pad1,
|
||||
+ &left_tz1,
|
||||
+ &width1,
|
||||
+ &right_tz1,
|
||||
+ &right_pad1);
|
||||
+
|
||||
+ bilinear_pad_repeat_get_scanline_bounds (source_image_width,
|
||||
+ vx,
|
||||
+ unit_x,
|
||||
+ &left_pad2,
|
||||
+ &left_tz2,
|
||||
+ &width2,
|
||||
+ &right_tz2,
|
||||
+ &right_pad2);
|
||||
+
|
||||
+ assert (left_pad1 == left_pad2);
|
||||
+ assert (left_tz1 == left_tz2);
|
||||
+ assert (width1 == width2);
|
||||
+ assert (right_tz1 == right_tz2);
|
||||
+ assert (right_pad1 == right_pad2);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
From 350029396d911941591149cc82b5e68a78ad6747 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Mon, 21 Feb 2011 20:18:02 +0200
|
||||
Subject: [PATCH 09/22] SSE2 optimization for bilinear scaled 'src_8888_8888'
|
||||
|
||||
A primitive naive implementation of bilinear scaling using SSE2 intrinsics,
|
||||
which only handles one pixel at a time. It is approximately 2x faster than
|
||||
pixman general compositing path. Single pass processing without intermediate
|
||||
temporary buffer contributes to ~15% and loop unrolling contributes to ~20%
|
||||
of this speedup.
|
||||
|
||||
Benchmark on Intel Core i7 (x86-64):
|
||||
Using cairo-perf-trace:
|
||||
before: image firefox-planet-gnome 12.566 12.610 0.23% 6/6
|
||||
after: image firefox-planet-gnome 10.961 11.013 0.19% 5/6
|
||||
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
|
||||
after: op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
|
||||
---
|
||||
pixman/pixman-sse2.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 files changed, 112 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
|
||||
index 88287b4..696005f 100644
|
||||
--- a/pixman/pixman-sse2.c
|
||||
+++ b/pixman/pixman-sse2.c
|
||||
@@ -5567,6 +5567,114 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
|
||||
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
|
||||
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
|
||||
|
||||
+static void
|
||||
+bilinear_interpolate_line_sse2 (uint32_t * out,
|
||||
+ const uint32_t * top,
|
||||
+ const uint32_t * bottom,
|
||||
+ int wt,
|
||||
+ int wb,
|
||||
+ pixman_fixed_t x,
|
||||
+ pixman_fixed_t ux,
|
||||
+ int width)
|
||||
+{
|
||||
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
|
||||
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
|
||||
+ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
|
||||
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
|
||||
+ const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
|
||||
+ const __m128i xmm_zero = _mm_setzero_si128 ();
|
||||
+ __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
|
||||
+ uint32_t pix1, pix2, pix3, pix4;
|
||||
+
|
||||
+ #define INTERPOLATE_ONE_PIXEL(pix) \
|
||||
+ do { \
|
||||
+ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
|
||||
+ /* fetch 2x2 pixel block into sse2 register */ \
|
||||
+ uint32_t tl = top [pixman_fixed_to_int (x)]; \
|
||||
+ uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
|
||||
+ uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
|
||||
+ uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
|
||||
+ a = _mm_set_epi32 (tr, tl, br, bl); \
|
||||
+ x += ux; \
|
||||
+ /* vertical interpolation */ \
|
||||
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
|
||||
+ xmm_wt), \
|
||||
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
|
||||
+ xmm_wb)); \
|
||||
+ /* calculate horizontal weights */ \
|
||||
+ xmm_wh = _mm_add_epi16 (xmm_addc, \
|
||||
+ _mm_xor_si128 (xmm_xorc, \
|
||||
+ _mm_srli_epi16 (xmm_x, 8))); \
|
||||
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
|
||||
+ /* horizontal interpolation */ \
|
||||
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
|
||||
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
|
||||
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
|
||||
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
|
||||
+ /* shift and pack the result */ \
|
||||
+ a = _mm_srli_epi32 (a, 16); \
|
||||
+ a = _mm_packs_epi32 (a, a); \
|
||||
+ a = _mm_packus_epi16 (a, a); \
|
||||
+ pix = _mm_cvtsi128_si32 (a); \
|
||||
+ } while (0)
|
||||
+
|
||||
+ while ((width -= 4) >= 0)
|
||||
+ {
|
||||
+ INTERPOLATE_ONE_PIXEL (pix1);
|
||||
+ INTERPOLATE_ONE_PIXEL (pix2);
|
||||
+ INTERPOLATE_ONE_PIXEL (pix3);
|
||||
+ INTERPOLATE_ONE_PIXEL (pix4);
|
||||
+ *out++ = pix1;
|
||||
+ *out++ = pix2;
|
||||
+ *out++ = pix3;
|
||||
+ *out++ = pix4;
|
||||
+ }
|
||||
+ if (width & 2)
|
||||
+ {
|
||||
+ INTERPOLATE_ONE_PIXEL (pix1);
|
||||
+ INTERPOLATE_ONE_PIXEL (pix2);
|
||||
+ *out++ = pix1;
|
||||
+ *out++ = pix2;
|
||||
+ }
|
||||
+ if (width & 1)
|
||||
+ {
|
||||
+ INTERPOLATE_ONE_PIXEL (pix1);
|
||||
+ *out = pix1;
|
||||
+ }
|
||||
+
|
||||
+ #undef INTERPOLATE_ONE_PIXEL
|
||||
+}
|
||||
+
|
||||
+static force_inline void
|
||||
+scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
|
||||
+ const uint32_t * mask,
|
||||
+ const uint32_t * src_top,
|
||||
+ const uint32_t * src_bottom,
|
||||
+ int32_t w,
|
||||
+ int wt,
|
||||
+ int wb,
|
||||
+ pixman_fixed_t vx,
|
||||
+ pixman_fixed_t unit_x,
|
||||
+ pixman_fixed_t max_vx,
|
||||
+ pixman_bool_t zero_src)
|
||||
+{
|
||||
+ bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
|
||||
+ wt, wb, vx, unit_x, w);
|
||||
+}
|
||||
+
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
|
||||
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ COVER, FALSE, FALSE)
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
|
||||
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ PAD, FALSE, FALSE)
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
|
||||
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ NONE, FALSE, FALSE)
|
||||
+
|
||||
static const pixman_fast_path_t sse2_fast_paths[] =
|
||||
{
|
||||
/* PIXMAN_OP_OVER */
|
||||
@@ -5668,6 +5776,10 @@ static const pixman_fast_path_t sse2_fast_paths[] =
|
||||
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
|
||||
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
|
||||
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
|
||||
+
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,288 @@
|
||||
From 17feaa9c50bb8521b0366345efe181bd99754957 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Tue, 22 Feb 2011 18:45:03 +0200
|
||||
Subject: [PATCH 10/22] ARM: NEON optimization for bilinear scaled 'src_8888_8888'
|
||||
|
||||
Initial NEON optimization for bilinear scaling. Can be probably
|
||||
improved more.
|
||||
|
||||
Benchmark on ARM Cortex-A8:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
|
||||
after: op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 197 ++++++++++++++++++++++++++++++++++++++++++
|
||||
pixman/pixman-arm-neon.c | 45 ++++++++++
|
||||
2 files changed, 242 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index 47daf45..c168e10 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2391,3 +2391,200 @@ generate_composite_function_nearest_scanline \
|
||||
10, /* dst_r_basereg */ \
|
||||
8, /* src_basereg */ \
|
||||
15 /* mask_basereg */
|
||||
+
|
||||
+/******************************************************************************/
|
||||
+
|
||||
+/* Supplementary macro for setting function attributes */
|
||||
+.macro pixman_asm_function fname
|
||||
+ .func fname
|
||||
+ .global fname
|
||||
+#ifdef __ELF__
|
||||
+ .hidden fname
|
||||
+ .type fname, %function
|
||||
+#endif
|
||||
+fname:
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_last_pixel
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d0}, [TMP1]
|
||||
+ vshr.u16 d30, d24, #8
|
||||
+ vld1.32 {d1}, [TMP2]
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ /* 5 cycles bubble */
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ /* 5 cycles bubble */
|
||||
+ vshrn.u32 d0, q0, #16
|
||||
+ /* 3 cycles bubble */
|
||||
+ vmovn.u16 d0, q0
|
||||
+ /* 1 cycle bubble */
|
||||
+ vst1.32 {d0[0]}, [OUT, :32]!
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_two_pixels
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d0}, [TMP1]
|
||||
+ vld1.32 {d1}, [TMP2]
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d20}, [TMP1]
|
||||
+ vld1.32 {d21}, [TMP2]
|
||||
+ vmull.u8 q11, d20, d28
|
||||
+ vmlal.u8 q11, d21, d29
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ vshll.u16 q10, d22, #8
|
||||
+ vmlsl.u16 q10, d22, d31
|
||||
+ vmlal.u16 q10, d23, d31
|
||||
+ vshrn.u32 d30, q0, #16
|
||||
+ vshrn.u32 d31, q10, #16
|
||||
+ vmovn.u16 d0, q15
|
||||
+ vst1.32 {d0}, [OUT]!
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_four_pixels
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d0}, [TMP1]
|
||||
+ vld1.32 {d1}, [TMP2]
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d20}, [TMP1]
|
||||
+ vld1.32 {d21}, [TMP2]
|
||||
+ vmull.u8 q11, d20, d28
|
||||
+ vmlal.u8 q11, d21, d29
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ vshll.u16 q10, d22, #8
|
||||
+ vmlsl.u16 q10, d22, d31
|
||||
+ vmlal.u16 q10, d23, d31
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d4}, [TMP1]
|
||||
+ vld1.32 {d5}, [TMP2]
|
||||
+ vmull.u8 q3, d4, d28
|
||||
+ vmlal.u8 q3, d5, d29
|
||||
+ mov TMP1, X, asr #16
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP1, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {d16}, [TMP1]
|
||||
+ vld1.32 {d17}, [TMP2]
|
||||
+ vmull.u8 q9, d16, d28
|
||||
+ vmlal.u8 q9, d17, d29
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshll.u16 q2, d6, #8
|
||||
+ vmlsl.u16 q2, d6, d30
|
||||
+ vmlal.u16 q2, d7, d30
|
||||
+ vshll.u16 q8, d18, #8
|
||||
+ vmlsl.u16 q8, d18, d31
|
||||
+ vmlal.u16 q8, d19, d31
|
||||
+ vshrn.u32 d0, q0, #16
|
||||
+ vshrn.u32 d1, q10, #16
|
||||
+ vshrn.u32 d4, q2, #16
|
||||
+ vshrn.u32 d5, q8, #16
|
||||
+ vmovn.u16 d0, q0
|
||||
+ vmovn.u16 d1, q2
|
||||
+ vst1.32 {d0, d1}, [OUT]!
|
||||
+.endm
|
||||
+
|
||||
+
|
||||
+/*
|
||||
+ * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out,
|
||||
+ * const uint32_t * top,
|
||||
+ * const uint32_t * bottom,
|
||||
+ * int wt,
|
||||
+ * int wb,
|
||||
+ * pixman_fixed_t x,
|
||||
+ * pixman_fixed_t ux,
|
||||
+ * int width)
|
||||
+ */
|
||||
+
|
||||
+pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
|
||||
+ OUT .req r0
|
||||
+ TOP .req r1
|
||||
+ BOTTOM .req r2
|
||||
+ WT .req r3
|
||||
+ WB .req r4
|
||||
+ X .req r5
|
||||
+ UX .req r6
|
||||
+ WIDTH .req ip
|
||||
+ TMP1 .req r3
|
||||
+ TMP2 .req r4
|
||||
+
|
||||
+ mov ip, sp
|
||||
+ push {r4, r5, r6, r7}
|
||||
+ ldmia ip, {WB, X, UX, WIDTH}
|
||||
+
|
||||
+ cmp WIDTH, #0
|
||||
+ ble 3f
|
||||
+ vdup.u16 q12, X
|
||||
+ vdup.u16 q13, UX
|
||||
+ vdup.u8 d28, WT
|
||||
+ vdup.u8 d29, WB
|
||||
+ vadd.u16 d25, d25, d26
|
||||
+ vadd.u16 q13, q13, q13
|
||||
+
|
||||
+ subs WIDTH, WIDTH, #4
|
||||
+ blt 1f
|
||||
+0:
|
||||
+ bilinear_interpolate_four_pixels
|
||||
+ subs WIDTH, WIDTH, #4
|
||||
+ bge 0b
|
||||
+1:
|
||||
+ tst WIDTH, #2
|
||||
+ beq 2f
|
||||
+ bilinear_interpolate_two_pixels
|
||||
+2:
|
||||
+ tst WIDTH, #1
|
||||
+ beq 3f
|
||||
+ bilinear_interpolate_last_pixel
|
||||
+3:
|
||||
+ pop {r4, r5, r6, r7}
|
||||
+ bx lr
|
||||
+
|
||||
+ .unreq OUT
|
||||
+ .unreq TOP
|
||||
+ .unreq BOTTOM
|
||||
+ .unreq WT
|
||||
+ .unreq WB
|
||||
+ .unreq X
|
||||
+ .unreq UX
|
||||
+ .unreq WIDTH
|
||||
+ .unreq TMP1
|
||||
+ .unreq TMP2
|
||||
+.endfunc
|
||||
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
|
||||
index 3e0c0d1..c7c0254 100644
|
||||
--- a/pixman/pixman-arm-neon.c
|
||||
+++ b/pixman/pixman-arm-neon.c
|
||||
@@ -232,6 +232,47 @@ pixman_blt_neon (uint32_t *src_bits,
|
||||
}
|
||||
}
|
||||
|
||||
+void
|
||||
+pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out,
|
||||
+ const uint32_t * top,
|
||||
+ const uint32_t * bottom,
|
||||
+ int wt,
|
||||
+ int wb,
|
||||
+ pixman_fixed_t x,
|
||||
+ pixman_fixed_t ux,
|
||||
+ int width);
|
||||
+
|
||||
+static force_inline void
|
||||
+scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst,
|
||||
+ const uint32_t * mask,
|
||||
+ const uint32_t * src_top,
|
||||
+ const uint32_t * src_bottom,
|
||||
+ int32_t w,
|
||||
+ int wt,
|
||||
+ int wb,
|
||||
+ pixman_fixed_t vx,
|
||||
+ pixman_fixed_t unit_x,
|
||||
+ pixman_fixed_t max_vx,
|
||||
+ pixman_bool_t zero_src)
|
||||
+{
|
||||
+ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
|
||||
+ src_bottom, wt, wb,
|
||||
+ vx, unit_x, w);
|
||||
+}
|
||||
+
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
|
||||
+ scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ COVER, FALSE, FALSE)
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
|
||||
+ scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ PAD, FALSE, FALSE)
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
|
||||
+ scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
+ uint32_t, uint32_t, uint32_t,
|
||||
+ NONE, FALSE, FALSE)
|
||||
+
|
||||
static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
{
|
||||
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
|
||||
@@ -343,6 +384,10 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
|
||||
PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
|
||||
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
|
||||
+
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
From 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
|
||||
Date: Mon, 7 Mar 2011 13:45:54 -0500
|
||||
Subject: [PATCH 11/22] test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
|
||||
|
||||
There is no reason to pass in the bpp as an argument; it can be gotten
|
||||
directly from the image.
|
||||
---
|
||||
test/affine-test.c | 6 +++---
|
||||
test/blitters-test.c | 4 ++--
|
||||
test/composite-traps-test.c | 2 +-
|
||||
test/scaling-test.c | 6 +++---
|
||||
test/utils.c | 9 +++++++--
|
||||
test/utils.h | 2 +-
|
||||
6 files changed, 17 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/test/affine-test.c b/test/affine-test.c
|
||||
index b7a1fa6..ed8000c 100644
|
||||
--- a/test/affine-test.c
|
||||
+++ b/test/affine-test.c
|
||||
@@ -95,8 +95,8 @@ test_composite (int testnum,
|
||||
dst_img = pixman_image_create_bits (
|
||||
dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
|
||||
|
||||
- image_endian_swap (src_img, src_bpp * 8);
|
||||
- image_endian_swap (dst_img, dst_bpp * 8);
|
||||
+ image_endian_swap (src_img);
|
||||
+ image_endian_swap (dst_img);
|
||||
|
||||
pixman_transform_init_identity (&transform);
|
||||
|
||||
@@ -251,7 +251,7 @@ test_composite (int testnum,
|
||||
dstbuf[i] &= 0xFFFFFF;
|
||||
}
|
||||
|
||||
- image_endian_swap (dst_img, dst_bpp * 8);
|
||||
+ image_endian_swap (dst_img);
|
||||
|
||||
if (verbose)
|
||||
{
|
||||
diff --git a/test/blitters-test.c b/test/blitters-test.c
|
||||
index 42181ef..63e7cb3 100644
|
||||
--- a/test/blitters-test.c
|
||||
+++ b/test/blitters-test.c
|
||||
@@ -61,7 +61,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
|
||||
pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)]));
|
||||
}
|
||||
|
||||
- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
|
||||
+ image_endian_swap (img);
|
||||
|
||||
if (used_fmt) *used_fmt = fmt;
|
||||
return img;
|
||||
@@ -101,7 +101,7 @@ free_random_image (uint32_t initcrc,
|
||||
/* swap endiannes in order to provide identical results on both big
|
||||
* and litte endian systems
|
||||
*/
|
||||
- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
|
||||
+ image_endian_swap (img);
|
||||
crc32 = compute_crc32 (initcrc, data, stride * height);
|
||||
}
|
||||
|
||||
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
|
||||
index 8f32778..298537d 100644
|
||||
--- a/test/composite-traps-test.c
|
||||
+++ b/test/composite-traps-test.c
|
||||
@@ -218,7 +218,7 @@ test_composite (int testnum,
|
||||
dst_bits[i] &= 0xFFFFFF;
|
||||
}
|
||||
|
||||
- image_endian_swap (dst_img, dst_bpp * 8);
|
||||
+ image_endian_swap (dst_img);
|
||||
|
||||
if (verbose)
|
||||
{
|
||||
diff --git a/test/scaling-test.c b/test/scaling-test.c
|
||||
index dbb9d39..82370f7 100644
|
||||
--- a/test/scaling-test.c
|
||||
+++ b/test/scaling-test.c
|
||||
@@ -140,8 +140,8 @@ test_composite (int testnum,
|
||||
dst_img = pixman_image_create_bits (
|
||||
dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
|
||||
|
||||
- image_endian_swap (src_img, src_bpp * 8);
|
||||
- image_endian_swap (dst_img, dst_bpp * 8);
|
||||
+ image_endian_swap (src_img);
|
||||
+ image_endian_swap (dst_img);
|
||||
|
||||
if (lcg_rand_n (4) > 0)
|
||||
{
|
||||
@@ -330,7 +330,7 @@ test_composite (int testnum,
|
||||
dstbuf[i] &= 0xFFFFFF;
|
||||
}
|
||||
|
||||
- image_endian_swap (dst_img, dst_bpp * 8);
|
||||
+ image_endian_swap (dst_img);
|
||||
|
||||
if (verbose)
|
||||
{
|
||||
diff --git a/test/utils.c b/test/utils.c
|
||||
index 2f21398..4bf02e1 100644
|
||||
--- a/test/utils.c
|
||||
+++ b/test/utils.c
|
||||
@@ -133,11 +133,12 @@ compute_crc32 (uint32_t in_crc32,
|
||||
/* perform endian conversion of pixel data
|
||||
*/
|
||||
void
|
||||
-image_endian_swap (pixman_image_t *img, int bpp)
|
||||
+image_endian_swap (pixman_image_t *img)
|
||||
{
|
||||
int stride = pixman_image_get_stride (img);
|
||||
uint32_t *data = pixman_image_get_data (img);
|
||||
int height = pixman_image_get_height (img);
|
||||
+ int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img));
|
||||
int i, j;
|
||||
|
||||
/* swap bytes only on big endian systems */
|
||||
@@ -145,10 +146,13 @@ image_endian_swap (pixman_image_t *img, int bpp)
|
||||
if (*(volatile uint8_t *)&endian_check_var != 0x12)
|
||||
return;
|
||||
|
||||
+ if (bpp == 8)
|
||||
+ return;
|
||||
+
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
uint8_t *line_data = (uint8_t *)data + stride * i;
|
||||
- /* swap bytes only for 16, 24 and 32 bpp for now */
|
||||
+
|
||||
switch (bpp)
|
||||
{
|
||||
case 1:
|
||||
@@ -208,6 +212,7 @@ image_endian_swap (pixman_image_t *img, int bpp)
|
||||
}
|
||||
break;
|
||||
default:
|
||||
+ assert (FALSE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
diff --git a/test/utils.h b/test/utils.h
|
||||
index 9c7bdb1..a5183f7 100644
|
||||
--- a/test/utils.h
|
||||
+++ b/test/utils.h
|
||||
@@ -60,7 +60,7 @@ compute_crc32 (uint32_t in_crc32,
|
||||
/* perform endian conversion of pixel data
|
||||
*/
|
||||
void
|
||||
-image_endian_swap (pixman_image_t *img, int bpp);
|
||||
+image_endian_swap (pixman_image_t *img);
|
||||
|
||||
/* Allocate memory that is bounded by protected pages,
|
||||
* so that out-of-bounds access will cause segfaults
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
From 84e361c8e357e26f299213fbeefe64c73447b116 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
|
||||
Date: Fri, 4 Mar 2011 15:51:18 -0500
|
||||
Subject: [PATCH 12/22] test: Do endian swapping of the source and destination images.
|
||||
|
||||
Otherwise the test fails on big endian. Fix for bug 34767, reported by
|
||||
Siarhei Siamashka.
|
||||
---
|
||||
test/composite-traps-test.c | 4 ++++
|
||||
1 files changed, 4 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
|
||||
index 298537d..cf30281 100644
|
||||
--- a/test/composite-traps-test.c
|
||||
+++ b/test/composite-traps-test.c
|
||||
@@ -139,6 +139,8 @@ test_composite (int testnum,
|
||||
pixman_image_set_source_clipping (src_img, 1);
|
||||
pixman_region_fini (&clip);
|
||||
}
|
||||
+
|
||||
+ image_endian_swap (src_img);
|
||||
}
|
||||
|
||||
/* Create destination image */
|
||||
@@ -157,6 +159,8 @@ test_composite (int testnum,
|
||||
|
||||
dst_img = pixman_image_create_bits (
|
||||
dst_format, dst_width, dst_height, dst_bits, dst_stride);
|
||||
+
|
||||
+ image_endian_swap (dst_img);
|
||||
}
|
||||
|
||||
/* Create traps */
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
From bb3d1b67fd0f42ae00af811c624ea1c44541034d Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Sun, 6 Mar 2011 16:17:12 +0200
|
||||
Subject: [PATCH 13/22] ARM: use prefetch in nearest scaled 'src_0565_0565'
|
||||
|
||||
Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
|
||||
after: op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
|
||||
after: op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-simd-asm.S | 27 +++++++++++++++++++++++++--
|
||||
1 files changed, 25 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
|
||||
index 7567700..dd1366d 100644
|
||||
--- a/pixman/pixman-arm-simd-asm.S
|
||||
+++ b/pixman/pixman-arm-simd-asm.S
|
||||
@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
TMP1 .req r4
|
||||
TMP2 .req r5
|
||||
VXMASK .req r6
|
||||
+ PF_OFFS .req r7
|
||||
|
||||
ldr UNIT_X, [sp]
|
||||
push {r4, r5, r6, r7}
|
||||
@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
strh TMP2, [DST], #2
|
||||
.endm
|
||||
|
||||
+ /*
|
||||
+ * stop prefetch before reaching the end of scanline (a good behaving
|
||||
+ * value selected based on some benchmarks with short scanlines)
|
||||
+ */
|
||||
+ #define PREFETCH_BRAKING_DISTANCE 32
|
||||
+
|
||||
/* now do the scaling */
|
||||
and TMP1, VXMASK, VX, lsr #15
|
||||
add VX, VX, UNIT_X
|
||||
- subs W, #4
|
||||
+ subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
|
||||
+ blt 2f
|
||||
+ /* set prefetch distance to 80 pixels ahead */
|
||||
+ add PF_OFFS, VX, UNIT_X, lsl #6
|
||||
+ add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
|
||||
+1: /* main loop, process 8 pixels per iteration with prefetch */
|
||||
+ subs W, W, #8
|
||||
+ add PF_OFFS, UNIT_X, lsl #3
|
||||
+ scale_2_pixels
|
||||
+ scale_2_pixels
|
||||
+ scale_2_pixels
|
||||
+ scale_2_pixels
|
||||
+ pld [SRC, PF_OFFS, lsr #15]
|
||||
+ bge 1b
|
||||
+2:
|
||||
+ subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
|
||||
blt 2f
|
||||
-1: /* main loop, process 4 pixels per iteration */
|
||||
+1: /* process the remaining pixels */
|
||||
scale_2_pixels
|
||||
scale_2_pixels
|
||||
subs W, W, #4
|
||||
@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
.unreq TMP1
|
||||
.unreq TMP2
|
||||
.unreq VXMASK
|
||||
+ .unreq PF_OFFS
|
||||
/* return */
|
||||
pop {r4, r5, r6, r7}
|
||||
bx lr
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Mon, 7 Mar 2011 03:10:43 +0200
|
||||
Subject: [PATCH 14/22] ARM: common macro for nearest scaling fast paths
|
||||
|
||||
The code of nearest scaled 'src_0565_0565' function was generalized
|
||||
and moved to a common macro, so that it can be reused for other
|
||||
fast paths.
|
||||
---
|
||||
pixman/pixman-arm-simd-asm.S | 60 +++++++++++++++++++++++++----------------
|
||||
1 files changed, 36 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
|
||||
index dd1366d..a9775e2 100644
|
||||
--- a/pixman/pixman-arm-simd-asm.S
|
||||
+++ b/pixman/pixman-arm-simd-asm.S
|
||||
@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
|
||||
.endfunc
|
||||
|
||||
/*
|
||||
- * Note: This function is only using armv4t instructions (not even armv6),
|
||||
+ * Note: This code is only using armv5te instructions (not even armv6),
|
||||
* but is scheduled for ARM Cortex-A8 pipeline. So it might need to
|
||||
* be split into a few variants, tuned for each microarchitecture.
|
||||
*
|
||||
* TODO: In order to get good performance on ARM9/ARM11 cores (which don't
|
||||
* have efficient write combining), it needs to be changed to use 16-byte
|
||||
* aligned writes using STM instruction.
|
||||
+ *
|
||||
+ * Nearest scanline scaler macro template uses the following arguments:
|
||||
+ * fname - name of the function to generate
|
||||
+ * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
|
||||
+ * t - type suffix for LDR/STR instructions
|
||||
+ * prefetch_distance - prefetch in the source image by that many
|
||||
+ * pixels ahead
|
||||
+ * prefetch_braking_distance - stop prefetching when that many pixels are
|
||||
+ * remaining before the end of scanline
|
||||
*/
|
||||
-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
+
|
||||
+.macro generate_nearest_scanline_func fname, bpp_shift, t, \
|
||||
+ prefetch_distance, \
|
||||
+ prefetch_braking_distance
|
||||
+
|
||||
+pixman_asm_function fname
|
||||
W .req r0
|
||||
DST .req r1
|
||||
SRC .req r2
|
||||
@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
|
||||
ldr UNIT_X, [sp]
|
||||
push {r4, r5, r6, r7}
|
||||
- mvn VXMASK, #1
|
||||
+ mvn VXMASK, #((1 << bpp_shift) - 1)
|
||||
|
||||
/* define helper macro */
|
||||
.macro scale_2_pixels
|
||||
- ldrh TMP1, [SRC, TMP1]
|
||||
- and TMP2, VXMASK, VX, lsr #15
|
||||
+ ldr&t TMP1, [SRC, TMP1]
|
||||
+ and TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
|
||||
add VX, VX, UNIT_X
|
||||
- strh TMP1, [DST], #2
|
||||
+ str&t TMP1, [DST], #(1 << bpp_shift)
|
||||
|
||||
- ldrh TMP2, [SRC, TMP2]
|
||||
- and TMP1, VXMASK, VX, lsr #15
|
||||
+ ldr&t TMP2, [SRC, TMP2]
|
||||
+ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
|
||||
add VX, VX, UNIT_X
|
||||
- strh TMP2, [DST], #2
|
||||
+ str&t TMP2, [DST], #(1 << bpp_shift)
|
||||
.endm
|
||||
|
||||
- /*
|
||||
- * stop prefetch before reaching the end of scanline (a good behaving
|
||||
- * value selected based on some benchmarks with short scanlines)
|
||||
- */
|
||||
- #define PREFETCH_BRAKING_DISTANCE 32
|
||||
-
|
||||
/* now do the scaling */
|
||||
- and TMP1, VXMASK, VX, lsr #15
|
||||
+ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
|
||||
add VX, VX, UNIT_X
|
||||
- subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
|
||||
+ subs W, W, #(8 + prefetch_braking_distance)
|
||||
blt 2f
|
||||
- /* set prefetch distance to 80 pixels ahead */
|
||||
- add PF_OFFS, VX, UNIT_X, lsl #6
|
||||
- add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
|
||||
+ /* calculate prefetch offset */
|
||||
+ mov PF_OFFS, #prefetch_distance
|
||||
+ mla PF_OFFS, UNIT_X, PF_OFFS, VX
|
||||
1: /* main loop, process 8 pixels per iteration with prefetch */
|
||||
subs W, W, #8
|
||||
add PF_OFFS, UNIT_X, lsl #3
|
||||
@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
scale_2_pixels
|
||||
scale_2_pixels
|
||||
scale_2_pixels
|
||||
- pld [SRC, PF_OFFS, lsr #15]
|
||||
+ pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)]
|
||||
bge 1b
|
||||
2:
|
||||
- subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
|
||||
+ subs W, W, #(4 - 8 - prefetch_braking_distance)
|
||||
blt 2f
|
||||
1: /* process the remaining pixels */
|
||||
scale_2_pixels
|
||||
@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
scale_2_pixels
|
||||
2:
|
||||
tst W, #1
|
||||
- ldrneh TMP1, [SRC, TMP1]
|
||||
- strneh TMP1, [DST], #2
|
||||
+ ldrne&t TMP1, [SRC, TMP1]
|
||||
+ strne&t TMP1, [DST]
|
||||
/* cleanup helper macro */
|
||||
.purgem scale_2_pixels
|
||||
.unreq DST
|
||||
@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
|
||||
pop {r4, r5, r6, r7}
|
||||
bx lr
|
||||
.endfunc
|
||||
+.endm
|
||||
+
|
||||
+generate_nearest_scanline_func \
|
||||
+ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
From 5921c17639fe5fdc595c850e3347281c1c8746ba Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Sun, 6 Mar 2011 22:16:32 +0200
|
||||
Subject: [PATCH 15/22] ARM: assembly optimized nearest scaled 'src_8888_8888'
|
||||
|
||||
Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
|
||||
after: op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
|
||||
after: op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-simd-asm.S | 3 +++
|
||||
pixman/pixman-arm-simd.c | 9 +++++++++
|
||||
2 files changed, 12 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
|
||||
index a9775e2..858c690 100644
|
||||
--- a/pixman/pixman-arm-simd-asm.S
|
||||
+++ b/pixman/pixman-arm-simd-asm.S
|
||||
@@ -433,3 +433,6 @@ pixman_asm_function fname
|
||||
|
||||
generate_nearest_scanline_func \
|
||||
pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
|
||||
+
|
||||
+generate_nearest_scanline_func \
|
||||
+ pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32
|
||||
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
|
||||
index 6bbc109..a66f8df 100644
|
||||
--- a/pixman/pixman-arm-simd.c
|
||||
+++ b/pixman/pixman-arm-simd.c
|
||||
@@ -389,6 +389,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
|
||||
|
||||
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
|
||||
uint16_t, uint16_t)
|
||||
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
|
||||
+ uint32_t, uint32_t)
|
||||
|
||||
static const pixman_fast_path_t arm_simd_fast_paths[] =
|
||||
{
|
||||
@@ -411,6 +413,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
|
||||
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
|
||||
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
|
||||
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
|
||||
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
|
||||
+
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,130 @@
|
||||
From 66f4ee1b3bccf4516433d61dbf2035551a712fa2 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 10:59:46 +0200
|
||||
Subject: [PATCH 16/22] ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
|
||||
|
||||
It can be reused in different ARM NEON bilinear scaling fast path functions.
|
||||
---
|
||||
pixman/pixman-arm-common.h | 45 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
pixman/pixman-arm-neon.c | 44 ++----------------------------------------
|
||||
2 files changed, 48 insertions(+), 41 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
|
||||
index 9b1322b..c3bf986 100644
|
||||
--- a/pixman/pixman-arm-common.h
|
||||
+++ b/pixman/pixman-arm-common.h
|
||||
@@ -361,4 +361,49 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
|
||||
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
|
||||
SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
|
||||
|
||||
+/*****************************************************************************/
|
||||
+
|
||||
+#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \
|
||||
+ src_type, dst_type) \
|
||||
+void \
|
||||
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
|
||||
+ dst_type * dst, \
|
||||
+ const src_type * top, \
|
||||
+ const src_type * bottom, \
|
||||
+ int wt, \
|
||||
+ int wb, \
|
||||
+ pixman_fixed_t x, \
|
||||
+ pixman_fixed_t ux, \
|
||||
+ int width); \
|
||||
+ \
|
||||
+static force_inline void \
|
||||
+scaled_bilinear_scanline_##cputype##_##name##_##op ( \
|
||||
+ dst_type * dst, \
|
||||
+ const uint32_t * mask, \
|
||||
+ const src_type * src_top, \
|
||||
+ const src_type * src_bottom, \
|
||||
+ int32_t w, \
|
||||
+ int wt, \
|
||||
+ int wb, \
|
||||
+ pixman_fixed_t vx, \
|
||||
+ pixman_fixed_t unit_x, \
|
||||
+ pixman_fixed_t max_vx, \
|
||||
+ pixman_bool_t zero_src) \
|
||||
+{ \
|
||||
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
|
||||
+ return; \
|
||||
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
|
||||
+ dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
|
||||
+} \
|
||||
+ \
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
|
||||
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
|
||||
+ src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
|
||||
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
|
||||
+ src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
|
||||
+FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
|
||||
+ scaled_bilinear_scanline_##cputype##_##name##_##op, \
|
||||
+ src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
|
||||
+
|
||||
#endif
|
||||
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
|
||||
index c7c0254..98ad5f2 100644
|
||||
--- a/pixman/pixman-arm-neon.c
|
||||
+++ b/pixman/pixman-arm-neon.c
|
||||
@@ -127,6 +127,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
|
||||
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
|
||||
OVER, uint16_t, uint16_t)
|
||||
|
||||
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
|
||||
+ uint32_t, uint32_t)
|
||||
+
|
||||
void
|
||||
pixman_composite_src_n_8_asm_neon (int32_t w,
|
||||
int32_t h,
|
||||
@@ -232,47 +235,6 @@ pixman_blt_neon (uint32_t *src_bits,
|
||||
}
|
||||
}
|
||||
|
||||
-void
|
||||
-pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out,
|
||||
- const uint32_t * top,
|
||||
- const uint32_t * bottom,
|
||||
- int wt,
|
||||
- int wb,
|
||||
- pixman_fixed_t x,
|
||||
- pixman_fixed_t ux,
|
||||
- int width);
|
||||
-
|
||||
-static force_inline void
|
||||
-scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst,
|
||||
- const uint32_t * mask,
|
||||
- const uint32_t * src_top,
|
||||
- const uint32_t * src_bottom,
|
||||
- int32_t w,
|
||||
- int wt,
|
||||
- int wb,
|
||||
- pixman_fixed_t vx,
|
||||
- pixman_fixed_t unit_x,
|
||||
- pixman_fixed_t max_vx,
|
||||
- pixman_bool_t zero_src)
|
||||
-{
|
||||
- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
|
||||
- src_bottom, wt, wb,
|
||||
- vx, unit_x, w);
|
||||
-}
|
||||
-
|
||||
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
|
||||
- scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
- uint32_t, uint32_t, uint32_t,
|
||||
- COVER, FALSE, FALSE)
|
||||
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
|
||||
- scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
- uint32_t, uint32_t, uint32_t,
|
||||
- PAD, FALSE, FALSE)
|
||||
-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
|
||||
- scaled_bilinear_scanline_neon_8888_8888_SRC,
|
||||
- uint32_t, uint32_t, uint32_t,
|
||||
- NONE, FALSE, FALSE)
|
||||
-
|
||||
static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
{
|
||||
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
From 34098dba6763afd3636a14f9c2a079ab08f23b2d Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 11:34:15 +0200
|
||||
Subject: [PATCH 17/22] ARM: NEON: common macro template for bilinear scanline scalers
|
||||
|
||||
This allows generating bilinear scanline scaling functions targeting
|
||||
various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
|
||||
and r5g6b5 color formats are supported. More formats can be added if needed.
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 222 ++++++++++++++++++++++++++++++++++++++++++
|
||||
pixman/pixman-arm-neon-asm.h | 17 +++
|
||||
2 files changed, 239 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index c168e10..f3784f5 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2588,3 +2588,225 @@ pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
|
||||
.unreq TMP1
|
||||
.unreq TMP2
|
||||
.endfunc
|
||||
+
|
||||
+.purgem bilinear_interpolate_last_pixel
|
||||
+.purgem bilinear_interpolate_two_pixels
|
||||
+.purgem bilinear_interpolate_four_pixels
|
||||
+
|
||||
+/*
|
||||
+ * Bilinear scaling support code which tries to provide pixel fetching, color
|
||||
+ * format conversion, and interpolation as separate macros which can be used
|
||||
+ * as the basic building blocks for constructing bilinear scanline functions.
|
||||
+ */
|
||||
+
|
||||
+.macro bilinear_load_8888 reg1, reg2, tmp
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP2, asl #2
|
||||
+ add TMP2, BOTTOM, TMP2, asl #2
|
||||
+ vld1.32 {reg1}, [TMP1]
|
||||
+ vld1.32 {reg2}, [TMP2]
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_load_0565 reg1, reg2, tmp
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP2, asl #1
|
||||
+ add TMP2, BOTTOM, TMP2, asl #1
|
||||
+ vld1.32 {reg2[0]}, [TMP1]
|
||||
+ vld1.32 {reg2[1]}, [TMP2]
|
||||
+ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_store_8888 numpix, tmp1, tmp2
|
||||
+.if numpix == 4
|
||||
+ vst1.32 {d0, d1}, [OUT]!
|
||||
+.elseif numpix == 2
|
||||
+ vst1.32 {d0}, [OUT]!
|
||||
+.elseif numpix == 1
|
||||
+ vst1.32 {d0[0]}, [OUT, :32]!
|
||||
+.else
|
||||
+ .error bilinear_store_8888 numpix is unsupported
|
||||
+.endif
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_store_0565 numpix, tmp1, tmp2
|
||||
+ vuzp.u8 d0, d1
|
||||
+ vuzp.u8 d2, d3
|
||||
+ vuzp.u8 d1, d3
|
||||
+ vuzp.u8 d0, d2
|
||||
+ convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
|
||||
+.if numpix == 4
|
||||
+ vst1.16 {d2}, [OUT]!
|
||||
+.elseif numpix == 2
|
||||
+ vst1.32 {d2[0]}, [OUT]!
|
||||
+.elseif numpix == 1
|
||||
+ vst1.16 {d2[0]}, [OUT]!
|
||||
+.else
|
||||
+ .error bilinear_store_0565 numpix is unsupported
|
||||
+.endif
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
|
||||
+ bilinear_load_&src_fmt d0, d1, d2
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ vshr.u16 d30, d24, #8
|
||||
+ /* 4 cycles bubble */
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ /* 5 cycles bubble */
|
||||
+ vshrn.u32 d0, q0, #16
|
||||
+ /* 3 cycles bubble */
|
||||
+ vmovn.u16 d0, q0
|
||||
+ /* 1 cycle bubble */
|
||||
+ bilinear_store_&dst_fmt 1, q2, q3
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
|
||||
+ bilinear_load_&src_fmt d0, d1, d2
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ bilinear_load_&src_fmt d20, d21, d22
|
||||
+ vmull.u8 q11, d20, d28
|
||||
+ vmlal.u8 q11, d21, d29
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ vshll.u16 q10, d22, #8
|
||||
+ vmlsl.u16 q10, d22, d31
|
||||
+ vmlal.u16 q10, d23, d31
|
||||
+ vshrn.u32 d30, q0, #16
|
||||
+ vshrn.u32 d31, q10, #16
|
||||
+ vmovn.u16 d0, q15
|
||||
+ bilinear_store_&dst_fmt 2, q2, q3
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
|
||||
+ bilinear_load_&src_fmt d0, d1, d2
|
||||
+ vmull.u8 q1, d0, d28
|
||||
+ vmlal.u8 q1, d1, d29
|
||||
+ bilinear_load_&src_fmt d20, d21, d22
|
||||
+ vmull.u8 q11, d20, d28
|
||||
+ vmlal.u8 q11, d21, d29
|
||||
+ bilinear_load_&src_fmt d4, d5, d6
|
||||
+ vmull.u8 q3, d4, d28
|
||||
+ vmlal.u8 q3, d5, d29
|
||||
+ bilinear_load_&src_fmt d16, d17, d18
|
||||
+ vmull.u8 q9, d16, d28
|
||||
+ vmlal.u8 q9, d17, d29
|
||||
+ pld [TMP1, PF_OFFS]
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshll.u16 q0, d2, #8
|
||||
+ vmlsl.u16 q0, d2, d30
|
||||
+ vmlal.u16 q0, d3, d30
|
||||
+ vshll.u16 q10, d22, #8
|
||||
+ vmlsl.u16 q10, d22, d31
|
||||
+ vmlal.u16 q10, d23, d31
|
||||
+ vshr.u16 q15, q12, #8
|
||||
+ vshll.u16 q2, d6, #8
|
||||
+ vmlsl.u16 q2, d6, d30
|
||||
+ vmlal.u16 q2, d7, d30
|
||||
+ vshll.u16 q8, d18, #8
|
||||
+ pld [TMP2, PF_OFFS]
|
||||
+ vmlsl.u16 q8, d18, d31
|
||||
+ vmlal.u16 q8, d19, d31
|
||||
+ vadd.u16 q12, q12, q13
|
||||
+ vshrn.u32 d0, q0, #16
|
||||
+ vshrn.u32 d1, q10, #16
|
||||
+ vshrn.u32 d4, q2, #16
|
||||
+ vshrn.u32 d5, q8, #16
|
||||
+ vmovn.u16 d0, q0
|
||||
+ vmovn.u16 d1, q2
|
||||
+ bilinear_store_&dst_fmt 4, q2, q3
|
||||
+.endm
|
||||
+
|
||||
+/*
|
||||
+ * Main template macro for generating NEON optimized bilinear scanline
|
||||
+ * functions.
|
||||
+ *
|
||||
+ * TODO: use software pipelining and aligned writes to the destination buffer
|
||||
+ * in order to improve performance
|
||||
+ *
|
||||
+ * Bilinear scanline scaler macro template uses the following arguments:
|
||||
+ * fname - name of the function to generate
|
||||
+ * src_fmt - source color format (8888 or 0565)
|
||||
+ * dst_fmt - destination color format (8888 or 0565)
|
||||
+ * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes
|
||||
+ * prefetch_distance - prefetch in the source image by that many
|
||||
+ * pixels ahead
|
||||
+ */
|
||||
+
|
||||
+.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
|
||||
+ bpp_shift, prefetch_distance
|
||||
+
|
||||
+pixman_asm_function fname
|
||||
+ OUT .req r0
|
||||
+ TOP .req r1
|
||||
+ BOTTOM .req r2
|
||||
+ WT .req r3
|
||||
+ WB .req r4
|
||||
+ X .req r5
|
||||
+ UX .req r6
|
||||
+ WIDTH .req ip
|
||||
+ TMP1 .req r3
|
||||
+ TMP2 .req r4
|
||||
+ PF_OFFS .req r7
|
||||
+ TMP3 .req r8
|
||||
+ TMP4 .req r9
|
||||
+
|
||||
+ mov ip, sp
|
||||
+ push {r4, r5, r6, r7, r8, r9}
|
||||
+ mov PF_OFFS, #prefetch_distance
|
||||
+ ldmia ip, {WB, X, UX, WIDTH}
|
||||
+ mul PF_OFFS, PF_OFFS, UX
|
||||
+
|
||||
+ cmp WIDTH, #0
|
||||
+ ble 3f
|
||||
+
|
||||
+ vdup.u16 q12, X
|
||||
+ vdup.u16 q13, UX
|
||||
+ vdup.u8 d28, WT
|
||||
+ vdup.u8 d29, WB
|
||||
+ vadd.u16 d25, d25, d26
|
||||
+ vadd.u16 q13, q13, q13
|
||||
+
|
||||
+ subs WIDTH, WIDTH, #4
|
||||
+ blt 1f
|
||||
+ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
|
||||
+0:
|
||||
+ bilinear_interpolate_four_pixels src_fmt, dst_fmt
|
||||
+ subs WIDTH, WIDTH, #4
|
||||
+ bge 0b
|
||||
+1:
|
||||
+ tst WIDTH, #2
|
||||
+ beq 2f
|
||||
+ bilinear_interpolate_two_pixels src_fmt, dst_fmt
|
||||
+2:
|
||||
+ tst WIDTH, #1
|
||||
+ beq 3f
|
||||
+ bilinear_interpolate_last_pixel src_fmt, dst_fmt
|
||||
+3:
|
||||
+ pop {r4, r5, r6, r7, r8, r9}
|
||||
+ bx lr
|
||||
+
|
||||
+ .unreq OUT
|
||||
+ .unreq TOP
|
||||
+ .unreq BOTTOM
|
||||
+ .unreq WT
|
||||
+ .unreq WB
|
||||
+ .unreq X
|
||||
+ .unreq UX
|
||||
+ .unreq WIDTH
|
||||
+ .unreq TMP1
|
||||
+ .unreq TMP2
|
||||
+ .unreq PF_OFFS
|
||||
+ .unreq TMP3
|
||||
+ .unreq TMP4
|
||||
+.endfunc
|
||||
+
|
||||
+.endm
|
||||
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
|
||||
index 24fa361..97adc6a 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.h
|
||||
+++ b/pixman/pixman-arm-neon-asm.h
|
||||
@@ -1158,3 +1158,20 @@ fname:
|
||||
vsri.u16 out, tmp1, #5
|
||||
vsri.u16 out, tmp2, #11
|
||||
.endm
|
||||
+
|
||||
+/*
|
||||
+ * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
|
||||
+ * returned in (out0, out1) registers pair. Requires one temporary
|
||||
+ * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
|
||||
+ * value from 'in' is lost
|
||||
+ */
|
||||
+.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
|
||||
+ vshl.u16 out0, in, #5 /* G top 6 bits */
|
||||
+ vshl.u16 tmp, in, #11 /* B top 5 bits */
|
||||
+ vsri.u16 in, in, #5 /* R is ready in top bits */
|
||||
+ vsri.u16 out0, out0, #6 /* G is ready in top bits */
|
||||
+ vsri.u16 tmp, tmp, #5 /* B is ready in top bits */
|
||||
+ vshr.u16 out1, in, #8 /* R is in place */
|
||||
+ vsri.u16 out0, tmp, #8 /* G & B is in place */
|
||||
+ vzip.u16 out0, out1 /* everything is in place */
|
||||
+.endm
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,226 @@
|
||||
From 11a0c5badbc59ce967707ef836313cc98f8aec4e Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 11:46:48 +0200
|
||||
Subject: [PATCH 18/22] ARM: use common macro template for bilinear scaled 'src_8888_8888'
|
||||
|
||||
This is a cleanup for old and now duplicated code. The performance improvement
|
||||
comes mostly from enabling software prefetch, but instruction
|
||||
scheduling is also slightly better.
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
|
||||
after: op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 191 +-----------------------------------------
|
||||
1 files changed, 3 insertions(+), 188 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index f3784f5..52dc444 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2405,194 +2405,6 @@ generate_composite_function_nearest_scanline \
|
||||
fname:
|
||||
.endm
|
||||
|
||||
-.macro bilinear_interpolate_last_pixel
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d0}, [TMP1]
|
||||
- vshr.u16 d30, d24, #8
|
||||
- vld1.32 {d1}, [TMP2]
|
||||
- vmull.u8 q1, d0, d28
|
||||
- vmlal.u8 q1, d1, d29
|
||||
- /* 5 cycles bubble */
|
||||
- vshll.u16 q0, d2, #8
|
||||
- vmlsl.u16 q0, d2, d30
|
||||
- vmlal.u16 q0, d3, d30
|
||||
- /* 5 cycles bubble */
|
||||
- vshrn.u32 d0, q0, #16
|
||||
- /* 3 cycles bubble */
|
||||
- vmovn.u16 d0, q0
|
||||
- /* 1 cycle bubble */
|
||||
- vst1.32 {d0[0]}, [OUT, :32]!
|
||||
-.endm
|
||||
-
|
||||
-.macro bilinear_interpolate_two_pixels
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d0}, [TMP1]
|
||||
- vld1.32 {d1}, [TMP2]
|
||||
- vmull.u8 q1, d0, d28
|
||||
- vmlal.u8 q1, d1, d29
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d20}, [TMP1]
|
||||
- vld1.32 {d21}, [TMP2]
|
||||
- vmull.u8 q11, d20, d28
|
||||
- vmlal.u8 q11, d21, d29
|
||||
- vshr.u16 q15, q12, #8
|
||||
- vadd.u16 q12, q12, q13
|
||||
- vshll.u16 q0, d2, #8
|
||||
- vmlsl.u16 q0, d2, d30
|
||||
- vmlal.u16 q0, d3, d30
|
||||
- vshll.u16 q10, d22, #8
|
||||
- vmlsl.u16 q10, d22, d31
|
||||
- vmlal.u16 q10, d23, d31
|
||||
- vshrn.u32 d30, q0, #16
|
||||
- vshrn.u32 d31, q10, #16
|
||||
- vmovn.u16 d0, q15
|
||||
- vst1.32 {d0}, [OUT]!
|
||||
-.endm
|
||||
-
|
||||
-.macro bilinear_interpolate_four_pixels
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d0}, [TMP1]
|
||||
- vld1.32 {d1}, [TMP2]
|
||||
- vmull.u8 q1, d0, d28
|
||||
- vmlal.u8 q1, d1, d29
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d20}, [TMP1]
|
||||
- vld1.32 {d21}, [TMP2]
|
||||
- vmull.u8 q11, d20, d28
|
||||
- vmlal.u8 q11, d21, d29
|
||||
- vshr.u16 q15, q12, #8
|
||||
- vadd.u16 q12, q12, q13
|
||||
- vshll.u16 q0, d2, #8
|
||||
- vmlsl.u16 q0, d2, d30
|
||||
- vmlal.u16 q0, d3, d30
|
||||
- vshll.u16 q10, d22, #8
|
||||
- vmlsl.u16 q10, d22, d31
|
||||
- vmlal.u16 q10, d23, d31
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d4}, [TMP1]
|
||||
- vld1.32 {d5}, [TMP2]
|
||||
- vmull.u8 q3, d4, d28
|
||||
- vmlal.u8 q3, d5, d29
|
||||
- mov TMP1, X, asr #16
|
||||
- mov TMP2, X, asr #16
|
||||
- add X, X, UX
|
||||
- add TMP1, TOP, TMP1, asl #2
|
||||
- add TMP2, BOTTOM, TMP2, asl #2
|
||||
- vld1.32 {d16}, [TMP1]
|
||||
- vld1.32 {d17}, [TMP2]
|
||||
- vmull.u8 q9, d16, d28
|
||||
- vmlal.u8 q9, d17, d29
|
||||
- vshr.u16 q15, q12, #8
|
||||
- vadd.u16 q12, q12, q13
|
||||
- vshll.u16 q2, d6, #8
|
||||
- vmlsl.u16 q2, d6, d30
|
||||
- vmlal.u16 q2, d7, d30
|
||||
- vshll.u16 q8, d18, #8
|
||||
- vmlsl.u16 q8, d18, d31
|
||||
- vmlal.u16 q8, d19, d31
|
||||
- vshrn.u32 d0, q0, #16
|
||||
- vshrn.u32 d1, q10, #16
|
||||
- vshrn.u32 d4, q2, #16
|
||||
- vshrn.u32 d5, q8, #16
|
||||
- vmovn.u16 d0, q0
|
||||
- vmovn.u16 d1, q2
|
||||
- vst1.32 {d0, d1}, [OUT]!
|
||||
-.endm
|
||||
-
|
||||
-
|
||||
-/*
|
||||
- * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out,
|
||||
- * const uint32_t * top,
|
||||
- * const uint32_t * bottom,
|
||||
- * int wt,
|
||||
- * int wb,
|
||||
- * pixman_fixed_t x,
|
||||
- * pixman_fixed_t ux,
|
||||
- * int width)
|
||||
- */
|
||||
-
|
||||
-pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
|
||||
- OUT .req r0
|
||||
- TOP .req r1
|
||||
- BOTTOM .req r2
|
||||
- WT .req r3
|
||||
- WB .req r4
|
||||
- X .req r5
|
||||
- UX .req r6
|
||||
- WIDTH .req ip
|
||||
- TMP1 .req r3
|
||||
- TMP2 .req r4
|
||||
-
|
||||
- mov ip, sp
|
||||
- push {r4, r5, r6, r7}
|
||||
- ldmia ip, {WB, X, UX, WIDTH}
|
||||
-
|
||||
- cmp WIDTH, #0
|
||||
- ble 3f
|
||||
- vdup.u16 q12, X
|
||||
- vdup.u16 q13, UX
|
||||
- vdup.u8 d28, WT
|
||||
- vdup.u8 d29, WB
|
||||
- vadd.u16 d25, d25, d26
|
||||
- vadd.u16 q13, q13, q13
|
||||
-
|
||||
- subs WIDTH, WIDTH, #4
|
||||
- blt 1f
|
||||
-0:
|
||||
- bilinear_interpolate_four_pixels
|
||||
- subs WIDTH, WIDTH, #4
|
||||
- bge 0b
|
||||
-1:
|
||||
- tst WIDTH, #2
|
||||
- beq 2f
|
||||
- bilinear_interpolate_two_pixels
|
||||
-2:
|
||||
- tst WIDTH, #1
|
||||
- beq 3f
|
||||
- bilinear_interpolate_last_pixel
|
||||
-3:
|
||||
- pop {r4, r5, r6, r7}
|
||||
- bx lr
|
||||
-
|
||||
- .unreq OUT
|
||||
- .unreq TOP
|
||||
- .unreq BOTTOM
|
||||
- .unreq WT
|
||||
- .unreq WB
|
||||
- .unreq X
|
||||
- .unreq UX
|
||||
- .unreq WIDTH
|
||||
- .unreq TMP1
|
||||
- .unreq TMP2
|
||||
-.endfunc
|
||||
-
|
||||
-.purgem bilinear_interpolate_last_pixel
|
||||
-.purgem bilinear_interpolate_two_pixels
|
||||
-.purgem bilinear_interpolate_four_pixels
|
||||
-
|
||||
/*
|
||||
* Bilinear scaling support code which tries to provide pixel fetching, color
|
||||
* format conversion, and interpolation as separate macros which can be used
|
||||
@@ -2810,3 +2622,6 @@ pixman_asm_function fname
|
||||
.endfunc
|
||||
|
||||
.endm
|
||||
+
|
||||
+generate_bilinear_scanline_func \
|
||||
+ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
From 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 11:53:04 +0200
|
||||
Subject: [PATCH 19/22] ARM: NEON optimization for bilinear scaled 'src_8888_0565'
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
|
||||
after: op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 3 +++
|
||||
pixman/pixman-arm-neon.c | 5 +++++
|
||||
2 files changed, 8 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index 52dc444..f0b42ca 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2625,3 +2625,6 @@ pixman_asm_function fname
|
||||
|
||||
generate_bilinear_scanline_func \
|
||||
pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
|
||||
+
|
||||
+generate_bilinear_scanline_func \
|
||||
+ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
|
||||
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
|
||||
index 98ad5f2..ba6de66 100644
|
||||
--- a/pixman/pixman-arm-neon.c
|
||||
+++ b/pixman/pixman-arm-neon.c
|
||||
@@ -129,6 +129,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
|
||||
|
||||
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
|
||||
uint32_t, uint32_t)
|
||||
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
|
||||
+ uint32_t, uint16_t)
|
||||
|
||||
void
|
||||
pixman_composite_src_n_8_asm_neon (int32_t w,
|
||||
@@ -350,6 +352,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
|
||||
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
|
||||
+
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
From 29003c3befe2159396d181ef9ac1caaadcabf382 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 13:21:53 +0200
|
||||
Subject: [PATCH 20/22] ARM: NEON optimization for bilinear scaled 'src_0565_x888'
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
|
||||
after: op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 3 +++
|
||||
pixman/pixman-arm-neon.c | 4 ++++
|
||||
2 files changed, 7 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index f0b42ca..9245db9 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2628,3 +2628,6 @@ generate_bilinear_scanline_func \
|
||||
|
||||
generate_bilinear_scanline_func \
|
||||
pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
|
||||
+
|
||||
+generate_bilinear_scanline_func \
|
||||
+ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
|
||||
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
|
||||
index ba6de66..18e26eb 100644
|
||||
--- a/pixman/pixman-arm-neon.c
|
||||
+++ b/pixman/pixman-arm-neon.c
|
||||
@@ -131,6 +131,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
|
||||
uint32_t, uint32_t)
|
||||
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
|
||||
uint32_t, uint16_t)
|
||||
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
|
||||
+ uint16_t, uint32_t)
|
||||
|
||||
void
|
||||
pixman_composite_src_n_8_asm_neon (int32_t w,
|
||||
@@ -355,6 +357,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
|
||||
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
|
||||
+
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
From fe99673719091d4a880d031add1369332a75731b Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 13:27:41 +0200
|
||||
Subject: [PATCH 21/22] ARM: NEON optimization for bilinear scaled 'src_0565_0565'
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
|
||||
after: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 3 +++
|
||||
pixman/pixman-arm-neon.c | 3 +++
|
||||
2 files changed, 6 insertions(+), 0 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index 9245db9..2b6875b 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \
|
||||
|
||||
generate_bilinear_scanline_func \
|
||||
pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
|
||||
+
|
||||
+generate_bilinear_scanline_func \
|
||||
+ pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
|
||||
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
|
||||
index 18e26eb..0a10ca1 100644
|
||||
--- a/pixman/pixman-arm-neon.c
|
||||
+++ b/pixman/pixman-arm-neon.c
|
||||
@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
|
||||
uint32_t, uint16_t)
|
||||
PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
|
||||
uint16_t, uint32_t)
|
||||
+PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
|
||||
+ uint16_t, uint16_t)
|
||||
|
||||
void
|
||||
pixman_composite_src_n_8_asm_neon (int32_t w,
|
||||
@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
|
||||
|
||||
SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
|
||||
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
|
||||
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
From 70a923882ca24664344ba91a649e7aa12c3063f7 Mon Sep 17 00:00:00 2001
|
||||
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||
Date: Wed, 9 Mar 2011 13:55:48 +0200
|
||||
Subject: [PATCH 22/22] ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
|
||||
|
||||
Instructions scheduling improved in the code responsible for fetching r5g6b5
|
||||
pixels and converting them to the intermediate x8r8g8b8 color format used in
|
||||
the interpolation part of code. Still a lot of NEON stalls are remaining,
|
||||
which can be resolved later by the use of pipelining.
|
||||
|
||||
Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
|
||||
Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
|
||||
before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
|
||||
op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
|
||||
after: op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
|
||||
op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
|
||||
---
|
||||
pixman/pixman-arm-neon-asm.S | 118 +++++++++++++++++++++++++++++++++++------
|
||||
1 files changed, 100 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
|
||||
index 2b6875b..71b30ac 100644
|
||||
--- a/pixman/pixman-arm-neon-asm.S
|
||||
+++ b/pixman/pixman-arm-neon-asm.S
|
||||
@@ -2430,6 +2430,101 @@ fname:
|
||||
convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
|
||||
.endm
|
||||
|
||||
+.macro bilinear_load_and_vertical_interpolate_two_8888 \
|
||||
+ acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
|
||||
+
|
||||
+ bilinear_load_8888 reg1, reg2, tmp1
|
||||
+ vmull.u8 acc1, reg1, d28
|
||||
+ vmlal.u8 acc1, reg2, d29
|
||||
+ bilinear_load_8888 reg3, reg4, tmp2
|
||||
+ vmull.u8 acc2, reg3, d28
|
||||
+ vmlal.u8 acc2, reg4, d29
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_load_and_vertical_interpolate_four_8888 \
|
||||
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
|
||||
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
|
||||
+
|
||||
+ bilinear_load_and_vertical_interpolate_two_8888 \
|
||||
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
|
||||
+ bilinear_load_and_vertical_interpolate_two_8888 \
|
||||
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_load_and_vertical_interpolate_two_0565 \
|
||||
+ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
|
||||
+
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ mov TMP4, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP2, asl #1
|
||||
+ add TMP2, BOTTOM, TMP2, asl #1
|
||||
+ add TMP3, TOP, TMP4, asl #1
|
||||
+ add TMP4, BOTTOM, TMP4, asl #1
|
||||
+ vld1.32 {acc2lo[0]}, [TMP1]
|
||||
+ vld1.32 {acc2hi[0]}, [TMP3]
|
||||
+ vld1.32 {acc2lo[1]}, [TMP2]
|
||||
+ vld1.32 {acc2hi[1]}, [TMP4]
|
||||
+ convert_0565_to_x888 acc2, reg3, reg2, reg1
|
||||
+ vzip.u8 reg1, reg3
|
||||
+ vzip.u8 reg2, reg4
|
||||
+ vzip.u8 reg3, reg4
|
||||
+ vzip.u8 reg1, reg2
|
||||
+ vmull.u8 acc1, reg1, d28
|
||||
+ vmlal.u8 acc1, reg2, d29
|
||||
+ vmull.u8 acc2, reg3, d28
|
||||
+ vmlal.u8 acc2, reg4, d29
|
||||
+.endm
|
||||
+
|
||||
+.macro bilinear_load_and_vertical_interpolate_four_0565 \
|
||||
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
|
||||
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
|
||||
+
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ mov TMP4, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP2, asl #1
|
||||
+ add TMP2, BOTTOM, TMP2, asl #1
|
||||
+ add TMP3, TOP, TMP4, asl #1
|
||||
+ add TMP4, BOTTOM, TMP4, asl #1
|
||||
+ vld1.32 {xacc2lo[0]}, [TMP1]
|
||||
+ vld1.32 {xacc2hi[0]}, [TMP3]
|
||||
+ vld1.32 {xacc2lo[1]}, [TMP2]
|
||||
+ vld1.32 {xacc2hi[1]}, [TMP4]
|
||||
+ convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
|
||||
+ mov TMP2, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ mov TMP4, X, asr #16
|
||||
+ add X, X, UX
|
||||
+ add TMP1, TOP, TMP2, asl #1
|
||||
+ add TMP2, BOTTOM, TMP2, asl #1
|
||||
+ add TMP3, TOP, TMP4, asl #1
|
||||
+ add TMP4, BOTTOM, TMP4, asl #1
|
||||
+ vld1.32 {yacc2lo[0]}, [TMP1]
|
||||
+ vzip.u8 xreg1, xreg3
|
||||
+ vld1.32 {yacc2hi[0]}, [TMP3]
|
||||
+ vzip.u8 xreg2, xreg4
|
||||
+ vld1.32 {yacc2lo[1]}, [TMP2]
|
||||
+ vzip.u8 xreg3, xreg4
|
||||
+ vld1.32 {yacc2hi[1]}, [TMP4]
|
||||
+ vzip.u8 xreg1, xreg2
|
||||
+ convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
|
||||
+ vmull.u8 xacc1, xreg1, d28
|
||||
+ vzip.u8 yreg1, yreg3
|
||||
+ vmlal.u8 xacc1, xreg2, d29
|
||||
+ vzip.u8 yreg2, yreg4
|
||||
+ vmull.u8 xacc2, xreg3, d28
|
||||
+ vzip.u8 yreg3, yreg4
|
||||
+ vmlal.u8 xacc2, xreg4, d29
|
||||
+ vzip.u8 yreg1, yreg2
|
||||
+ vmull.u8 yacc1, yreg1, d28
|
||||
+ vmlal.u8 yacc1, yreg2, d29
|
||||
+ vmull.u8 yacc2, yreg3, d28
|
||||
+ vmlal.u8 yacc2, yreg4, d29
|
||||
+.endm
|
||||
+
|
||||
.macro bilinear_store_8888 numpix, tmp1, tmp2
|
||||
.if numpix == 4
|
||||
vst1.32 {d0, d1}, [OUT]!
|
||||
@@ -2477,12 +2572,8 @@ fname:
|
||||
.endm
|
||||
|
||||
.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
|
||||
- bilinear_load_&src_fmt d0, d1, d2
|
||||
- vmull.u8 q1, d0, d28
|
||||
- vmlal.u8 q1, d1, d29
|
||||
- bilinear_load_&src_fmt d20, d21, d22
|
||||
- vmull.u8 q11, d20, d28
|
||||
- vmlal.u8 q11, d21, d29
|
||||
+ bilinear_load_and_vertical_interpolate_two_&src_fmt \
|
||||
+ q1, q11, d0, d1, d20, d21, d22, d23
|
||||
vshr.u16 q15, q12, #8
|
||||
vadd.u16 q12, q12, q13
|
||||
vshll.u16 q0, d2, #8
|
||||
@@ -2498,18 +2589,9 @@ fname:
|
||||
.endm
|
||||
|
||||
.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
|
||||
- bilinear_load_&src_fmt d0, d1, d2
|
||||
- vmull.u8 q1, d0, d28
|
||||
- vmlal.u8 q1, d1, d29
|
||||
- bilinear_load_&src_fmt d20, d21, d22
|
||||
- vmull.u8 q11, d20, d28
|
||||
- vmlal.u8 q11, d21, d29
|
||||
- bilinear_load_&src_fmt d4, d5, d6
|
||||
- vmull.u8 q3, d4, d28
|
||||
- vmlal.u8 q3, d5, d29
|
||||
- bilinear_load_&src_fmt d16, d17, d18
|
||||
- vmull.u8 q9, d16, d28
|
||||
- vmlal.u8 q9, d17, d29
|
||||
+ bilinear_load_and_vertical_interpolate_four_&src_fmt \
|
||||
+ q1, q11, d0, d1, d20, d21, d22, d23 \
|
||||
+ q3, q9, d4, d5, d16, d17, d18, d19
|
||||
pld [TMP1, PF_OFFS]
|
||||
vshr.u16 q15, q12, #8
|
||||
vadd.u16 q12, q12, q13
|
||||
--
|
||||
1.7.3.4
|
||||
|
||||
74
x11-libs/pixman/pixman-0.21.6.ebuild
Normal file
74
x11-libs/pixman/pixman-0.21.6.ebuild
Normal file
@@ -0,0 +1,74 @@
|
||||
# Copyright 1999-2011 Gentoo Foundation
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
# $Header: $
|
||||
|
||||
# Ebuild API level this file is written against.
EAPI=3
|
||||
# Eclasses whose helpers the functions below call (xorg-2_pkg_setup, epatch,
# eautoreconf, gcc-version, version_is_at_least).
# NOTE(review): which eclass provides which helper is not visible here - confirm.
inherit xorg-2 toolchain-funcs versionator
|
||||
|
||||
# Upstream pixman git repository.
# NOTE(review): this is a versioned (0.21.6) ebuild that applies its git
# patches from FILESDIR in src_prepare; confirm whether anything still reads
# this variable.
EGIT_REPO_URI="git://anongit.freedesktop.org/git/pixman"
|
||||
DESCRIPTION="Low-level pixel manipulation routines"
|
||||
|
||||
# Only keyworded (testing) for arm.
KEYWORDS="~arm"
|
||||
# USE flags queried by pkg_setup to choose the configure switches.
IUSE="altivec mmx sse2 simd neon"
|
||||
|
||||
# Translate the active USE flags into pixman's ./configure switches.
#
# Globals:
#   CONFIGURE_OPTIONS (written) - whitespace-separated list of configure
#       switches; always ends with one mmx and one sse2 switch.
#       NOTE(review): presumably consumed by the xorg-2 eclass at configure
#       time - confirm.
# Calls:
#   xorg-2_pkg_setup, use, use_enable, version_is_at_least, gcc-version, ewarn
# Outputs:
#   ewarn messages when SSE2 has to be switched off.
pkg_setup() {
	xorg-2_pkg_setup

	CONFIGURE_OPTIONS="
		$(use_enable altivec vmx)
		$(use_enable simd arm-simd)
		$(use_enable neon arm-neon)
		--disable-gtk"

	# Track the effective state separately from the USE flags, because SSE2
	# may still be vetoed below.
	local enable_mmx=0
	local enable_sse2=0
	if use mmx; then
		enable_mmx=1
	fi
	if use sse2; then
		enable_sse2=1
	fi

	# this block fixes bug #260287
	if use x86 && [[ ${enable_sse2} == 1 ]]; then
		# Test the helper's exit status directly; wrapping it in $( ) would
		# try to execute whatever it prints.
		if ! version_is_at_least "4.2" "$(gcc-version)"; then
			ewarn "SSE2 instructions require GCC 4.2 or higher."
			ewarn "pixman will be built *without* SSE2 support"
			enable_sse2=0
		fi
	fi

	# this block fixes bug #236558
	# SSE2 without MMX is rejected: both end up disabled.
	if [[ ${enable_mmx} == 0 && ${enable_sse2} == 1 ]]; then
		ewarn "You enabled SSE2 but have MMX disabled. This is an invalid combination."
		ewarn "pixman will be built *without* MMX/SSE2 support."
		enable_sse2=0
	fi

	local mmx_switch="--disable-mmx"
	local sse2_switch="--disable-sse2"
	if [[ ${enable_mmx} == 1 ]]; then
		mmx_switch="--enable-mmx"
	fi
	if [[ ${enable_sse2} == 1 ]]; then
		sse2_switch="--enable-sse2"
	fi

	CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} ${mmx_switch} ${sse2_switch}"
}
|
||||
|
||||
# Apply the bundled patch series to the unpacked sources, then regenerate the
# autotools build files.
#
# Globals:
#   FILESDIR (read) - directory holding the patch files.
# Calls:
#   epatch (once per patch, in list order), eautoreconf
src_prepare() {
	# Numbered patch series, applied in ascending order. Keep the list
	# sorted: later patches modify lines introduced by earlier ones.
	local -a patch_series=(
		0002-Fix-compilation-on-Win32.patch
		0003-test-Fix-tests-for-compilation-on-Windows.patch
		0004-test-Add-Makefile-for-Win32.patch
		0005-Do-not-include-unused-headers.patch
		0006-test-Silence-MSVC-warnings.patch
		0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
		0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
		0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
		0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
		0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
		0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
		0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
		0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
		0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
		0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
		0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
		0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
		0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
		0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
		0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
		0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
	)

	local patch_name
	for patch_name in "${patch_series[@]}"; do
		epatch "${FILESDIR}/${patch_name}"
	done

	# We patch Makefile.am and such, so eautoreconf!
	eautoreconf
}
|
||||
Reference in New Issue
Block a user