diff --git a/x11-libs/pixman/Manifest b/x11-libs/pixman/Manifest new file mode 100644 index 0000000..82d548b --- /dev/null +++ b/x11-libs/pixman/Manifest @@ -0,0 +1,23 @@ +AUX 0002-Fix-compilation-on-Win32.patch 1373 RMD160 27ab9d8e5ee15ca0ca2316c2088488f77cc04193 SHA1 b60a844b6f97405d5974838126ce1a581a5578fb SHA256 c167e98ac16db2f09d11e71b6acfc2436ea06ad5f5e91d829463e8a2428c8f1d +AUX 0003-test-Fix-tests-for-compilation-on-Windows.patch 5857 RMD160 c03eda4e6678e85da3d3aecb6f8af77bbbcba396 SHA1 78bc36587fabe4e5d984c1535d60bac1f27665bd SHA256 c17670b7a3603e3591e5f3264441b01dd1861c24681cea9bb63c1a4896471f09 +AUX 0004-test-Add-Makefile-for-Win32.patch 2285 RMD160 956800336268328f68cbf80fcf0f1e1a8254ed41 SHA1 9bf6452ad0982af23ead0ebfff1c3a46ab8a1454 SHA256 23b281492ad50c090c3ae3d501f92a039edcd35b4019ba60566394a9b9c99a41 +AUX 0005-Do-not-include-unused-headers.patch 1138 RMD160 e73bcdb3d39a3fe29a8d61fec12facbe0c15bb1f SHA1 77e320b1f0702e6b31214a7057c759f0cdec37fd SHA256 036fad75930a7a5981d0fe58749c1d1c7b066931d1bbcb7695ad8f45208c66e3 +AUX 0006-test-Silence-MSVC-warnings.patch 1879 RMD160 1ee25d5477740736c3bbb1c925f14fa45b9baea9 SHA1 17b823ed9bca1423ce3e7df6384820cff5b2c4f7 SHA256 dd835cb47e6f54c7295e181c8cab32924f5b7aa79be630b1dffd4987b04535a2 +AUX 0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch 18432 RMD160 45c3975ea38fdb4fe9ed927c60a020cf65c30726 SHA1 45023ba64a3c48d73d8d43b70dd38fa885b7ba7f SHA256 b96ae6c8bea2a900dd013f134f5223bf415fdc9f492f3854ee2b095451276857 +AUX 0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch 3637 RMD160 a8b2d0ddc050a521c1510b0f34c465e6e17d8b1d SHA1 f6b92ea26d7773cc826d63c175742194523b8480 SHA256 aa1354d2395925d53108269dc7f45ca4c16509318af794704c458339541d1ea8 +AUX 0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch 5981 RMD160 577a6b80b87d4687798e86bd9fe777a536ca9d76 SHA1 ffe4dfd7b3464bf6271ae869483124f4b8df7fd7 SHA256 ab8e918705c5d8bc24944a9b34f1a6d941d6f88cd16db4476566cb4bbf535039 +AUX 
0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch 9330 RMD160 8601746564959d01c01993bd359c4b4db0beec8f SHA1 76b52d88701def10885f9f0592dea3b19707f6c1 SHA256 2da797eebe471eabca3da195ea295faf462c3f38330a11eee18e7247f7370477 +AUX 0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch 4741 RMD160 9054134e9c656a955c595bf004e51fc5652687fd SHA1 7c361a399fa4a20ca8e413b2cf0f29847d519cb1 SHA256 9c8fb1eb06e054fb0fdfece9c33e0b311a3949ba3550c4a95f5943a914e7a770 +AUX 0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch 1080 RMD160 6ca0c5e9597a765f03ac719357d9add04acdecea SHA1 6f74002a839afb6e5d91a8565776edfe19a29f07 SHA256 af280e15b33683841a7df486c8bbd21c9268958865652bdaa6389fdd3909a457 +AUX 0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch 2486 RMD160 cdb29a25ba6726bda75db4c2a37b29d628957085 SHA1 12465119abc6ae4aba91fb4ffa5e21c7d3044ad4 SHA256 88bc1c5118b1550f5b59a16a511da2675697f79bed863368d284e4a1b260f833 +AUX 0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch 4606 RMD160 2f826d65e0e3b80ad0fe3371137df128336c257f SHA1 75b051949e226dea0ac55d9a2618b688808793e8 SHA256 85d417a2160944b774dad489432b637a5662e0416a3919f095a93607772278ac +AUX 0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch 2712 RMD160 8d335c0f1585ad9882fdef63c14328f36a2a7eac SHA1 f0f024b9248a85bc59a503a0088c24a9b97b0646 SHA256 bd97cb792274b8d6d498f07479a314b2ef1d9059b11acc3979871d38abb30ceb +AUX 0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch 7188 RMD160 4fa7ee0d71533a47d699b78e8af9e83d60dae450 SHA1 7fa1dea003735971baf7f199a240c8eec6917f30 SHA256 6b99d26015f8953bf43cb3a7495a02efec4b807ce23166ccd5faca711acc2475 +AUX 0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch 8195 RMD160 4cff07d6bd52966a57148064f4a7a2a9da73838f SHA1 fdd5b62e8f33d0f3f245d86fb1567e26c829a051 SHA256 b17b03a4f7516de8bd803320310b26b2c09b694730ae4eddb5ae56a092da03bb +AUX 0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch 6752 RMD160 
53aaedec3527bd2a280b4b503504233306f92505 SHA1 db2606645ad5eb1bb0ec42b14116ef3aed0b9a3f SHA256 671c3c9d910f4ed8631149b31795879d35399679b8136847f6dbc94dda885a6d +AUX 0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch 2083 RMD160 e4ba51ef2842f4cf42acbfc85dc2ecb09fabe655 SHA1 ca597361fc9da1d4b74a875c145af1cb7e4abb34 SHA256 929326bc1eca3e45a9a42dca9890e9a5422621e0587470a0eb28ed088ff097b3 +AUX 0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch 2063 RMD160 222b88cd6453fb4adc7f4c14b7b9d89a2c0e5c6d SHA1 f14961b189217cdb125d6ec8c002e8237c7f804a SHA256 ce0e82c68cef5fd9b4f7557ebb8c903bda7b09b496b04f6b7b229f42d7cd4bf0 +AUX 0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch 2058 RMD160 316bc2158c3dda9edef498582b316020e2a5e3ed SHA1 54d123fdb551a0dede24f5d53fa98e359d5d5d13 SHA256 c540fd8abdb2a23d445f53f0ff35905b27e4104fd4d9c9d59cce2d68d4970a58 +AUX 0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch 5971 RMD160 5a6956cbf719d928d1e6d5a4cd07f0ec22a62c41 SHA1 f04f6938337145bde6410ad8983e8a7e3749d9fa SHA256 5320bdbf0d1ddd3b753dedae61d5785b9db27ccdaeb49880508138c0e6113f57 +DIST pixman-0.21.6.tar.bz2 457580 RMD160 6ad5979d123e0268426c08954fd7f6040f7a3859 SHA1 73198f8f9159e3ffc2294806f32fa2c8042b57e6 SHA256 35a9fc00fc55c022318a7ac48eb52de60360beec36008b0037f944f3d0d62e83 +EBUILD pixman-0.21.6.ebuild 3157 RMD160 9188bc6dac468b30d4888a5a507d2c3145cb36c9 SHA1 a2b33238bc243e1e99b298e65d76482b2d32e73a SHA256 060cb6f797fa67bee354865a599f240150fe80373bab99f164dfd27d10aa76f2 diff --git a/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch b/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch new file mode 100644 index 0000000..905f29e --- /dev/null +++ b/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch @@ -0,0 +1,42 @@ +From 20ed723a5a42fb8636bc9a5f32974dec1b66a785 Mon Sep 17 00:00:00 2001 +From: Andrea Canciani +Date: Thu, 24 Feb 2011 10:44:04 +0100 +Subject: [PATCH 02/22] Fix compilation on Win32 + +Makefile.win32 
contained a typo and was missing the dependency from +the built sources. +--- + pixman/Makefile.win32 | 6 ++++-- + 1 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32 +index 775fb5e..b5f9397 100644 +--- a/pixman/Makefile.win32 ++++ b/pixman/Makefile.win32 +@@ -56,6 +56,8 @@ SOURCES = \ + pixman-general.c \ + $(NULL) + ++BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c ++ + # MMX compilation flags + ifeq ($(MMX_VAR),on) + CFLAGS += $(MMX_CFLAGS) +@@ -122,7 +124,7 @@ endif + endif + + # pixman compilation and linking +-$(CFG_VAR)/%.obj: %.c ++$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES) + @mkdir -p $(CFG_VAR) + @$(CC) -c $(CFLAGS) -Fo"$@" $< + +@@ -141,4 +143,4 @@ pixman-combine64.h: pixman-combine.h.template make-combine.pl + + clean_r: + @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0 +- @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0 ++ @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0 +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch b/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch new file mode 100644 index 0000000..3789c79 --- /dev/null +++ b/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch @@ -0,0 +1,232 @@ +From 11305b4ecdd36a17592c5c75de9157874853ab20 Mon Sep 17 00:00:00 2001 +From: Andrea Canciani +Date: Tue, 22 Feb 2011 21:46:37 +0100 +Subject: [PATCH 03/22] test: Fix tests for compilation on Windows + +The Microsoft C compiler cannot handle subobject initialization and +Win32 does not provide snprintf. + +Work around these limitations by using normal struct initialization +and using sprintf (a manual check shows that the buffer size is +sufficient). 
+--- + test/composite.c | 29 +++++++++++++-------------- + test/fetch-test.c | 52 ++++++++++++++++++++++---------------------------- + test/trap-crasher.c | 20 +++++++++--------- + 3 files changed, 47 insertions(+), 54 deletions(-) + +diff --git a/test/composite.c b/test/composite.c +index e14f954..08c6689 100644 +--- a/test/composite.c ++++ b/test/composite.c +@@ -617,18 +617,18 @@ eval_diff (color_t *expected, color_t *test, pixman_format_code_t format) + } + + static char * +-describe_image (image_t *info, char *buf, int buflen) ++describe_image (image_t *info, char *buf) + { + if (info->size) + { +- snprintf (buf, buflen, "%s %dx%d%s", +- info->format->name, +- info->size, info->size, +- info->repeat ? "R" :""); ++ sprintf (buf, "%s %dx%d%s", ++ info->format->name, ++ info->size, info->size, ++ info->repeat ? "R" :""); + } + else + { +- snprintf (buf, buflen, "solid"); ++ sprintf (buf, "solid"); + } + + return buf; +@@ -710,10 +710,9 @@ composite_test (image_t *dst, + { + char buf[40]; + +- snprintf (buf, sizeof (buf), +- "%s %scomposite", +- op->name, +- component_alpha ? "CA " : ""); ++ sprintf (buf, "%s %scomposite", ++ op->name, ++ component_alpha ? 
"CA " : ""); + + printf ("%s test error of %.4f --\n" + " R G B A\n" +@@ -735,9 +734,9 @@ composite_test (image_t *dst, + mask->color->b, mask->color->a, + dst->color->r, dst->color->g, + dst->color->b, dst->color->a); +- printf ("src: %s, ", describe_image (src, buf, sizeof (buf))); +- printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf))); +- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf))); ++ printf ("src: %s, ", describe_image (src, buf)); ++ printf ("mask: %s, ", describe_image (mask, buf)); ++ printf ("dst: %s\n\n", describe_image (dst, buf)); + } + else + { +@@ -747,8 +746,8 @@ composite_test (image_t *dst, + src->color->b, src->color->a, + dst->color->r, dst->color->g, + dst->color->b, dst->color->a); +- printf ("src: %s, ", describe_image (src, buf, sizeof (buf))); +- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf))); ++ printf ("src: %s, ", describe_image (src, buf)); ++ printf ("dst: %s\n\n", describe_image (dst, buf)); + } + + success = FALSE; +diff --git a/test/fetch-test.c b/test/fetch-test.c +index 2ca16dd..314a072 100644 +--- a/test/fetch-test.c ++++ b/test/fetch-test.c +@@ -8,7 +8,7 @@ + + static pixman_indexed_t mono_palette = + { +- .rgba = { 0x00000000, 0x00ffffff }, ++ 0, { 0x00000000, 0x00ffffff }, + }; + + +@@ -24,57 +24,53 @@ typedef struct { + static testcase_t testcases[] = + { + { +- .format = PIXMAN_a8r8g8b8, +- .width = 2, .height = 2, +- .stride = 8, +- .src = { 0x00112233, 0x44556677, +- 0x8899aabb, 0xccddeeff }, +- .dst = { 0x00112233, 0x44556677, +- 0x8899aabb, 0xccddeeff }, +- .indexed = NULL, ++ PIXMAN_a8r8g8b8, ++ 2, 2, ++ 8, ++ { 0x00112233, 0x44556677, ++ 0x8899aabb, 0xccddeeff }, ++ { 0x00112233, 0x44556677, ++ 0x8899aabb, 0xccddeeff }, ++ NULL, + }, + { +- .format = PIXMAN_g1, +- .width = 8, .height = 2, +- .stride = 4, ++ PIXMAN_g1, ++ 8, 2, ++ 4, + #ifdef WORDS_BIGENDIAN +- .src = + { + 0xaa000000, + 0x55000000 + }, + #else +- .src = + { + 0x00000055, + 0x000000aa + }, + #endif +- .dst 
= + { + 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, + 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff + }, +- .indexed = &mono_palette, ++ &mono_palette, + }, + #if 0 + { +- .format = PIXMAN_g8, +- .width = 4, .height = 2, +- .stride = 4, +- .src = { 0x01234567, +- 0x89abcdef }, +- .dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767, +- 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, }, ++ PIXMAN_g8, ++ 4, 2, ++ 4, ++ { 0x01234567, ++ 0x89abcdef }, ++ { 0x00010101, 0x00232323, 0x00454545, 0x00676767, ++ 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, }, + }, + #endif + /* FIXME: make this work on big endian */ + { +- .format = PIXMAN_yv12, +- .width = 8, .height = 2, +- .stride = 8, ++ PIXMAN_yv12, ++ 8, 2, ++ 8, + #ifdef WORDS_BIGENDIAN +- .src = + { + 0x00ff00ff, 0x00ff00ff, + 0xff00ff00, 0xff00ff00, +@@ -82,7 +78,6 @@ static testcase_t testcases[] = + 0x800080ff + }, + #else +- .src = + { + 0xff00ff00, 0xff00ff00, + 0x00ff00ff, 0x00ff00ff, +@@ -90,7 +85,6 @@ static testcase_t testcases[] = + 0xff800080 + }, + #endif +- .dst = + { + 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113, + 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff, +diff --git a/test/trap-crasher.c b/test/trap-crasher.c +index 42b82f6..7485e62 100644 +--- a/test/trap-crasher.c ++++ b/test/trap-crasher.c +@@ -7,21 +7,21 @@ main() + pixman_image_t *dst; + pixman_trapezoid_t traps[1] = { + { +- .top = 2147483646, +- .bottom = 2147483647, +- .left = { +- .p1 = { .x = 0, .y = 0 }, +- .p2 = { .x = 0, .y = 2147483647 } ++ 2147483646, ++ 2147483647, ++ { ++ { 0, 0 }, ++ { 0, 2147483647 } + }, +- .right = { +- .p1 = { .x = 65536, .y = 0 }, +- .p2 = { .x = 0, .y = 2147483647 } ++ { ++ { 65536, 0 }, ++ { 0, 2147483647 } + } + }, + }; +- ++ + dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1); +- ++ + pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps); + return (0); + } +-- 
+1.7.3.4 + diff --git a/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch b/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch new file mode 100644 index 0000000..eba6de1 --- /dev/null +++ b/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch @@ -0,0 +1,92 @@ +From 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 Mon Sep 17 00:00:00 2001 +From: Andrea Canciani +Date: Tue, 22 Feb 2011 22:04:49 +0100 +Subject: [PATCH 04/22] test: Add Makefile for Win32 + +--- + test/Makefile.win32 | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 73 insertions(+), 0 deletions(-) + create mode 100644 test/Makefile.win32 + +diff --git a/test/Makefile.win32 b/test/Makefile.win32 +new file mode 100644 +index 0000000..c71afe1 +--- /dev/null ++++ b/test/Makefile.win32 +@@ -0,0 +1,73 @@ ++CC = cl ++LINK = link ++ ++CFG_VAR = $(CFG) ++ifeq ($(CFG_VAR),) ++CFG_VAR=release ++endif ++ ++CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_BIND_TO_CURRENT_VCLIBS_VERSION -D_MT -I../pixman -I. 
-I../ ++TEST_LDADD = ../pixman/$(CFG_VAR)/pixman-1.lib ++INCLUDES = -I../pixman -I$(top_builddir)/pixman ++ ++# optimization flags ++ifeq ($(CFG_VAR),debug) ++CFLAGS += -Od -Zi ++else ++CFLAGS += -O2 ++endif ++ ++SOURCES = \ ++ a1-trap-test.c \ ++ pdf-op-test.c \ ++ region-test.c \ ++ region-translate-test.c \ ++ fetch-test.c \ ++ oob-test.c \ ++ trap-crasher.c \ ++ alpha-loop.c \ ++ scaling-crash-test.c \ ++ gradient-crash-test.c \ ++ alphamap.c \ ++ stress-test.c \ ++ composite-traps-test.c \ ++ blitters-test.c \ ++ scaling-test.c \ ++ affine-test.c \ ++ composite.c \ ++ utils.c ++ ++TESTS = \ ++ $(CFG_VAR)/a1-trap-test.exe \ ++ $(CFG_VAR)/pdf-op-test.exe \ ++ $(CFG_VAR)/region-test.exe \ ++ $(CFG_VAR)/region-translate-test.exe \ ++ $(CFG_VAR)/fetch-test.exe \ ++ $(CFG_VAR)/oob-test.exe \ ++ $(CFG_VAR)/trap-crasher.exe \ ++ $(CFG_VAR)/alpha-loop.exe \ ++ $(CFG_VAR)/scaling-crash-test.exe \ ++ $(CFG_VAR)/gradient-crash-test.exe \ ++ $(CFG_VAR)/alphamap.exe \ ++ $(CFG_VAR)/stress-test.exe \ ++ $(CFG_VAR)/composite-traps-test.exe \ ++ $(CFG_VAR)/blitters-test.exe \ ++ $(CFG_VAR)/scaling-test.exe \ ++ $(CFG_VAR)/affine-test.exe \ ++ $(CFG_VAR)/composite.exe ++ ++ ++OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES)) ++ ++$(CFG_VAR)/%.obj: %.c ++ @mkdir -p $(CFG_VAR) ++ @$(CC) -c $(CFLAGS) -Fo"$@" $< ++ ++$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj ++ $(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD) ++ ++all: $(OBJECTS) $(TESTS) ++ @exit 0 ++ ++clean: ++ @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb || exit 0 +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch b/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch new file mode 100644 index 0000000..14111aa --- /dev/null +++ b/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch @@ -0,0 +1,40 @@ +From 8868778ea1fdc8e70da76b3b00ea78106c5840d8 Mon Sep 17 00:00:00 2001 +From: Andrea Canciani +Date: Tue, 22 Feb 2011 22:43:48 +0100 +Subject: [PATCH 05/22] Do not 
include unused headers + +pixman-combine32.h is included without being used both in +pixman-image.c and in pixman-general.c. +--- + pixman/pixman-general.c | 2 -- + pixman/pixman-image.c | 1 - + 2 files changed, 0 insertions(+), 3 deletions(-) + +diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c +index 16ea3a4..872fb7e 100644 +--- a/pixman/pixman-general.c ++++ b/pixman/pixman-general.c +@@ -36,8 +36,6 @@ + #include + #include + #include "pixman-private.h" +-#include "pixman-combine32.h" +-#include "pixman-private.h" + + static void + general_src_iter_init (pixman_implementation_t *imp, +diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c +index 9103ca6..84bacf8 100644 +--- a/pixman/pixman-image.c ++++ b/pixman/pixman-image.c +@@ -30,7 +30,6 @@ + #include + + #include "pixman-private.h" +-#include "pixman-combine32.h" + + pixman_bool_t + _pixman_init_gradient (gradient_t * gradient, +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch b/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch new file mode 100644 index 0000000..d7e5716 --- /dev/null +++ b/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch @@ -0,0 +1,63 @@ +From 9ebde285fa990bfa1524f166fbfb1368c346b14a Mon Sep 17 00:00:00 2001 +From: Andrea Canciani +Date: Thu, 24 Feb 2011 12:53:39 +0100 +Subject: [PATCH 06/22] test: Silence MSVC warnings + +MSVC does not notice non-returning functions (abort() / assert(0)) +and warns about paths which end with them in non-void functions: + +c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) : +warning C4715: 'reader' : not all control paths return a value +c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) : +warning C4715: 'real_reader' : not all control paths return a value +c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) : +warning C4715: 'calc_op' : not all control paths return a value + +These warnings can be silenced by adding a return after the +termination 
call. +--- + test/composite.c | 1 + + test/fetch-test.c | 1 + + test/stress-test.c | 2 +- + 3 files changed, 3 insertions(+), 1 deletions(-) + +diff --git a/test/composite.c b/test/composite.c +index 08c6689..a86e5ed 100644 +--- a/test/composite.c ++++ b/test/composite.c +@@ -426,6 +426,7 @@ calc_op (pixman_op_t op, double src, double dst, double srca, double dsta) + case PIXMAN_OP_HSL_LUMINOSITY: + default: + abort(); ++ return 0; /* silence MSVC */ + } + #undef mult_chan + } +diff --git a/test/fetch-test.c b/test/fetch-test.c +index 314a072..60bc765 100644 +--- a/test/fetch-test.c ++++ b/test/fetch-test.c +@@ -110,6 +110,7 @@ reader (const void *src, int size) + return *(uint32_t *)src; + default: + assert(0); ++ return 0; /* silence MSVC */ + } + } + +diff --git a/test/stress-test.c b/test/stress-test.c +index bcbc1f8..166dc6d 100644 +--- a/test/stress-test.c ++++ b/test/stress-test.c +@@ -128,7 +128,7 @@ real_reader (const void *src, int size) + return *(uint32_t *)src; + default: + assert (0); +- break; ++ return 0; /* silence MSVC */ + } + } + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch new file mode 100644 index 0000000..a5ab646 --- /dev/null +++ b/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch @@ -0,0 +1,466 @@ +From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Mon, 21 Feb 2011 01:29:02 +0200 +Subject: [PATCH 07/22] Main loop template for fast single pass bilinear scaling + +Can be used for implementing SIMD optimized fast path +functions which work with bilinear scaled source images. + +Similar to the template for nearest scaling main loop, the +following types of mask are supported: +1. no mask +2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag +3. solid mask + +PAD repeat is fully supported. 
NONE repeat is partially +supported (right now only works if source image has alpha +channel or when alpha channel of the source image does not +have any effect on the compositing operation). +--- + pixman/pixman-fast-path.h | 432 +++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 432 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h +index d081222..1885d47 100644 +--- a/pixman/pixman-fast-path.h ++++ b/pixman/pixman-fast-path.h +@@ -587,4 +587,436 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + ++/*****************************************************************************/ ++ ++/* ++ * Identify 5 zones in each scanline for bilinear scaling. Depending on ++ * whether 2 pixels to be interpolated are fetched from the image itself, ++ * from the padding area around it or from both image and padding area. ++ */ ++static force_inline void ++bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x, ++ int32_t * left_pad, ++ int32_t * left_tz, ++ int32_t * width, ++ int32_t * right_tz, ++ int32_t * right_pad) ++{ ++ int width1 = *width, left_pad1, right_pad1; ++ int width2 = *width, left_pad2, right_pad2; ++ ++ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, ++ &width1, &left_pad1, &right_pad1); ++ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, ++ unit_x, &width2, &left_pad2, &right_pad2); ++ ++ *left_pad = left_pad2; ++ *left_tz = left_pad1 - left_pad2; ++ *right_tz = right_pad2 - right_pad1; ++ *right_pad = right_pad1; ++ *width -= *left_pad + *left_tz + *right_tz + *right_pad; ++} ++ ++/* ++ * Main loop template for single pass bilinear scaling. It needs to be ++ * provided with 'scanline_func' which should do the compositing operation. 
++ * The needed function has the following prototype: ++ * ++ * scanline_func (dst_type_t * dst, ++ * const mask_type_ * mask, ++ * const src_type_t * src_top, ++ * const src_type_t * src_bottom, ++ * int32_t width, ++ * int weight_top, ++ * int weight_bottom, ++ * pixman_fixed_t vx, ++ * pixman_fixed_t unit_x, ++ * pixman_fixed_t max_vx, ++ * pixman_bool_t zero_src) ++ * ++ * Where: ++ * dst - destination scanline buffer for storing results ++ * mask - mask buffer (or single value for solid mask) ++ * src_top, src_bottom - two source scanlines ++ * width - number of pixels to process ++ * weight_top - weight of the top row for interpolation ++ * weight_bottom - weight of the bottom row for interpolation ++ * vx - initial position for fetching the first pair of ++ * pixels from the source buffer ++ * unit_x - position increment needed to move to the next pair ++ * of pixels ++ * max_vx - image size as a fixed point value, can be used for ++ * implementing NORMAL repeat (when it is supported) ++ * zero_src - boolean hint variable, which is set to TRUE when ++ * all source pixels are fetched from zero padding ++ * zone for NONE repeat ++ * ++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256, ++ * but sometimes it may be less than that for NONE repeat when handling ++ * fuzzy antialiased top or bottom image edges. Also both top and ++ * bottom weight variables are guaranteed to have value in 0-255 ++ * range and can fit into unsigned byte or be used with 8-bit SIMD ++ * multiplication instructions. 
++ */ ++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \ ++static void \ ++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ ++ pixman_op_t op, \ ++ pixman_image_t * src_image, \ ++ pixman_image_t * mask_image, \ ++ pixman_image_t * dst_image, \ ++ int32_t src_x, \ ++ int32_t src_y, \ ++ int32_t mask_x, \ ++ int32_t mask_y, \ ++ int32_t dst_x, \ ++ int32_t dst_y, \ ++ int32_t width, \ ++ int32_t height) \ ++{ \ ++ dst_type_t *dst_line; \ ++ mask_type_t *mask_line; \ ++ src_type_t *src_first_line; \ ++ int y1, y2; \ ++ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ ++ pixman_vector_t v; \ ++ pixman_fixed_t vx, vy; \ ++ pixman_fixed_t unit_x, unit_y; \ ++ int32_t left_pad, left_tz, right_tz, right_pad; \ ++ \ ++ dst_type_t *dst; \ ++ mask_type_t solid_mask; \ ++ const mask_type_t *mask = &solid_mask; \ ++ int src_stride, mask_stride, dst_stride; \ ++ \ ++ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ ++ if (have_mask) \ ++ { \ ++ if (mask_is_solid) \ ++ { \ ++ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \ ++ mask_stride = 0; \ ++ } \ ++ else \ ++ { \ ++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ ++ mask_stride, mask_line, 1); \ ++ } \ ++ } \ ++ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ ++ * transformed from destination space to source space */ \ ++ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ ++ \ ++ /* reference point is the center of the pixel */ \ ++ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ ++ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ ++ v.vector[2] = pixman_fixed_1; \ ++ \ ++ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ ++ return; \ ++ \ ++ unit_x = 
src_image->common.transform->matrix[0][0]; \ ++ unit_y = src_image->common.transform->matrix[1][1]; \ ++ \ ++ v.vector[0] -= pixman_fixed_1 / 2; \ ++ v.vector[1] -= pixman_fixed_1 / 2; \ ++ \ ++ vy = v.vector[1]; \ ++ \ ++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ ++ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ ++ { \ ++ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ ++ &left_pad, &left_tz, &width, &right_tz, &right_pad); \ ++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ ++ { \ ++ /* PAD repeat does not need special handling for 'transition zones' and */ \ ++ /* they can be combined with 'padding zones' safely */ \ ++ left_pad += left_tz; \ ++ right_pad += right_tz; \ ++ left_tz = right_tz = 0; \ ++ } \ ++ v.vector[0] += left_pad * unit_x; \ ++ } \ ++ \ ++ while (--height >= 0) \ ++ { \ ++ int weight1, weight2; \ ++ dst = dst_line; \ ++ dst_line += dst_stride; \ ++ vx = v.vector[0]; \ ++ if (have_mask && !mask_is_solid) \ ++ { \ ++ mask = mask_line; \ ++ mask_line += mask_stride; \ ++ } \ ++ \ ++ y1 = pixman_fixed_to_int (vy); \ ++ weight2 = (vy >> 8) & 0xff; \ ++ if (weight2) \ ++ { \ ++ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \ ++ y2 = y1 + 1; \ ++ weight1 = 256 - weight2; \ ++ } \ ++ else \ ++ { \ ++ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \ ++ y2 = y1; \ ++ weight1 = weight2 = 128; \ ++ } \ ++ vy += unit_y; \ ++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ ++ { \ ++ src_type_t *src1, *src2; \ ++ src_type_t buf1[2]; \ ++ src_type_t buf2[2]; \ ++ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ ++ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ ++ src1 = src_first_line + src_stride * y1; \ ++ src2 = src_first_line + src_stride * y2; \ ++ \ ++ if (left_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = src1[0]; \ ++ buf2[0] = buf2[1] = src2[0]; \ ++ scanline_func (dst, mask, \ ++ buf1, 
buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ ++ dst += left_pad; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += left_pad; \ ++ } \ ++ if (width > 0) \ ++ { \ ++ scanline_func (dst, mask, \ ++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ ++ dst += width; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += width; \ ++ } \ ++ if (right_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ ++ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ ++ } \ ++ } \ ++ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ ++ { \ ++ src_type_t *src1, *src2; \ ++ src_type_t buf1[2]; \ ++ src_type_t buf2[2]; \ ++ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ ++ if (y1 < 0) \ ++ { \ ++ weight1 = 0; \ ++ y1 = 0; \ ++ } \ ++ if (y1 >= src_image->bits.height) \ ++ { \ ++ weight1 = 0; \ ++ y1 = src_image->bits.height - 1; \ ++ } \ ++ if (y2 < 0) \ ++ { \ ++ weight2 = 0; \ ++ y2 = 0; \ ++ } \ ++ if (y2 >= src_image->bits.height) \ ++ { \ ++ weight2 = 0; \ ++ y2 = src_image->bits.height - 1; \ ++ } \ ++ src1 = src_first_line + src_stride * y1; \ ++ src2 = src_first_line + src_stride * y2; \ ++ \ ++ if (left_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = 0; \ ++ buf2[0] = buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ ++ dst += left_pad; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += left_pad; \ ++ } \ ++ if (left_tz > 0) \ ++ { \ ++ buf1[0] = 0; \ ++ buf1[1] = src1[0]; \ ++ buf2[0] = 0; \ ++ buf2[1] = src2[0]; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, left_tz, weight1, weight2, \ ++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ ++ dst += left_tz; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += left_tz; \ ++ vx += left_tz * unit_x; \ ++ } \ ++ if (width > 0) \ ++ { \ ++ scanline_func (dst, mask, \ ++ src1, src2, width, weight1, weight2, 
vx, unit_x, 0, FALSE); \ ++ dst += width; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += width; \ ++ vx += width * unit_x; \ ++ } \ ++ if (right_tz > 0) \ ++ { \ ++ buf1[0] = src1[src_image->bits.width - 1]; \ ++ buf1[1] = 0; \ ++ buf2[0] = src2[src_image->bits.width - 1]; \ ++ buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_tz, weight1, weight2, \ ++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \ ++ dst += right_tz; \ ++ if (have_mask && !mask_is_solid) \ ++ mask += right_tz; \ ++ } \ ++ if (right_pad > 0) \ ++ { \ ++ buf1[0] = buf1[1] = 0; \ ++ buf2[0] = buf2[1] = 0; \ ++ scanline_func (dst, mask, \ ++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ ++ } \ ++ } \ ++ else \ ++ { \ ++ scanline_func (dst, mask, src_first_line + src_stride * y1, \ ++ src_first_line + src_stride * y2, width, \ ++ weight1, weight2, vx, unit_x, max_vx, FALSE); \ ++ } \ ++ } \ ++} ++ ++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ ++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \ ++ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ ++ dst_type_t, repeat_mode, have_mask, mask_is_solid) ++ ++#define SCALED_BILINEAR_FLAGS \ ++ (FAST_PATH_SCALE_TRANSFORM | \ ++ FAST_PATH_NO_ALPHA_MAP | \ ++ FAST_PATH_BILINEAR_FILTER | \ ++ FAST_PATH_NO_ACCESSORS | \ ++ FAST_PATH_NARROW_FORMAT) ++ ++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ 
FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_null, 0, \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_PAD_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ ++ } ++ ++#define 
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ (SCALED_BILINEAR_FLAGS | \ ++ FAST_PATH_NONE_REPEAT | \ ++ FAST_PATH_X_UNIT_POSITIVE), \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ ++ } ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ ++ { PIXMAN_OP_ ## op, \ ++ PIXMAN_ ## s, \ ++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ ++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ ++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ ++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ ++ } ++ ++/* Prefer the use of 'cover' variant, because it is faster */ ++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func) ++ ++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func) ++ ++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ ++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) ++ + #endif +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch b/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch new file mode 100644 index 0000000..a492a8f --- /dev/null +++ b/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch @@ -0,0 +1,136 @@ +From 0df43b8ae5031dd83775d00b57b6bed809db0e89 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Mon, 21 Feb 2011 02:07:09 +0200 
+Subject: [PATCH 08/22] test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds' + +Individual correctness check for the new bilinear scaling related +supplementary function. This test program uses a bit wider range +of input arguments, not covered by other tests. +--- + test/Makefile.am | 2 + + test/scaling-helpers-test.c | 93 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 95 insertions(+), 0 deletions(-) + create mode 100644 test/scaling-helpers-test.c + +diff --git a/test/Makefile.am b/test/Makefile.am +index 057e9ce..9dc7219 100644 +--- a/test/Makefile.am ++++ b/test/Makefile.am +@@ -13,6 +13,7 @@ TESTPROGRAMS = \ + trap-crasher \ + alpha-loop \ + scaling-crash-test \ ++ scaling-helpers-test \ + gradient-crash-test \ + alphamap \ + stress-test \ +@@ -33,6 +34,7 @@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h + composite_SOURCES = composite.c utils.c utils.h + gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h + stress_test_SOURCES = stress-test.c utils.c utils.h ++scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h + + # Benchmarks + +diff --git a/test/scaling-helpers-test.c b/test/scaling-helpers-test.c +new file mode 100644 +index 0000000..c186138 +--- /dev/null ++++ b/test/scaling-helpers-test.c +@@ -0,0 +1,93 @@ ++#include ++#include ++#include ++#include ++#include ++#include "utils.h" ++#include "pixman-fast-path.h" ++ ++/* A trivial reference implementation for ++ * 'bilinear_pad_repeat_get_scanline_bounds' ++ */ ++static void ++bilinear_pad_repeat_get_scanline_bounds_ref (int32_t source_image_width, ++ pixman_fixed_t vx_, ++ pixman_fixed_t unit_x, ++ int32_t * left_pad, ++ int32_t * left_tz, ++ int32_t * width, ++ int32_t * right_tz, ++ int32_t * right_pad) ++{ ++ int w = *width; ++ *left_pad = 0; ++ *left_tz = 0; ++ *width = 0; ++ *right_tz = 0; ++ *right_pad = 0; ++ int64_t vx = vx_; ++ while (--w >= 0) ++ { ++ if (vx < 0) ++ { ++ if (vx + pixman_fixed_1 < 0) ++ *left_pad += 1; ++ 
else ++ *left_tz += 1; ++ } ++ else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width)) ++ { ++ if (vx >= pixman_int_to_fixed (source_image_width)) ++ *right_pad += 1; ++ else ++ *right_tz += 1; ++ } ++ else ++ { ++ *width += 1; ++ } ++ vx += unit_x; ++ } ++} ++ ++int ++main (void) ++{ ++ int i; ++ for (i = 0; i < 10000; i++) ++ { ++ int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1; ++ int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2; ++ pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16); ++ int32_t width = lcg_rand_N(10000); ++ int32_t source_image_width = lcg_rand_N(10000) + 1; ++ pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1; ++ width1 = width2 = width; ++ ++ bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width, ++ vx, ++ unit_x, ++ &left_pad1, ++ &left_tz1, ++ &width1, ++ &right_tz1, ++ &right_pad1); ++ ++ bilinear_pad_repeat_get_scanline_bounds (source_image_width, ++ vx, ++ unit_x, ++ &left_pad2, ++ &left_tz2, ++ &width2, ++ &right_tz2, ++ &right_pad2); ++ ++ assert (left_pad1 == left_pad2); ++ assert (left_tz1 == left_tz2); ++ assert (width1 == width2); ++ assert (right_tz1 == right_tz2); ++ assert (right_pad1 == right_pad2); ++ } ++ ++ return 0; ++} +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch b/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch new file mode 100644 index 0000000..58f8a5b --- /dev/null +++ b/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch @@ -0,0 +1,156 @@ +From 350029396d911941591149cc82b5e68a78ad6747 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Mon, 21 Feb 2011 20:18:02 +0200 +Subject: [PATCH 09/22] SSE2 optimization for bilinear scaled 'src_8888_8888' + +A primitive naive implementation of bilinear scaling using SSE2 intrinsics, +which only handles one pixel at a time. 
It is approximately 2x faster than +pixman general compositing path. Single pass processing without intermediate +temporary buffer contributes to ~15% and loop unrolling contributes to ~20% +of this speedup. + +Benchmark on Intel Core i7 (x86-64): + Using cairo-perf-trace: + before: image firefox-planet-gnome 12.566 12.610 0.23% 6/6 + after: image firefox-planet-gnome 10.961 11.013 0.19% 5/6 + + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s + after: op=1, src=20028888, dst=20028888, speed=165.38 MPix/s +--- + pixman/pixman-sse2.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 files changed, 112 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c +index 88287b4..696005f 100644 +--- a/pixman/pixman-sse2.c ++++ b/pixman/pixman-sse2.c +@@ -5567,6 +5567,114 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) + ++static void ++bilinear_interpolate_line_sse2 (uint32_t * out, ++ const uint32_t * top, ++ const uint32_t * bottom, ++ int wt, ++ int wb, ++ pixman_fixed_t x, ++ pixman_fixed_t ux, ++ int width) ++{ ++ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); ++ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); ++ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff); ++ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); ++ const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux); ++ const __m128i xmm_zero = _mm_setzero_si128 (); ++ __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x); ++ uint32_t pix1, pix2, pix3, pix4; ++ ++ #define INTERPOLATE_ONE_PIXEL(pix) \ ++ do { \ ++ __m128i xmm_wh, xmm_lo, xmm_hi, a; \ ++ /* fetch 2x2 pixel block into sse2 register */ \ ++ uint32_t tl = top [pixman_fixed_to_int (x)]; \ ++ uint32_t tr = top 
[pixman_fixed_to_int (x) + 1]; \ ++ uint32_t bl = bottom [pixman_fixed_to_int (x)]; \ ++ uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \ ++ a = _mm_set_epi32 (tr, tl, br, bl); \ ++ x += ux; \ ++ /* vertical interpolation */ \ ++ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \ ++ xmm_wt), \ ++ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \ ++ xmm_wb)); \ ++ /* calculate horizontal weights */ \ ++ xmm_wh = _mm_add_epi16 (xmm_addc, \ ++ _mm_xor_si128 (xmm_xorc, \ ++ _mm_srli_epi16 (xmm_x, 8))); \ ++ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \ ++ /* horizontal interpolation */ \ ++ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \ ++ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \ ++ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \ ++ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \ ++ /* shift and pack the result */ \ ++ a = _mm_srli_epi32 (a, 16); \ ++ a = _mm_packs_epi32 (a, a); \ ++ a = _mm_packus_epi16 (a, a); \ ++ pix = _mm_cvtsi128_si32 (a); \ ++ } while (0) ++ ++ while ((width -= 4) >= 0) ++ { ++ INTERPOLATE_ONE_PIXEL (pix1); ++ INTERPOLATE_ONE_PIXEL (pix2); ++ INTERPOLATE_ONE_PIXEL (pix3); ++ INTERPOLATE_ONE_PIXEL (pix4); ++ *out++ = pix1; ++ *out++ = pix2; ++ *out++ = pix3; ++ *out++ = pix4; ++ } ++ if (width & 2) ++ { ++ INTERPOLATE_ONE_PIXEL (pix1); ++ INTERPOLATE_ONE_PIXEL (pix2); ++ *out++ = pix1; ++ *out++ = pix2; ++ } ++ if (width & 1) ++ { ++ INTERPOLATE_ONE_PIXEL (pix1); ++ *out = pix1; ++ } ++ ++ #undef INTERPOLATE_ONE_PIXEL ++} ++ ++static force_inline void ++scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, ++ const uint32_t * mask, ++ const uint32_t * src_top, ++ const uint32_t * src_bottom, ++ int32_t w, ++ int wt, ++ int wb, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x, ++ pixman_fixed_t max_vx, ++ pixman_bool_t zero_src) ++{ ++ bilinear_interpolate_line_sse2 (dst, src_top, src_bottom, ++ wt, wb, vx, unit_x, w); ++} ++ ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, ++ 
scaled_bilinear_scanline_sse2_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ COVER, FALSE, FALSE) ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC, ++ scaled_bilinear_scanline_sse2_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ PAD, FALSE, FALSE) ++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC, ++ scaled_bilinear_scanline_sse2_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ NONE, FALSE, FALSE) ++ + static const pixman_fast_path_t sse2_fast_paths[] = + { + /* PIXMAN_OP_OVER */ +@@ -5668,6 +5776,10 @@ static const pixman_fast_path_t sse2_fast_paths[] = + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + ++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch b/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch new file mode 100644 index 0000000..e68a0f7 --- /dev/null +++ b/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch @@ -0,0 +1,288 @@ +From 17feaa9c50bb8521b0366345efe181bd99754957 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Tue, 22 Feb 2011 18:45:03 +0200 +Subject: [PATCH 10/22] ARM: NEON optimization for bilinear scaled 'src_8888_8888' + +Initial NEON optimization for bilinear scaling. Can be probably +improved more. 
+ +Benchmark on ARM Cortex-A8: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s + after: op=1, src=20028888, dst=20028888, speed=44.27 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 197 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 45 ++++++++++ + 2 files changed, 242 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 47daf45..c168e10 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2391,3 +2391,200 @@ generate_composite_function_nearest_scanline \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ ++ ++/******************************************************************************/ ++ ++/* Supplementary macro for setting function attributes */ ++.macro pixman_asm_function fname ++ .func fname ++ .global fname ++#ifdef __ELF__ ++ .hidden fname ++ .type fname, %function ++#endif ++fname: ++.endm ++ ++.macro bilinear_interpolate_last_pixel ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d0}, [TMP1] ++ vshr.u16 d30, d24, #8 ++ vld1.32 {d1}, [TMP2] ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ /* 5 cycles bubble */ ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ /* 5 cycles bubble */ ++ vshrn.u32 d0, q0, #16 ++ /* 3 cycles bubble */ ++ vmovn.u16 d0, q0 ++ /* 1 cycle bubble */ ++ vst1.32 {d0[0]}, [OUT, :32]! 
++.endm ++ ++.macro bilinear_interpolate_two_pixels ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d0}, [TMP1] ++ vld1.32 {d1}, [TMP2] ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d20}, [TMP1] ++ vld1.32 {d21}, [TMP2] ++ vmull.u8 q11, d20, d28 ++ vmlal.u8 q11, d21, d29 ++ vshr.u16 q15, q12, #8 ++ vadd.u16 q12, q12, q13 ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ vshll.u16 q10, d22, #8 ++ vmlsl.u16 q10, d22, d31 ++ vmlal.u16 q10, d23, d31 ++ vshrn.u32 d30, q0, #16 ++ vshrn.u32 d31, q10, #16 ++ vmovn.u16 d0, q15 ++ vst1.32 {d0}, [OUT]! ++.endm ++ ++.macro bilinear_interpolate_four_pixels ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d0}, [TMP1] ++ vld1.32 {d1}, [TMP2] ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d20}, [TMP1] ++ vld1.32 {d21}, [TMP2] ++ vmull.u8 q11, d20, d28 ++ vmlal.u8 q11, d21, d29 ++ vshr.u16 q15, q12, #8 ++ vadd.u16 q12, q12, q13 ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ vshll.u16 q10, d22, #8 ++ vmlsl.u16 q10, d22, d31 ++ vmlal.u16 q10, d23, d31 ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d4}, [TMP1] ++ vld1.32 {d5}, [TMP2] ++ vmull.u8 q3, d4, d28 ++ vmlal.u8 q3, d5, d29 ++ mov TMP1, X, asr #16 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP1, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {d16}, [TMP1] ++ vld1.32 {d17}, [TMP2] ++ vmull.u8 q9, d16, d28 ++ vmlal.u8 q9, d17, d29 ++ vshr.u16 q15, 
q12, #8 ++ vadd.u16 q12, q12, q13 ++ vshll.u16 q2, d6, #8 ++ vmlsl.u16 q2, d6, d30 ++ vmlal.u16 q2, d7, d30 ++ vshll.u16 q8, d18, #8 ++ vmlsl.u16 q8, d18, d31 ++ vmlal.u16 q8, d19, d31 ++ vshrn.u32 d0, q0, #16 ++ vshrn.u32 d1, q10, #16 ++ vshrn.u32 d4, q2, #16 ++ vshrn.u32 d5, q8, #16 ++ vmovn.u16 d0, q0 ++ vmovn.u16 d1, q2 ++ vst1.32 {d0, d1}, [OUT]! ++.endm ++ ++ ++/* ++ * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out, ++ * const uint32_t * top, ++ * const uint32_t * bottom, ++ * int wt, ++ * int wb, ++ * pixman_fixed_t x, ++ * pixman_fixed_t ux, ++ * int width) ++ */ ++ ++pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon ++ OUT .req r0 ++ TOP .req r1 ++ BOTTOM .req r2 ++ WT .req r3 ++ WB .req r4 ++ X .req r5 ++ UX .req r6 ++ WIDTH .req ip ++ TMP1 .req r3 ++ TMP2 .req r4 ++ ++ mov ip, sp ++ push {r4, r5, r6, r7} ++ ldmia ip, {WB, X, UX, WIDTH} ++ ++ cmp WIDTH, #0 ++ ble 3f ++ vdup.u16 q12, X ++ vdup.u16 q13, UX ++ vdup.u8 d28, WT ++ vdup.u8 d29, WB ++ vadd.u16 d25, d25, d26 ++ vadd.u16 q13, q13, q13 ++ ++ subs WIDTH, WIDTH, #4 ++ blt 1f ++0: ++ bilinear_interpolate_four_pixels ++ subs WIDTH, WIDTH, #4 ++ bge 0b ++1: ++ tst WIDTH, #2 ++ beq 2f ++ bilinear_interpolate_two_pixels ++2: ++ tst WIDTH, #1 ++ beq 3f ++ bilinear_interpolate_last_pixel ++3: ++ pop {r4, r5, r6, r7} ++ bx lr ++ ++ .unreq OUT ++ .unreq TOP ++ .unreq BOTTOM ++ .unreq WT ++ .unreq WB ++ .unreq X ++ .unreq UX ++ .unreq WIDTH ++ .unreq TMP1 ++ .unreq TMP2 ++.endfunc +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 3e0c0d1..c7c0254 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -232,6 +232,47 @@ pixman_blt_neon (uint32_t *src_bits, + } + } + ++void ++pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out, ++ const uint32_t * top, ++ const uint32_t * bottom, ++ int wt, ++ int wb, ++ pixman_fixed_t x, ++ pixman_fixed_t ux, ++ int width); ++ ++static force_inline void 
++scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst, ++ const uint32_t * mask, ++ const uint32_t * src_top, ++ const uint32_t * src_bottom, ++ int32_t w, ++ int wt, ++ int wb, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x, ++ pixman_fixed_t max_vx, ++ pixman_bool_t zero_src) ++{ ++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, ++ src_bottom, wt, wb, ++ vx, unit_x, w); ++} ++ ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC, ++ scaled_bilinear_scanline_neon_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ COVER, FALSE, FALSE) ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC, ++ scaled_bilinear_scanline_neon_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ PAD, FALSE, FALSE) ++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC, ++ scaled_bilinear_scanline_neon_8888_8888_SRC, ++ uint32_t, uint32_t, uint32_t, ++ NONE, FALSE, FALSE) ++ + static const pixman_fast_path_t arm_neon_fast_paths[] = + { + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), +@@ -343,6 +384,10 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + ++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch b/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch new file mode 100644 index 0000000..4370eb0 --- /dev/null +++ b/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch @@ -0,0 +1,156 @@ +From 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= +Date: Mon, 7 Mar 2011 13:45:54 -0500 +Subject: [PATCH 11/22] test: In image_endian_swap() use pixman_image_get_format() to get the bpp. + +There is no reason to pass in the bpp as an argument; it can be gotten +directly from the image. +--- + test/affine-test.c | 6 +++--- + test/blitters-test.c | 4 ++-- + test/composite-traps-test.c | 2 +- + test/scaling-test.c | 6 +++--- + test/utils.c | 9 +++++++-- + test/utils.h | 2 +- + 6 files changed, 17 insertions(+), 12 deletions(-) + +diff --git a/test/affine-test.c b/test/affine-test.c +index b7a1fa6..ed8000c 100644 +--- a/test/affine-test.c ++++ b/test/affine-test.c +@@ -95,8 +95,8 @@ test_composite (int testnum, + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + +- image_endian_swap (src_img, src_bpp * 8); +- image_endian_swap (dst_img, dst_bpp * 8); ++ image_endian_swap (src_img); ++ image_endian_swap (dst_img); + + pixman_transform_init_identity (&transform); + +@@ -251,7 +251,7 @@ test_composite (int testnum, + dstbuf[i] &= 0xFFFFFF; + } + +- image_endian_swap (dst_img, dst_bpp * 8); ++ image_endian_swap (dst_img); + + if (verbose) + { +diff --git a/test/blitters-test.c b/test/blitters-test.c +index 42181ef..63e7cb3 100644 +--- a/test/blitters-test.c ++++ b/test/blitters-test.c +@@ -61,7 +61,7 @@ create_random_image (pixman_format_code_t *allowed_formats, + pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)])); + } + +- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt)); ++ image_endian_swap (img); + + if (used_fmt) *used_fmt = fmt; + return img; +@@ -101,7 +101,7 @@ free_random_image (uint32_t initcrc, + /* swap endiannes in order to provide identical results on both big + * and litte endian systems + */ +- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt)); ++ image_endian_swap (img); + crc32 = compute_crc32 (initcrc, data, stride * height); + } + +diff --git a/test/composite-traps-test.c 
b/test/composite-traps-test.c +index 8f32778..298537d 100644 +--- a/test/composite-traps-test.c ++++ b/test/composite-traps-test.c +@@ -218,7 +218,7 @@ test_composite (int testnum, + dst_bits[i] &= 0xFFFFFF; + } + +- image_endian_swap (dst_img, dst_bpp * 8); ++ image_endian_swap (dst_img); + + if (verbose) + { +diff --git a/test/scaling-test.c b/test/scaling-test.c +index dbb9d39..82370f7 100644 +--- a/test/scaling-test.c ++++ b/test/scaling-test.c +@@ -140,8 +140,8 @@ test_composite (int testnum, + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + +- image_endian_swap (src_img, src_bpp * 8); +- image_endian_swap (dst_img, dst_bpp * 8); ++ image_endian_swap (src_img); ++ image_endian_swap (dst_img); + + if (lcg_rand_n (4) > 0) + { +@@ -330,7 +330,7 @@ test_composite (int testnum, + dstbuf[i] &= 0xFFFFFF; + } + +- image_endian_swap (dst_img, dst_bpp * 8); ++ image_endian_swap (dst_img); + + if (verbose) + { +diff --git a/test/utils.c b/test/utils.c +index 2f21398..4bf02e1 100644 +--- a/test/utils.c ++++ b/test/utils.c +@@ -133,11 +133,12 @@ compute_crc32 (uint32_t in_crc32, + /* perform endian conversion of pixel data + */ + void +-image_endian_swap (pixman_image_t *img, int bpp) ++image_endian_swap (pixman_image_t *img) + { + int stride = pixman_image_get_stride (img); + uint32_t *data = pixman_image_get_data (img); + int height = pixman_image_get_height (img); ++ int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img)); + int i, j; + + /* swap bytes only on big endian systems */ +@@ -145,10 +146,13 @@ image_endian_swap (pixman_image_t *img, int bpp) + if (*(volatile uint8_t *)&endian_check_var != 0x12) + return; + ++ if (bpp == 8) ++ return; ++ + for (i = 0; i < height; i++) + { + uint8_t *line_data = (uint8_t *)data + stride * i; +- /* swap bytes only for 16, 24 and 32 bpp for now */ ++ + switch (bpp) + { + case 1: +@@ -208,6 +212,7 @@ image_endian_swap (pixman_image_t *img, int bpp) + } + break; + default: ++ assert 
(FALSE); + break; + } + } +diff --git a/test/utils.h b/test/utils.h +index 9c7bdb1..a5183f7 100644 +--- a/test/utils.h ++++ b/test/utils.h +@@ -60,7 +60,7 @@ compute_crc32 (uint32_t in_crc32, + /* perform endian conversion of pixel data + */ + void +-image_endian_swap (pixman_image_t *img, int bpp); ++image_endian_swap (pixman_image_t *img); + + /* Allocate memory that is bounded by protected pages, + * so that out-of-bounds access will cause segfaults +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch b/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch new file mode 100644 index 0000000..375e253 --- /dev/null +++ b/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch @@ -0,0 +1,36 @@ +From 84e361c8e357e26f299213fbeefe64c73447b116 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= +Date: Fri, 4 Mar 2011 15:51:18 -0500 +Subject: [PATCH 12/22] test: Do endian swapping of the source and destination images. + +Otherwise the test fails on big endian. Fix for bug 34767, reported by +Siarhei Siamashka. 
+--- + test/composite-traps-test.c | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c +index 298537d..cf30281 100644 +--- a/test/composite-traps-test.c ++++ b/test/composite-traps-test.c +@@ -139,6 +139,8 @@ test_composite (int testnum, + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } ++ ++ image_endian_swap (src_img); + } + + /* Create destination image */ +@@ -157,6 +159,8 @@ test_composite (int testnum, + + dst_img = pixman_image_create_bits ( + dst_format, dst_width, dst_height, dst_bits, dst_stride); ++ ++ image_endian_swap (dst_img); + } + + /* Create traps */ +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch b/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch new file mode 100644 index 0000000..3a04397 --- /dev/null +++ b/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch @@ -0,0 +1,77 @@ +From bb3d1b67fd0f42ae00af811c624ea1c44541034d Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Sun, 6 Mar 2011 16:17:12 +0200 +Subject: [PATCH 13/22] ARM: use prefetch in nearest scaled 'src_0565_0565' + +Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s + after: op=1, src=10020565, dst=10020565, speed=73.63 MPix/s + +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s + after: op=1, src=10020565, dst=10020565, speed=267.50 MPix/s +--- + pixman/pixman-arm-simd-asm.S | 27 +++++++++++++++++++++++++-- + 1 files changed, 25 insertions(+), 2 deletions(-) + +diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S +index 
7567700..dd1366d 100644 +--- a/pixman/pixman-arm-simd-asm.S ++++ b/pixman/pixman-arm-simd-asm.S +@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + TMP1 .req r4 + TMP2 .req r5 + VXMASK .req r6 ++ PF_OFFS .req r7 + + ldr UNIT_X, [sp] + push {r4, r5, r6, r7} +@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + strh TMP2, [DST], #2 + .endm + ++ /* ++ * stop prefetch before reaching the end of scanline (a good behaving ++ * value selected based on some benchmarks with short scanlines) ++ */ ++ #define PREFETCH_BRAKING_DISTANCE 32 ++ + /* now do the scaling */ + and TMP1, VXMASK, VX, lsr #15 + add VX, VX, UNIT_X +- subs W, #4 ++ subs W, #(8 + PREFETCH_BRAKING_DISTANCE) ++ blt 2f ++ /* set prefetch distance to 80 pixels ahead */ ++ add PF_OFFS, VX, UNIT_X, lsl #6 ++ add PF_OFFS, PF_OFFS, UNIT_X, lsl #4 ++1: /* main loop, process 8 pixels per iteration with prefetch */ ++ subs W, W, #8 ++ add PF_OFFS, UNIT_X, lsl #3 ++ scale_2_pixels ++ scale_2_pixels ++ scale_2_pixels ++ scale_2_pixels ++ pld [SRC, PF_OFFS, lsr #15] ++ bge 1b ++2: ++ subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE) + blt 2f +-1: /* main loop, process 4 pixels per iteration */ ++1: /* process the remaining pixels */ + scale_2_pixels + scale_2_pixels + subs W, W, #4 +@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + .unreq TMP1 + .unreq TMP2 + .unreq VXMASK ++ .unreq PF_OFFS + /* return */ + pop {r4, r5, r6, r7} + bx lr +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch b/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch new file mode 100644 index 0000000..d22df37 --- /dev/null +++ b/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch @@ -0,0 +1,131 @@ +From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: 
Mon, 7 Mar 2011 03:10:43 +0200 +Subject: [PATCH 14/22] ARM: common macro for nearest scaling fast paths + +The code of nearest scaled 'src_0565_0565' function was generalized +and moved to a common macro, so that it can be reused for other +fast paths. +--- + pixman/pixman-arm-simd-asm.S | 60 +++++++++++++++++++++++++---------------- + 1 files changed, 36 insertions(+), 24 deletions(-) + +diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S +index dd1366d..a9775e2 100644 +--- a/pixman/pixman-arm-simd-asm.S ++++ b/pixman/pixman-arm-simd-asm.S +@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 + .endfunc + + /* +- * Note: This function is only using armv4t instructions (not even armv6), ++ * Note: This code is only using armv5te instructions (not even armv6), + * but is scheduled for ARM Cortex-A8 pipeline. So it might need to + * be split into a few variants, tuned for each microarchitecture. + * + * TODO: In order to get good performance on ARM9/ARM11 cores (which don't + * have efficient write combining), it needs to be changed to use 16-byte + * aligned writes using STM instruction. 
++ * ++ * Nearest scanline scaler macro template uses the following arguments: ++ * fname - name of the function to generate ++ * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes ++ * t - type suffix for LDR/STR instructions ++ * prefetch_distance - prefetch in the source image by that many ++ * pixels ahead ++ * prefetch_braking_distance - stop prefetching when that many pixels are ++ * remaining before the end of scanline + */ +-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 ++ ++.macro generate_nearest_scanline_func fname, bpp_shift, t, \ ++ prefetch_distance, \ ++ prefetch_braking_distance ++ ++pixman_asm_function fname + W .req r0 + DST .req r1 + SRC .req r2 +@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + + ldr UNIT_X, [sp] + push {r4, r5, r6, r7} +- mvn VXMASK, #1 ++ mvn VXMASK, #((1 << bpp_shift) - 1) + + /* define helper macro */ + .macro scale_2_pixels +- ldrh TMP1, [SRC, TMP1] +- and TMP2, VXMASK, VX, lsr #15 ++ ldr&t TMP1, [SRC, TMP1] ++ and TMP2, VXMASK, VX, lsr #(16 - bpp_shift) + add VX, VX, UNIT_X +- strh TMP1, [DST], #2 ++ str&t TMP1, [DST], #(1 << bpp_shift) + +- ldrh TMP2, [SRC, TMP2] +- and TMP1, VXMASK, VX, lsr #15 ++ ldr&t TMP2, [SRC, TMP2] ++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) + add VX, VX, UNIT_X +- strh TMP2, [DST], #2 ++ str&t TMP2, [DST], #(1 << bpp_shift) + .endm + +- /* +- * stop prefetch before reaching the end of scanline (a good behaving +- * value selected based on some benchmarks with short scanlines) +- */ +- #define PREFETCH_BRAKING_DISTANCE 32 +- + /* now do the scaling */ +- and TMP1, VXMASK, VX, lsr #15 ++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift) + add VX, VX, UNIT_X +- subs W, #(8 + PREFETCH_BRAKING_DISTANCE) ++ subs W, W, #(8 + prefetch_braking_distance) + blt 2f +- /* set prefetch distance to 80 pixels ahead */ +- add PF_OFFS, VX, UNIT_X, lsl #6 +- add PF_OFFS, PF_OFFS, UNIT_X, lsl #4 ++ /* calculate prefetch offset */ ++ 
mov PF_OFFS, #prefetch_distance ++ mla PF_OFFS, UNIT_X, PF_OFFS, VX + 1: /* main loop, process 8 pixels per iteration with prefetch */ + subs W, W, #8 + add PF_OFFS, UNIT_X, lsl #3 +@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + scale_2_pixels + scale_2_pixels + scale_2_pixels +- pld [SRC, PF_OFFS, lsr #15] ++ pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)] + bge 1b + 2: +- subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE) ++ subs W, W, #(4 - 8 - prefetch_braking_distance) + blt 2f + 1: /* process the remaining pixels */ + scale_2_pixels +@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + scale_2_pixels + 2: + tst W, #1 +- ldrneh TMP1, [SRC, TMP1] +- strneh TMP1, [DST], #2 ++ ldrne&t TMP1, [SRC, TMP1] ++ strne&t TMP1, [DST] + /* cleanup helper macro */ + .purgem scale_2_pixels + .unreq DST +@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6 + pop {r4, r5, r6, r7} + bx lr + .endfunc ++.endm ++ ++generate_nearest_scanline_func \ ++ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch b/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch new file mode 100644 index 0000000..99d746e --- /dev/null +++ b/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch @@ -0,0 +1,60 @@ +From 5921c17639fe5fdc595c850e3347281c1c8746ba Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Sun, 6 Mar 2011 22:16:32 +0200 +Subject: [PATCH 15/22] ARM: assembly optimized nearest scaled 'src_8888_8888' + +Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s + after: op=1, src=20028888, dst=20028888, speed=39.79 MPix/s + 
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s + after: op=1, src=20028888, dst=20028888, speed=163.12 MPix/s +--- + pixman/pixman-arm-simd-asm.S | 3 +++ + pixman/pixman-arm-simd.c | 9 +++++++++ + 2 files changed, 12 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S +index a9775e2..858c690 100644 +--- a/pixman/pixman-arm-simd-asm.S ++++ b/pixman/pixman-arm-simd-asm.S +@@ -433,3 +433,6 @@ pixman_asm_function fname + + generate_nearest_scanline_func \ + pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32 ++ ++generate_nearest_scanline_func \ ++ pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32 +diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c +index 6bbc109..a66f8df 100644 +--- a/pixman/pixman-arm-simd.c ++++ b/pixman/pixman-arm-simd.c +@@ -389,6 +389,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888, + + PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC, + uint16_t, uint16_t) ++PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC, ++ uint32_t, uint32_t) + + static const pixman_fast_path_t arm_simd_fast_paths[] = + { +@@ -411,6 +413,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565), + PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565), + ++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888), ++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888), ++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888), ++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888), ++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888), ++ 
PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch b/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch new file mode 100644 index 0000000..3131b7c --- /dev/null +++ b/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch @@ -0,0 +1,130 @@ +From 66f4ee1b3bccf4516433d61dbf2035551a712fa2 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 10:59:46 +0200 +Subject: [PATCH 16/22] ARM: new bilinear fast path template macro in 'pixman-arm-common.h' + +It can be reused in different ARM NEON bilinear scaling fast path functions. +--- + pixman/pixman-arm-common.h | 45 ++++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 44 ++---------------------------------------- + 2 files changed, 48 insertions(+), 41 deletions(-) + +diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h +index 9b1322b..c3bf986 100644 +--- a/pixman/pixman-arm-common.h ++++ b/pixman/pixman-arm-common.h +@@ -361,4 +361,49 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + ++/*****************************************************************************/ ++ ++#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \ ++ src_type, dst_type) \ ++void \ ++pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ ++ dst_type * dst, \ ++ const src_type * top, \ ++ const src_type * bottom, \ ++ int wt, \ ++ int wb, \ ++ pixman_fixed_t x, \ ++ pixman_fixed_t ux, \ ++ int width); \ ++ \ ++static force_inline void \ ++scaled_bilinear_scanline_##cputype##_##name##_##op ( \ ++ dst_type * dst, \ ++ const uint32_t * mask, \ ++ const src_type * src_top, \ ++ const src_type * 
src_bottom, \ ++ int32_t w, \ ++ int wt, \ ++ int wb, \ ++ pixman_fixed_t vx, \ ++ pixman_fixed_t unit_x, \ ++ pixman_fixed_t max_vx, \ ++ pixman_bool_t zero_src) \ ++{ \ ++ if ((flags & SKIP_ZERO_SRC) && zero_src) \ ++ return; \ ++ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ ++ dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ ++} \ ++ \ ++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ ++ scaled_bilinear_scanline_##cputype##_##name##_##op, \ ++ src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ ++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ ++ scaled_bilinear_scanline_##cputype##_##name##_##op, \ ++ src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ ++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ ++ scaled_bilinear_scanline_##cputype##_##name##_##op, \ ++ src_type, uint32_t, dst_type, PAD, FALSE, FALSE) ++ + #endif +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index c7c0254..98ad5f2 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -127,6 +127,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + ++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, ++ uint32_t, uint32_t) ++ + void + pixman_composite_src_n_8_asm_neon (int32_t w, + int32_t h, +@@ -232,47 +235,6 @@ pixman_blt_neon (uint32_t *src_bits, + } + } + +-void +-pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out, +- const uint32_t * top, +- const uint32_t * bottom, +- int wt, +- int wb, +- pixman_fixed_t x, +- pixman_fixed_t ux, +- int width); +- +-static force_inline void +-scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst, +- const uint32_t * mask, +- const uint32_t * src_top, +- const uint32_t * src_bottom, +- int32_t w, +- int wt, +- int wb, +- pixman_fixed_t vx, +- pixman_fixed_t unit_x, +- 
pixman_fixed_t max_vx, +- pixman_bool_t zero_src) +-{ +- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, +- src_bottom, wt, wb, +- vx, unit_x, w); +-} +- +-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC, +- scaled_bilinear_scanline_neon_8888_8888_SRC, +- uint32_t, uint32_t, uint32_t, +- COVER, FALSE, FALSE) +-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC, +- scaled_bilinear_scanline_neon_8888_8888_SRC, +- uint32_t, uint32_t, uint32_t, +- PAD, FALSE, FALSE) +-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC, +- scaled_bilinear_scanline_neon_8888_8888_SRC, +- uint32_t, uint32_t, uint32_t, +- NONE, FALSE, FALSE) +- + static const pixman_fast_path_t arm_neon_fast_paths[] = + { + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565), +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch b/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch new file mode 100644 index 0000000..853e221 --- /dev/null +++ b/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch @@ -0,0 +1,271 @@ +From 34098dba6763afd3636a14f9c2a079ab08f23b2d Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 11:34:15 +0200 +Subject: [PATCH 17/22] ARM: NEON: common macro template for bilinear scanline scalers + +This allows to generate bilinear scanline scaling functions targeting +various source and destination color formats. Right now a8r8g8b8/x8r8g8b8 +and r5g6b5 color formats are supported. More formats can be added if needed. 
+--- + pixman/pixman-arm-neon-asm.S | 222 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon-asm.h | 17 +++ + 2 files changed, 239 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index c168e10..f3784f5 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2588,3 +2588,225 @@ pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon + .unreq TMP1 + .unreq TMP2 + .endfunc ++ ++.purgem bilinear_interpolate_last_pixel ++.purgem bilinear_interpolate_two_pixels ++.purgem bilinear_interpolate_four_pixels ++ ++/* ++ * Bilinear scaling support code which tries to provide pixel fetching, color ++ * format conversion, and interpolation as separate macros which can be used ++ * as the basic building blocks for constructing bilinear scanline functions. ++ */ ++ ++.macro bilinear_load_8888 reg1, reg2, tmp ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP2, asl #2 ++ add TMP2, BOTTOM, TMP2, asl #2 ++ vld1.32 {reg1}, [TMP1] ++ vld1.32 {reg2}, [TMP2] ++.endm ++ ++.macro bilinear_load_0565 reg1, reg2, tmp ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP2, asl #1 ++ add TMP2, BOTTOM, TMP2, asl #1 ++ vld1.32 {reg2[0]}, [TMP1] ++ vld1.32 {reg2[1]}, [TMP2] ++ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp ++.endm ++ ++.macro bilinear_store_8888 numpix, tmp1, tmp2 ++.if numpix == 4 ++ vst1.32 {d0, d1}, [OUT]! ++.elseif numpix == 2 ++ vst1.32 {d0}, [OUT]! ++.elseif numpix == 1 ++ vst1.32 {d0[0]}, [OUT, :32]! ++.else ++ .error bilinear_store_8888 numpix is unsupported ++.endif ++.endm ++ ++.macro bilinear_store_0565 numpix, tmp1, tmp2 ++ vuzp.u8 d0, d1 ++ vuzp.u8 d2, d3 ++ vuzp.u8 d1, d3 ++ vuzp.u8 d0, d2 ++ convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2 ++.if numpix == 4 ++ vst1.16 {d2}, [OUT]! ++.elseif numpix == 2 ++ vst1.32 {d2[0]}, [OUT]! ++.elseif numpix == 1 ++ vst1.16 {d2[0]}, [OUT]! 
++.else ++ .error bilinear_store_0565 numpix is unsupported ++.endif ++.endm ++ ++.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt ++ bilinear_load_&src_fmt d0, d1, d2 ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ vshr.u16 d30, d24, #8 ++ /* 4 cycles bubble */ ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ /* 5 cycles bubble */ ++ vshrn.u32 d0, q0, #16 ++ /* 3 cycles bubble */ ++ vmovn.u16 d0, q0 ++ /* 1 cycle bubble */ ++ bilinear_store_&dst_fmt 1, q2, q3 ++.endm ++ ++.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt ++ bilinear_load_&src_fmt d0, d1, d2 ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ bilinear_load_&src_fmt d20, d21, d22 ++ vmull.u8 q11, d20, d28 ++ vmlal.u8 q11, d21, d29 ++ vshr.u16 q15, q12, #8 ++ vadd.u16 q12, q12, q13 ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ vshll.u16 q10, d22, #8 ++ vmlsl.u16 q10, d22, d31 ++ vmlal.u16 q10, d23, d31 ++ vshrn.u32 d30, q0, #16 ++ vshrn.u32 d31, q10, #16 ++ vmovn.u16 d0, q15 ++ bilinear_store_&dst_fmt 2, q2, q3 ++.endm ++ ++.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt ++ bilinear_load_&src_fmt d0, d1, d2 ++ vmull.u8 q1, d0, d28 ++ vmlal.u8 q1, d1, d29 ++ bilinear_load_&src_fmt d20, d21, d22 ++ vmull.u8 q11, d20, d28 ++ vmlal.u8 q11, d21, d29 ++ bilinear_load_&src_fmt d4, d5, d6 ++ vmull.u8 q3, d4, d28 ++ vmlal.u8 q3, d5, d29 ++ bilinear_load_&src_fmt d16, d17, d18 ++ vmull.u8 q9, d16, d28 ++ vmlal.u8 q9, d17, d29 ++ pld [TMP1, PF_OFFS] ++ vshr.u16 q15, q12, #8 ++ vadd.u16 q12, q12, q13 ++ vshll.u16 q0, d2, #8 ++ vmlsl.u16 q0, d2, d30 ++ vmlal.u16 q0, d3, d30 ++ vshll.u16 q10, d22, #8 ++ vmlsl.u16 q10, d22, d31 ++ vmlal.u16 q10, d23, d31 ++ vshr.u16 q15, q12, #8 ++ vshll.u16 q2, d6, #8 ++ vmlsl.u16 q2, d6, d30 ++ vmlal.u16 q2, d7, d30 ++ vshll.u16 q8, d18, #8 ++ pld [TMP2, PF_OFFS] ++ vmlsl.u16 q8, d18, d31 ++ vmlal.u16 q8, d19, d31 ++ vadd.u16 q12, q12, q13 ++ vshrn.u32 d0, q0, #16 ++ vshrn.u32 d1, q10, #16 ++ 
vshrn.u32 d4, q2, #16 ++ vshrn.u32 d5, q8, #16 ++ vmovn.u16 d0, q0 ++ vmovn.u16 d1, q2 ++ bilinear_store_&dst_fmt 4, q2, q3 ++.endm ++ ++/* ++ * Main template macro for generating NEON optimized bilinear scanline ++ * functions. ++ * ++ * TODO: use software pipelining and aligned writes to the destination buffer ++ * in order to improve performance ++ * ++ * Bilinear scanline scaler macro template uses the following arguments: ++ * fname - name of the function to generate ++ * src_fmt - source color format (8888 or 0565) ++ * dst_fmt - destination color format (8888 or 0565) ++ * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes ++ * prefetch_distance - prefetch in the source image by that many ++ * pixels ahead ++ */ ++ ++.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \ ++ bpp_shift, prefetch_distance ++ ++pixman_asm_function fname ++ OUT .req r0 ++ TOP .req r1 ++ BOTTOM .req r2 ++ WT .req r3 ++ WB .req r4 ++ X .req r5 ++ UX .req r6 ++ WIDTH .req ip ++ TMP1 .req r3 ++ TMP2 .req r4 ++ PF_OFFS .req r7 ++ TMP3 .req r8 ++ TMP4 .req r9 ++ ++ mov ip, sp ++ push {r4, r5, r6, r7, r8, r9} ++ mov PF_OFFS, #prefetch_distance ++ ldmia ip, {WB, X, UX, WIDTH} ++ mul PF_OFFS, PF_OFFS, UX ++ ++ cmp WIDTH, #0 ++ ble 3f ++ ++ vdup.u16 q12, X ++ vdup.u16 q13, UX ++ vdup.u8 d28, WT ++ vdup.u8 d29, WB ++ vadd.u16 d25, d25, d26 ++ vadd.u16 q13, q13, q13 ++ ++ subs WIDTH, WIDTH, #4 ++ blt 1f ++ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) ++0: ++ bilinear_interpolate_four_pixels src_fmt, dst_fmt ++ subs WIDTH, WIDTH, #4 ++ bge 0b ++1: ++ tst WIDTH, #2 ++ beq 2f ++ bilinear_interpolate_two_pixels src_fmt, dst_fmt ++2: ++ tst WIDTH, #1 ++ beq 3f ++ bilinear_interpolate_last_pixel src_fmt, dst_fmt ++3: ++ pop {r4, r5, r6, r7, r8, r9} ++ bx lr ++ ++ .unreq OUT ++ .unreq TOP ++ .unreq BOTTOM ++ .unreq WT ++ .unreq WB ++ .unreq X ++ .unreq UX ++ .unreq WIDTH ++ .unreq TMP1 ++ .unreq TMP2 ++ .unreq PF_OFFS ++ .unreq TMP3 ++ .unreq TMP4 ++.endfunc ++ ++.endm 
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h +index 24fa361..97adc6a 100644 +--- a/pixman/pixman-arm-neon-asm.h ++++ b/pixman/pixman-arm-neon-asm.h +@@ -1158,3 +1158,20 @@ fname: + vsri.u16 out, tmp1, #5 + vsri.u16 out, tmp2, #11 + .endm ++ ++/* ++ * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels ++ * returned in (out0, out1) registers pair. Requires one temporary ++ * 64-bit register (tmp). 'out1' and 'in' may overlap, the original ++ * value from 'in' is lost ++ */ ++.macro convert_four_0565_to_x888_packed in, out0, out1, tmp ++ vshl.u16 out0, in, #5 /* G top 6 bits */ ++ vshl.u16 tmp, in, #11 /* B top 5 bits */ ++ vsri.u16 in, in, #5 /* R is ready in top bits */ ++ vsri.u16 out0, out0, #6 /* G is ready in top bits */ ++ vsri.u16 tmp, tmp, #5 /* B is ready in top bits */ ++ vshr.u16 out1, in, #8 /* R is in place */ ++ vsri.u16 out0, tmp, #8 /* G & B is in place */ ++ vzip.u16 out0, out1 /* everything is in place */ ++.endm +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch b/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch new file mode 100644 index 0000000..2913568 --- /dev/null +++ b/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch @@ -0,0 +1,226 @@ +From 11a0c5badbc59ce967707ef836313cc98f8aec4e Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 11:46:48 +0200 +Subject: [PATCH 18/22] ARM: use common macro template for bilinear scaled 'src_8888_8888' + +This is a cleanup for old and now duplicated code. The performance improvement +is mostly coming from the enabled use of software prefetch, but instructions +scheduling is also slightly better. 
+ +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s + after: op=1, src=20028888, dst=20028888, speed=74.36 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 191 +----------------------------------------- + 1 files changed, 3 insertions(+), 188 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index f3784f5..52dc444 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2405,194 +2405,6 @@ generate_composite_function_nearest_scanline \ + fname: + .endm + +-.macro bilinear_interpolate_last_pixel +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d0}, [TMP1] +- vshr.u16 d30, d24, #8 +- vld1.32 {d1}, [TMP2] +- vmull.u8 q1, d0, d28 +- vmlal.u8 q1, d1, d29 +- /* 5 cycles bubble */ +- vshll.u16 q0, d2, #8 +- vmlsl.u16 q0, d2, d30 +- vmlal.u16 q0, d3, d30 +- /* 5 cycles bubble */ +- vshrn.u32 d0, q0, #16 +- /* 3 cycles bubble */ +- vmovn.u16 d0, q0 +- /* 1 cycle bubble */ +- vst1.32 {d0[0]}, [OUT, :32]! 
+-.endm +- +-.macro bilinear_interpolate_two_pixels +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d0}, [TMP1] +- vld1.32 {d1}, [TMP2] +- vmull.u8 q1, d0, d28 +- vmlal.u8 q1, d1, d29 +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d20}, [TMP1] +- vld1.32 {d21}, [TMP2] +- vmull.u8 q11, d20, d28 +- vmlal.u8 q11, d21, d29 +- vshr.u16 q15, q12, #8 +- vadd.u16 q12, q12, q13 +- vshll.u16 q0, d2, #8 +- vmlsl.u16 q0, d2, d30 +- vmlal.u16 q0, d3, d30 +- vshll.u16 q10, d22, #8 +- vmlsl.u16 q10, d22, d31 +- vmlal.u16 q10, d23, d31 +- vshrn.u32 d30, q0, #16 +- vshrn.u32 d31, q10, #16 +- vmovn.u16 d0, q15 +- vst1.32 {d0}, [OUT]! +-.endm +- +-.macro bilinear_interpolate_four_pixels +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d0}, [TMP1] +- vld1.32 {d1}, [TMP2] +- vmull.u8 q1, d0, d28 +- vmlal.u8 q1, d1, d29 +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d20}, [TMP1] +- vld1.32 {d21}, [TMP2] +- vmull.u8 q11, d20, d28 +- vmlal.u8 q11, d21, d29 +- vshr.u16 q15, q12, #8 +- vadd.u16 q12, q12, q13 +- vshll.u16 q0, d2, #8 +- vmlsl.u16 q0, d2, d30 +- vmlal.u16 q0, d3, d30 +- vshll.u16 q10, d22, #8 +- vmlsl.u16 q10, d22, d31 +- vmlal.u16 q10, d23, d31 +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d4}, [TMP1] +- vld1.32 {d5}, [TMP2] +- vmull.u8 q3, d4, d28 +- vmlal.u8 q3, d5, d29 +- mov TMP1, X, asr #16 +- mov TMP2, X, asr #16 +- add X, X, UX +- add TMP1, TOP, TMP1, asl #2 +- add TMP2, BOTTOM, TMP2, asl #2 +- vld1.32 {d16}, [TMP1] +- vld1.32 {d17}, [TMP2] +- vmull.u8 q9, d16, d28 +- vmlal.u8 q9, d17, d29 +- vshr.u16 q15, 
q12, #8 +- vadd.u16 q12, q12, q13 +- vshll.u16 q2, d6, #8 +- vmlsl.u16 q2, d6, d30 +- vmlal.u16 q2, d7, d30 +- vshll.u16 q8, d18, #8 +- vmlsl.u16 q8, d18, d31 +- vmlal.u16 q8, d19, d31 +- vshrn.u32 d0, q0, #16 +- vshrn.u32 d1, q10, #16 +- vshrn.u32 d4, q2, #16 +- vshrn.u32 d5, q8, #16 +- vmovn.u16 d0, q0 +- vmovn.u16 d1, q2 +- vst1.32 {d0, d1}, [OUT]! +-.endm +- +- +-/* +- * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out, +- * const uint32_t * top, +- * const uint32_t * bottom, +- * int wt, +- * int wb, +- * pixman_fixed_t x, +- * pixman_fixed_t ux, +- * int width) +- */ +- +-pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon +- OUT .req r0 +- TOP .req r1 +- BOTTOM .req r2 +- WT .req r3 +- WB .req r4 +- X .req r5 +- UX .req r6 +- WIDTH .req ip +- TMP1 .req r3 +- TMP2 .req r4 +- +- mov ip, sp +- push {r4, r5, r6, r7} +- ldmia ip, {WB, X, UX, WIDTH} +- +- cmp WIDTH, #0 +- ble 3f +- vdup.u16 q12, X +- vdup.u16 q13, UX +- vdup.u8 d28, WT +- vdup.u8 d29, WB +- vadd.u16 d25, d25, d26 +- vadd.u16 q13, q13, q13 +- +- subs WIDTH, WIDTH, #4 +- blt 1f +-0: +- bilinear_interpolate_four_pixels +- subs WIDTH, WIDTH, #4 +- bge 0b +-1: +- tst WIDTH, #2 +- beq 2f +- bilinear_interpolate_two_pixels +-2: +- tst WIDTH, #1 +- beq 3f +- bilinear_interpolate_last_pixel +-3: +- pop {r4, r5, r6, r7} +- bx lr +- +- .unreq OUT +- .unreq TOP +- .unreq BOTTOM +- .unreq WT +- .unreq WB +- .unreq X +- .unreq UX +- .unreq WIDTH +- .unreq TMP1 +- .unreq TMP2 +-.endfunc +- +-.purgem bilinear_interpolate_last_pixel +-.purgem bilinear_interpolate_two_pixels +-.purgem bilinear_interpolate_four_pixels +- + /* + * Bilinear scaling support code which tries to provide pixel fetching, color + * format conversion, and interpolation as separate macros which can be used +@@ -2810,3 +2622,6 @@ pixman_asm_function fname + .endfunc + + .endm ++ ++generate_bilinear_scanline_func \ ++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28 +-- +1.7.3.4 + 
diff --git a/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch b/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch new file mode 100644 index 0000000..56fd9b7 --- /dev/null +++ b/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch @@ -0,0 +1,51 @@ +From 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 11:53:04 +0200 +Subject: [PATCH 19/22] ARM: NEON optimization for bilinear scaled 'src_8888_0565' + +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s + after: op=1, src=20028888, dst=10020565, speed=61.65 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 3 +++ + pixman/pixman-arm-neon.c | 5 +++++ + 2 files changed, 8 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 52dc444..f0b42ca 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2625,3 +2625,6 @@ pixman_asm_function fname + + generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28 ++ ++generate_bilinear_scanline_func \ ++ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 98ad5f2..ba6de66 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -129,6 +129,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC, + uint32_t, uint32_t) ++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, ++ uint32_t, uint16_t) + + void + pixman_composite_src_n_8_asm_neon (int32_t w, +@@ -350,6 +352,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = 
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888), + ++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch b/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch new file mode 100644 index 0000000..17af7c5 --- /dev/null +++ b/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch @@ -0,0 +1,50 @@ +From 29003c3befe2159396d181ef9ac1caaadcabf382 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 13:21:53 +0200 +Subject: [PATCH 20/22] ARM: NEON optimization for bilinear scaled 'src_0565_x888' + +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s + after: op=1, src=10020565, dst=20020888, speed=36.82 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 3 +++ + pixman/pixman-arm-neon.c | 4 ++++ + 2 files changed, 7 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index f0b42ca..9245db9 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2628,3 +2628,6 @@ generate_bilinear_scanline_func \ + + generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28 ++ ++generate_bilinear_scanline_func \ ++ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index ba6de66..18e26eb 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -131,6 +131,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 
8888_8888, SRC, + uint32_t, uint32_t) + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) ++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, ++ uint16_t, uint32_t) + + void + pixman_composite_src_n_8_asm_neon (int32_t w, +@@ -355,6 +357,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + ++ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch b/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch new file mode 100644 index 0000000..24275a8 --- /dev/null +++ b/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch @@ -0,0 +1,49 @@ +From fe99673719091d4a880d031add1369332a75731b Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 13:27:41 +0200 +Subject: [PATCH 21/22] ARM: NEON optimization for bilinear scaled 'src_0565_0565' + +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s + after: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 3 +++ + pixman/pixman-arm-neon.c | 3 +++ + 2 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 9245db9..2b6875b 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \ + + generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28 ++ ++generate_bilinear_scanline_func \ ++ 
pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 18e26eb..0a10ca1 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC, + uint32_t, uint16_t) + PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC, + uint16_t, uint32_t) ++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC, ++ uint16_t, uint16_t) + + void + pixman_composite_src_n_8_asm_neon (int32_t w, +@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888), ++ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565), + + { PIXMAN_OP_NONE }, + }; +-- +1.7.3.4 + diff --git a/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch b/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch new file mode 100644 index 0000000..24f9652 --- /dev/null +++ b/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch @@ -0,0 +1,166 @@ +From 70a923882ca24664344ba91a649e7aa12c3063f7 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 9 Mar 2011 13:55:48 +0200 +Subject: [PATCH 22/22] ARM: a bit faster NEON bilinear scaling for r5g6b5 source images + +Instructions scheduling improved in the code responsible for fetching r5g6b5 +pixels and converting them to the intermediate x8r8g8b8 color format used in +the interpolation part of code. Still a lot of NEON stalls are remaining, +which can be resolved later by the use of pipelining. 
+ +Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz: + Microbenchmark (scaling 2000x2000 image with scale factor close to 1x): + before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s + op=1, src=10020565, dst=20020888, speed=36.82 MPix/s + after: op=1, src=10020565, dst=10020565, speed=41.35 MPix/s + op=1, src=10020565, dst=20020888, speed=49.16 MPix/s +--- + pixman/pixman-arm-neon-asm.S | 118 +++++++++++++++++++++++++++++++++++------ + 1 files changed, 100 insertions(+), 18 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 2b6875b..71b30ac 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -2430,6 +2430,101 @@ fname: + convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp + .endm + ++.macro bilinear_load_and_vertical_interpolate_two_8888 \ ++ acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 ++ ++ bilinear_load_8888 reg1, reg2, tmp1 ++ vmull.u8 acc1, reg1, d28 ++ vmlal.u8 acc1, reg2, d29 ++ bilinear_load_8888 reg3, reg4, tmp2 ++ vmull.u8 acc2, reg3, d28 ++ vmlal.u8 acc2, reg4, d29 ++.endm ++ ++.macro bilinear_load_and_vertical_interpolate_four_8888 \ ++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ ++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi ++ ++ bilinear_load_and_vertical_interpolate_two_8888 \ ++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi ++ bilinear_load_and_vertical_interpolate_two_8888 \ ++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi ++.endm ++ ++.macro bilinear_load_and_vertical_interpolate_two_0565 \ ++ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi ++ ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ mov TMP4, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP2, asl #1 ++ add TMP2, BOTTOM, TMP2, asl #1 ++ add TMP3, TOP, TMP4, asl #1 ++ add TMP4, BOTTOM, TMP4, asl #1 ++ vld1.32 {acc2lo[0]}, [TMP1] ++ vld1.32 {acc2hi[0]}, [TMP3] ++ vld1.32 {acc2lo[1]}, [TMP2] ++ vld1.32 {acc2hi[1]}, [TMP4] ++ 
convert_0565_to_x888 acc2, reg3, reg2, reg1 ++ vzip.u8 reg1, reg3 ++ vzip.u8 reg2, reg4 ++ vzip.u8 reg3, reg4 ++ vzip.u8 reg1, reg2 ++ vmull.u8 acc1, reg1, d28 ++ vmlal.u8 acc1, reg2, d29 ++ vmull.u8 acc2, reg3, d28 ++ vmlal.u8 acc2, reg4, d29 ++.endm ++ ++.macro bilinear_load_and_vertical_interpolate_four_0565 \ ++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ ++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi ++ ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ mov TMP4, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP2, asl #1 ++ add TMP2, BOTTOM, TMP2, asl #1 ++ add TMP3, TOP, TMP4, asl #1 ++ add TMP4, BOTTOM, TMP4, asl #1 ++ vld1.32 {xacc2lo[0]}, [TMP1] ++ vld1.32 {xacc2hi[0]}, [TMP3] ++ vld1.32 {xacc2lo[1]}, [TMP2] ++ vld1.32 {xacc2hi[1]}, [TMP4] ++ convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 ++ mov TMP2, X, asr #16 ++ add X, X, UX ++ mov TMP4, X, asr #16 ++ add X, X, UX ++ add TMP1, TOP, TMP2, asl #1 ++ add TMP2, BOTTOM, TMP2, asl #1 ++ add TMP3, TOP, TMP4, asl #1 ++ add TMP4, BOTTOM, TMP4, asl #1 ++ vld1.32 {yacc2lo[0]}, [TMP1] ++ vzip.u8 xreg1, xreg3 ++ vld1.32 {yacc2hi[0]}, [TMP3] ++ vzip.u8 xreg2, xreg4 ++ vld1.32 {yacc2lo[1]}, [TMP2] ++ vzip.u8 xreg3, xreg4 ++ vld1.32 {yacc2hi[1]}, [TMP4] ++ vzip.u8 xreg1, xreg2 ++ convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 ++ vmull.u8 xacc1, xreg1, d28 ++ vzip.u8 yreg1, yreg3 ++ vmlal.u8 xacc1, xreg2, d29 ++ vzip.u8 yreg2, yreg4 ++ vmull.u8 xacc2, xreg3, d28 ++ vzip.u8 yreg3, yreg4 ++ vmlal.u8 xacc2, xreg4, d29 ++ vzip.u8 yreg1, yreg2 ++ vmull.u8 yacc1, yreg1, d28 ++ vmlal.u8 yacc1, yreg2, d29 ++ vmull.u8 yacc2, yreg3, d28 ++ vmlal.u8 yacc2, yreg4, d29 ++.endm ++ + .macro bilinear_store_8888 numpix, tmp1, tmp2 + .if numpix == 4 + vst1.32 {d0, d1}, [OUT]! 
+@@ -2477,12 +2572,8 @@ fname: + .endm + + .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt +- bilinear_load_&src_fmt d0, d1, d2 +- vmull.u8 q1, d0, d28 +- vmlal.u8 q1, d1, d29 +- bilinear_load_&src_fmt d20, d21, d22 +- vmull.u8 q11, d20, d28 +- vmlal.u8 q11, d21, d29 ++ bilinear_load_and_vertical_interpolate_two_&src_fmt \ ++ q1, q11, d0, d1, d20, d21, d22, d23 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vshll.u16 q0, d2, #8 +@@ -2498,18 +2589,9 @@ fname: + .endm + + .macro bilinear_interpolate_four_pixels src_fmt, dst_fmt +- bilinear_load_&src_fmt d0, d1, d2 +- vmull.u8 q1, d0, d28 +- vmlal.u8 q1, d1, d29 +- bilinear_load_&src_fmt d20, d21, d22 +- vmull.u8 q11, d20, d28 +- vmlal.u8 q11, d21, d29 +- bilinear_load_&src_fmt d4, d5, d6 +- vmull.u8 q3, d4, d28 +- vmlal.u8 q3, d5, d29 +- bilinear_load_&src_fmt d16, d17, d18 +- vmull.u8 q9, d16, d28 +- vmlal.u8 q9, d17, d29 ++ bilinear_load_and_vertical_interpolate_four_&src_fmt \ ++ q1, q11, d0, d1, d20, d21, d22, d23 \ ++ q3, q9, d4, d5, d16, d17, d18, d19 + pld [TMP1, PF_OFFS] + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 +-- +1.7.3.4 +
diff --git a/x11-libs/pixman/pixman-0.21.6.ebuild b/x11-libs/pixman/pixman-0.21.6.ebuild
new file mode 100644
index 0000000..63738aa
--- /dev/null
+++ b/x11-libs/pixman/pixman-0.21.6.ebuild
@@ -0,0 +1,89 @@
+# Copyright 1999-2011 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: $
+
+EAPI=3
+inherit xorg-2 toolchain-funcs versionator
+
+EGIT_REPO_URI="git://anongit.freedesktop.org/git/pixman"
+DESCRIPTION="Low-level pixel manipulation routines"
+
+KEYWORDS="~arm"
+IUSE="altivec mmx sse2 simd neon"
+
+# Build the configure switches in CONFIGURE_OPTIONS from the USE flags
+# (presumably picked up by the xorg-2 eclass at configure time -- confirm).
+# MMX and SSE2 are resolved together: SSE2 is dropped on x86 when GCC is older
+# than 4.2, and SSE2 without MMX is rejected by disabling both.
+pkg_setup() {
+	xorg-2_pkg_setup
+	CONFIGURE_OPTIONS="
+		$(use_enable altivec vmx)
+		$(use_enable simd arm-simd)
+		$(use_enable neon arm-neon)
+		--disable-gtk"
+
+	local enable_mmx="$(use mmx && echo 1 || echo 0)"
+	local enable_sse2="$(use sse2 && echo 1 || echo 0)"
+
+	# this block fixes bug #260287
+	if use x86; then
+		# Test the exit status directly; wrapping the call in $(...) would try
+		# to execute whatever it prints.
+		if use sse2 && ! version_is_at_least "4.2" "$(gcc-version)"; then
+			ewarn "SSE2 instructions require GCC 4.2 or higher."
+			ewarn "pixman will be built *without* SSE2 support"
+			enable_sse2="0"
+		fi
+	fi
+
+	# this block fixes bug #236558
+	case "$enable_mmx,$enable_sse2" in
+	'1,1')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --enable-sse2" ;;
+	'1,0')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --disable-sse2" ;;
+	'0,1')
+		ewarn "You enabled SSE2 but have MMX disabled. This is an invalid combination."
+		ewarn "pixman will be built *without* MMX/SSE2 support."
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+	'0,0')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+	esac
+}
+
+# Apply the bundled patch series from FILESDIR in numeric order, then
+# regenerate the autotools files because the patches touch Makefile.am.
+src_prepare() {
+	local patch
+	local patches=(
+		0002-Fix-compilation-on-Win32.patch
+		0003-test-Fix-tests-for-compilation-on-Windows.patch
+		0004-test-Add-Makefile-for-Win32.patch
+		0005-Do-not-include-unused-headers.patch
+		0006-test-Silence-MSVC-warnings.patch
+		0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
+		0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
+		0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
+		0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
+		0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
+		0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
+		0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
+		0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
+		0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
+		0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
+		0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
+		0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
+		0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
+		0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
+		0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
+		0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
+	)
+
+	for patch in "${patches[@]}"; do
+		epatch "${FILESDIR}/${patch}"
+	done
+
+	# We patch Makefile.am and such, so eautoreconf!
+	eautoreconf
+}