diff --git a/x11-libs/pixman/Manifest b/x11-libs/pixman/Manifest
new file mode 100644
index 0000000..82d548b
--- /dev/null
+++ b/x11-libs/pixman/Manifest
@@ -0,0 +1,23 @@
+AUX 0002-Fix-compilation-on-Win32.patch 1373 RMD160 27ab9d8e5ee15ca0ca2316c2088488f77cc04193 SHA1 b60a844b6f97405d5974838126ce1a581a5578fb SHA256 c167e98ac16db2f09d11e71b6acfc2436ea06ad5f5e91d829463e8a2428c8f1d
+AUX 0003-test-Fix-tests-for-compilation-on-Windows.patch 5857 RMD160 c03eda4e6678e85da3d3aecb6f8af77bbbcba396 SHA1 78bc36587fabe4e5d984c1535d60bac1f27665bd SHA256 c17670b7a3603e3591e5f3264441b01dd1861c24681cea9bb63c1a4896471f09
+AUX 0004-test-Add-Makefile-for-Win32.patch 2285 RMD160 956800336268328f68cbf80fcf0f1e1a8254ed41 SHA1 9bf6452ad0982af23ead0ebfff1c3a46ab8a1454 SHA256 23b281492ad50c090c3ae3d501f92a039edcd35b4019ba60566394a9b9c99a41
+AUX 0005-Do-not-include-unused-headers.patch 1138 RMD160 e73bcdb3d39a3fe29a8d61fec12facbe0c15bb1f SHA1 77e320b1f0702e6b31214a7057c759f0cdec37fd SHA256 036fad75930a7a5981d0fe58749c1d1c7b066931d1bbcb7695ad8f45208c66e3
+AUX 0006-test-Silence-MSVC-warnings.patch 1879 RMD160 1ee25d5477740736c3bbb1c925f14fa45b9baea9 SHA1 17b823ed9bca1423ce3e7df6384820cff5b2c4f7 SHA256 dd835cb47e6f54c7295e181c8cab32924f5b7aa79be630b1dffd4987b04535a2
+AUX 0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch 18432 RMD160 45c3975ea38fdb4fe9ed927c60a020cf65c30726 SHA1 45023ba64a3c48d73d8d43b70dd38fa885b7ba7f SHA256 b96ae6c8bea2a900dd013f134f5223bf415fdc9f492f3854ee2b095451276857
+AUX 0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch 3637 RMD160 a8b2d0ddc050a521c1510b0f34c465e6e17d8b1d SHA1 f6b92ea26d7773cc826d63c175742194523b8480 SHA256 aa1354d2395925d53108269dc7f45ca4c16509318af794704c458339541d1ea8
+AUX 0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch 5981 RMD160 577a6b80b87d4687798e86bd9fe777a536ca9d76 SHA1 ffe4dfd7b3464bf6271ae869483124f4b8df7fd7 SHA256 ab8e918705c5d8bc24944a9b34f1a6d941d6f88cd16db4476566cb4bbf535039
+AUX 0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch 9330 RMD160 8601746564959d01c01993bd359c4b4db0beec8f SHA1 76b52d88701def10885f9f0592dea3b19707f6c1 SHA256 2da797eebe471eabca3da195ea295faf462c3f38330a11eee18e7247f7370477
+AUX 0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch 4741 RMD160 9054134e9c656a955c595bf004e51fc5652687fd SHA1 7c361a399fa4a20ca8e413b2cf0f29847d519cb1 SHA256 9c8fb1eb06e054fb0fdfece9c33e0b311a3949ba3550c4a95f5943a914e7a770
+AUX 0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch 1080 RMD160 6ca0c5e9597a765f03ac719357d9add04acdecea SHA1 6f74002a839afb6e5d91a8565776edfe19a29f07 SHA256 af280e15b33683841a7df486c8bbd21c9268958865652bdaa6389fdd3909a457
+AUX 0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch 2486 RMD160 cdb29a25ba6726bda75db4c2a37b29d628957085 SHA1 12465119abc6ae4aba91fb4ffa5e21c7d3044ad4 SHA256 88bc1c5118b1550f5b59a16a511da2675697f79bed863368d284e4a1b260f833
+AUX 0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch 4606 RMD160 2f826d65e0e3b80ad0fe3371137df128336c257f SHA1 75b051949e226dea0ac55d9a2618b688808793e8 SHA256 85d417a2160944b774dad489432b637a5662e0416a3919f095a93607772278ac
+AUX 0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch 2712 RMD160 8d335c0f1585ad9882fdef63c14328f36a2a7eac SHA1 f0f024b9248a85bc59a503a0088c24a9b97b0646 SHA256 bd97cb792274b8d6d498f07479a314b2ef1d9059b11acc3979871d38abb30ceb
+AUX 0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch 7188 RMD160 4fa7ee0d71533a47d699b78e8af9e83d60dae450 SHA1 7fa1dea003735971baf7f199a240c8eec6917f30 SHA256 6b99d26015f8953bf43cb3a7495a02efec4b807ce23166ccd5faca711acc2475
+AUX 0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch 8195 RMD160 4cff07d6bd52966a57148064f4a7a2a9da73838f SHA1 fdd5b62e8f33d0f3f245d86fb1567e26c829a051 SHA256 b17b03a4f7516de8bd803320310b26b2c09b694730ae4eddb5ae56a092da03bb
+AUX 0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch 6752 RMD160 53aaedec3527bd2a280b4b503504233306f92505 SHA1 db2606645ad5eb1bb0ec42b14116ef3aed0b9a3f SHA256 671c3c9d910f4ed8631149b31795879d35399679b8136847f6dbc94dda885a6d
+AUX 0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch 2083 RMD160 e4ba51ef2842f4cf42acbfc85dc2ecb09fabe655 SHA1 ca597361fc9da1d4b74a875c145af1cb7e4abb34 SHA256 929326bc1eca3e45a9a42dca9890e9a5422621e0587470a0eb28ed088ff097b3
+AUX 0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch 2063 RMD160 222b88cd6453fb4adc7f4c14b7b9d89a2c0e5c6d SHA1 f14961b189217cdb125d6ec8c002e8237c7f804a SHA256 ce0e82c68cef5fd9b4f7557ebb8c903bda7b09b496b04f6b7b229f42d7cd4bf0
+AUX 0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch 2058 RMD160 316bc2158c3dda9edef498582b316020e2a5e3ed SHA1 54d123fdb551a0dede24f5d53fa98e359d5d5d13 SHA256 c540fd8abdb2a23d445f53f0ff35905b27e4104fd4d9c9d59cce2d68d4970a58
+AUX 0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch 5971 RMD160 5a6956cbf719d928d1e6d5a4cd07f0ec22a62c41 SHA1 f04f6938337145bde6410ad8983e8a7e3749d9fa SHA256 5320bdbf0d1ddd3b753dedae61d5785b9db27ccdaeb49880508138c0e6113f57
+DIST pixman-0.21.6.tar.bz2 457580 RMD160 6ad5979d123e0268426c08954fd7f6040f7a3859 SHA1 73198f8f9159e3ffc2294806f32fa2c8042b57e6 SHA256 35a9fc00fc55c022318a7ac48eb52de60360beec36008b0037f944f3d0d62e83
+EBUILD pixman-0.21.6.ebuild 3157 RMD160 9188bc6dac468b30d4888a5a507d2c3145cb36c9 SHA1 a2b33238bc243e1e99b298e65d76482b2d32e73a SHA256 060cb6f797fa67bee354865a599f240150fe80373bab99f164dfd27d10aa76f2
diff --git a/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch b/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch
new file mode 100644
index 0000000..905f29e
--- /dev/null
+++ b/x11-libs/pixman/files/0002-Fix-compilation-on-Win32.patch
@@ -0,0 +1,42 @@
+From 20ed723a5a42fb8636bc9a5f32974dec1b66a785 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 10:44:04 +0100
+Subject: [PATCH 02/22] Fix compilation on Win32
+
+Makefile.win32 contained a typo and was missing the dependency from
+the built sources.
+---
+ pixman/Makefile.win32 |    6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
+index 775fb5e..b5f9397 100644
+--- a/pixman/Makefile.win32
++++ b/pixman/Makefile.win32
+@@ -56,6 +56,8 @@ SOURCES =				\
+ 	pixman-general.c		\
+ 	$(NULL)
+ 
++BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
++
+ # MMX compilation flags
+ ifeq ($(MMX_VAR),on)
+ CFLAGS += $(MMX_CFLAGS)
+@@ -122,7 +124,7 @@ endif
+ endif
+ 
+ # pixman compilation and linking
+-$(CFG_VAR)/%.obj: %.c
++$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
+ 	@mkdir -p $(CFG_VAR)
+ 	@$(CC) -c $(CFLAGS) -Fo"$@" $<
+ 
+@@ -141,4 +143,4 @@ pixman-combine64.h: pixman-combine.h.template make-combine.pl
+ 
+ clean_r:
+ 	@rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
+-	@rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0
++	@rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch b/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch
new file mode 100644
index 0000000..3789c79
--- /dev/null
+++ b/x11-libs/pixman/files/0003-test-Fix-tests-for-compilation-on-Windows.patch
@@ -0,0 +1,232 @@
+From 11305b4ecdd36a17592c5c75de9157874853ab20 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 21:46:37 +0100
+Subject: [PATCH 03/22] test: Fix tests for compilation on Windows
+
+The Microsoft C compiler cannot handle subobject initialization and
+Win32 does not provide snprintf.
+
+Work around these limitations by using normal struct initialization
+and using sprintf (a manual check shows that the buffer size is
+sufficient).
+---
+ test/composite.c    |   29 +++++++++++++--------------
+ test/fetch-test.c   |   52 ++++++++++++++++++++++----------------------------
+ test/trap-crasher.c |   20 +++++++++---------
+ 3 files changed, 47 insertions(+), 54 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index e14f954..08c6689 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -617,18 +617,18 @@ eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
+ }
+ 
+ static char *
+-describe_image (image_t *info, char *buf, int buflen)
++describe_image (image_t *info, char *buf)
+ {
+     if (info->size)
+     {
+-	snprintf (buf, buflen, "%s %dx%d%s",
+-		  info->format->name,
+-		  info->size, info->size,
+-		  info->repeat ? "R" :"");
++	sprintf (buf, "%s %dx%d%s",
++		 info->format->name,
++		 info->size, info->size,
++		 info->repeat ? "R" :"");
+     }
+     else
+     {
+-	snprintf (buf, buflen, "solid");
++	sprintf (buf, "solid");
+     }
+ 
+     return buf;
+@@ -710,10 +710,9 @@ composite_test (image_t *dst,
+     {
+ 	char buf[40];
+ 
+-	snprintf (buf, sizeof (buf),
+-		  "%s %scomposite",
+-		  op->name,
+-		  component_alpha ? "CA " : "");
++	sprintf (buf, "%s %scomposite",
++		 op->name,
++		 component_alpha ? "CA " : "");
+ 
+ 	printf ("%s test error of %.4f --\n"
+ 		"           R    G    B    A\n"
+@@ -735,9 +734,9 @@ composite_test (image_t *dst,
+ 		    mask->color->b, mask->color->a,
+ 		    dst->color->r, dst->color->g,
+ 		    dst->color->b, dst->color->a);
+-	    printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+-	    printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf)));
+-	    printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++	    printf ("src: %s, ", describe_image (src, buf));
++	    printf ("mask: %s, ", describe_image (mask, buf));
++	    printf ("dst: %s\n\n", describe_image (dst, buf));
+ 	}
+ 	else
+ 	{
+@@ -747,8 +746,8 @@ composite_test (image_t *dst,
+ 		    src->color->b, src->color->a,
+ 		    dst->color->r, dst->color->g,
+ 		    dst->color->b, dst->color->a);
+-	    printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+-	    printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++	    printf ("src: %s, ", describe_image (src, buf));
++	    printf ("dst: %s\n\n", describe_image (dst, buf));
+ 	}
+ 
+ 	success = FALSE;
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 2ca16dd..314a072 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -8,7 +8,7 @@
+ 
+ static pixman_indexed_t mono_palette =
+ {
+-    .rgba = { 0x00000000, 0x00ffffff },
++    0, { 0x00000000, 0x00ffffff },
+ };
+ 
+ 
+@@ -24,57 +24,53 @@ typedef struct {
+ static testcase_t testcases[] =
+ {
+     {
+-	.format = PIXMAN_a8r8g8b8,
+-	.width = 2, .height = 2,
+-	.stride = 8,
+-	.src = { 0x00112233, 0x44556677,
+-	         0x8899aabb, 0xccddeeff },
+-	.dst = { 0x00112233, 0x44556677,
+-	         0x8899aabb, 0xccddeeff },
+-	.indexed = NULL,
++	PIXMAN_a8r8g8b8,
++	2, 2,
++	8,
++	{ 0x00112233, 0x44556677,
++	  0x8899aabb, 0xccddeeff },
++	{ 0x00112233, 0x44556677,
++	  0x8899aabb, 0xccddeeff },
++	NULL,
+     },
+     {
+-	.format = PIXMAN_g1,
+-	.width = 8, .height = 2,
+-	.stride = 4,
++	PIXMAN_g1,
++	8, 2,
++	4,
+ #ifdef WORDS_BIGENDIAN
+-	.src =
+ 	{
+ 	    0xaa000000,
+ 	    0x55000000
+ 	},
+ #else
+-	.src =
+ 	{
+ 	    0x00000055,
+ 	    0x000000aa
+ 	},
+ #endif
+-	.dst =
+ 	{
+ 	    0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+ 	    0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
+ 	},
+-	.indexed = &mono_palette,
++	&mono_palette,
+     },
+ #if 0
+     {
+-	.format = PIXMAN_g8,
+-	.width = 4, .height = 2,
+-	.stride = 4,
+-	.src = { 0x01234567,
+-	         0x89abcdef },
+-	.dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
+-	         0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
++	PIXMAN_g8,
++	4, 2,
++	4,
++	{ 0x01234567,
++	  0x89abcdef },
++	{ 0x00010101, 0x00232323, 0x00454545, 0x00676767,
++	  0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
+     },
+ #endif
+     /* FIXME: make this work on big endian */
+     {
+-	.format = PIXMAN_yv12,
+-	.width = 8, .height = 2,
+-	.stride = 8,
++	PIXMAN_yv12,
++	8, 2,
++	8,
+ #ifdef WORDS_BIGENDIAN
+-	.src =
+ 	{
+ 	    0x00ff00ff, 0x00ff00ff,
+ 	    0xff00ff00, 0xff00ff00,
+@@ -82,7 +78,6 @@ static testcase_t testcases[] =
+ 	    0x800080ff
+ 	},
+ #else
+-	.src =
+ 	{
+ 	    0xff00ff00, 0xff00ff00,
+ 	    0x00ff00ff, 0x00ff00ff,
+@@ -90,7 +85,6 @@ static testcase_t testcases[] =
+ 	    0xff800080
+ 	},
+ #endif
+-	.dst =
+ 	{
+ 	    0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+ 	    0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+diff --git a/test/trap-crasher.c b/test/trap-crasher.c
+index 42b82f6..7485e62 100644
+--- a/test/trap-crasher.c
++++ b/test/trap-crasher.c
+@@ -7,21 +7,21 @@ main()
+     pixman_image_t *dst;
+     pixman_trapezoid_t traps[1] = {
+ 	{
+-	    .top = 2147483646,
+-	    .bottom = 2147483647,
+-	    .left = {
+-		.p1 = { .x = 0, .y = 0 },
+-		.p2 = { .x = 0, .y = 2147483647 }
++	    2147483646,
++	    2147483647,
++	    {
++		{ 0, 0 },
++		{ 0, 2147483647 }
+ 	    },
+-	    .right = {
+-		.p1 = { .x = 65536, .y = 0 },
+-		.p2 = { .x = 0, .y = 2147483647 }
++	    {
++		{ 65536, 0 },
++		{ 0, 2147483647 }
+ 	    }
+ 	},
+     };
+-    
++
+     dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
+-    
++
+     pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
+     return (0);
+ }
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch b/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch
new file mode 100644
index 0000000..eba6de1
--- /dev/null
+++ b/x11-libs/pixman/files/0004-test-Add-Makefile-for-Win32.patch
@@ -0,0 +1,92 @@
+From 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:04:49 +0100
+Subject: [PATCH 04/22] test: Add Makefile for Win32
+
+---
+ test/Makefile.win32 |   73 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 73 insertions(+), 0 deletions(-)
+ create mode 100644 test/Makefile.win32
+
+diff --git a/test/Makefile.win32 b/test/Makefile.win32
+new file mode 100644
+index 0000000..c71afe1
+--- /dev/null
++++ b/test/Makefile.win32
+@@ -0,0 +1,73 @@
++CC   = cl
++LINK = link
++
++CFG_VAR = $(CFG)
++ifeq ($(CFG_VAR),)
++CFG_VAR=release
++endif
++
++CFLAGS     = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_BIND_TO_CURRENT_VCLIBS_VERSION -D_MT -I../pixman -I. -I../
++TEST_LDADD = ../pixman/$(CFG_VAR)/pixman-1.lib
++INCLUDES = -I../pixman -I$(top_builddir)/pixman
++
++# optimization flags
++ifeq ($(CFG_VAR),debug)
++CFLAGS += -Od -Zi
++else
++CFLAGS += -O2
++endif
++
++SOURCES =			\
++	a1-trap-test.c		\
++	pdf-op-test.c		\
++	region-test.c		\
++	region-translate-test.c	\
++	fetch-test.c		\
++	oob-test.c		\
++	trap-crasher.c		\
++	alpha-loop.c		\
++	scaling-crash-test.c	\
++	gradient-crash-test.c	\
++	alphamap.c		\
++	stress-test.c		\
++	composite-traps-test.c	\
++	blitters-test.c		\
++	scaling-test.c		\
++	affine-test.c		\
++	composite.c		\
++	utils.c
++
++TESTS =						\
++	$(CFG_VAR)/a1-trap-test.exe		\
++	$(CFG_VAR)/pdf-op-test.exe		\
++	$(CFG_VAR)/region-test.exe		\
++	$(CFG_VAR)/region-translate-test.exe	\
++	$(CFG_VAR)/fetch-test.exe		\
++	$(CFG_VAR)/oob-test.exe			\
++	$(CFG_VAR)/trap-crasher.exe		\
++	$(CFG_VAR)/alpha-loop.exe		\
++	$(CFG_VAR)/scaling-crash-test.exe	\
++	$(CFG_VAR)/gradient-crash-test.exe	\
++	$(CFG_VAR)/alphamap.exe			\
++	$(CFG_VAR)/stress-test.exe		\
++	$(CFG_VAR)/composite-traps-test.exe	\
++	$(CFG_VAR)/blitters-test.exe		\
++	$(CFG_VAR)/scaling-test.exe		\
++	$(CFG_VAR)/affine-test.exe		\
++	$(CFG_VAR)/composite.exe
++
++
++OBJECTS     = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
++
++$(CFG_VAR)/%.obj: %.c
++	@mkdir -p $(CFG_VAR)
++	@$(CC) -c $(CFLAGS) -Fo"$@" $<
++
++$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj
++	$(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD)
++
++all: $(OBJECTS) $(TESTS)
++	@exit 0
++
++clean:
++	@rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb || exit 0
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch b/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch
new file mode 100644
index 0000000..14111aa
--- /dev/null
+++ b/x11-libs/pixman/files/0005-Do-not-include-unused-headers.patch
@@ -0,0 +1,40 @@
+From 8868778ea1fdc8e70da76b3b00ea78106c5840d8 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:43:48 +0100
+Subject: [PATCH 05/22] Do not include unused headers
+
+pixman-combine32.h is included without being used both in
+pixman-image.c and in pixman-general.c.
+---
+ pixman/pixman-general.c |    2 --
+ pixman/pixman-image.c   |    1 -
+ 2 files changed, 0 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 16ea3a4..872fb7e 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -36,8 +36,6 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+-#include "pixman-private.h"
+ 
+ static void
+ general_src_iter_init (pixman_implementation_t *imp,
+diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
+index 9103ca6..84bacf8 100644
+--- a/pixman/pixman-image.c
++++ b/pixman/pixman-image.c
+@@ -30,7 +30,6 @@
+ #include <assert.h>
+ 
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+ 
+ pixman_bool_t
+ _pixman_init_gradient (gradient_t *                  gradient,
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch b/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch
new file mode 100644
index 0000000..d7e5716
--- /dev/null
+++ b/x11-libs/pixman/files/0006-test-Silence-MSVC-warnings.patch
@@ -0,0 +1,63 @@
+From 9ebde285fa990bfa1524f166fbfb1368c346b14a Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 12:53:39 +0100
+Subject: [PATCH 06/22] test: Silence MSVC warnings
+
+MSVC does not notice non-returning functions (abort() / assert(0))
+and warns about paths which end with them in non-void functions:
+
+c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
+warning C4715: 'reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
+warning C4715: 'real_reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
+warning C4715: 'calc_op' : not all control paths return a value
+
+These warnings can be silenced by adding a return after the
+termination call.
+---
+ test/composite.c   |    1 +
+ test/fetch-test.c  |    1 +
+ test/stress-test.c |    2 +-
+ 3 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index 08c6689..a86e5ed 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -426,6 +426,7 @@ calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+     case PIXMAN_OP_HSL_LUMINOSITY:
+     default:
+ 	abort();
++	return 0; /* silence MSVC */
+     }
+ #undef mult_chan
+ }
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 314a072..60bc765 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -110,6 +110,7 @@ reader (const void *src, int size)
+ 	return *(uint32_t *)src;
+     default:
+ 	assert(0);
++	return 0; /* silence MSVC */
+     }
+ }
+ 
+diff --git a/test/stress-test.c b/test/stress-test.c
+index bcbc1f8..166dc6d 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -128,7 +128,7 @@ real_reader (const void *src, int size)
+ 	return *(uint32_t *)src;
+     default:
+ 	assert (0);
+-	break;
++	return 0; /* silence MSVC */
+     }
+ }
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
new file mode 100644
index 0000000..a5ab646
--- /dev/null
+++ b/x11-libs/pixman/files/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
@@ -0,0 +1,466 @@
+From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 01:29:02 +0200
+Subject: [PATCH 07/22] Main loop template for fast single pass bilinear scaling
+
+Can be used for implementing SIMD optimized fast path
+functions which work with bilinear scaled source images.
+
+Similar to the template for nearest scaling main loop, the
+following types of mask are supported:
+1. no mask
+2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+3. solid mask
+
+PAD repeat is fully supported. NONE repeat is partially
+supported (right now only works if source image has alpha
+channel or when alpha channel of the source image does not
+have any effect on the compositing operation).
+---
+ pixman/pixman-fast-path.h |  432 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 432 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
+index d081222..1885d47 100644
+--- a/pixman/pixman-fast-path.h
++++ b/pixman/pixman-fast-path.h
+@@ -587,4 +587,436 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+ 
++/*****************************************************************************/
++
++/*
++ * Identify 5 zones in each scanline for bilinear scaling. Depending on
++ * whether 2 pixels to be interpolated are fetched from the image itself,
++ * from the padding area around it or from both image and padding area.
++ */
++static force_inline void
++bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
++					 pixman_fixed_t  vx,
++					 pixman_fixed_t  unit_x,
++					 int32_t *       left_pad,
++					 int32_t *       left_tz,
++					 int32_t *       width,
++					 int32_t *       right_tz,
++					 int32_t *       right_pad)
++{
++	int width1 = *width, left_pad1, right_pad1;
++	int width2 = *width, left_pad2, right_pad2;
++
++	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
++					&width1, &left_pad1, &right_pad1);
++	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
++					unit_x, &width2, &left_pad2, &right_pad2);
++
++	*left_pad = left_pad2;
++	*left_tz = left_pad1 - left_pad2;
++	*right_tz = right_pad2 - right_pad1;
++	*right_pad = right_pad1;
++	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
++}
++
++/*
++ * Main loop template for single pass bilinear scaling. It needs to be
++ * provided with 'scanline_func' which should do the compositing operation.
++ * The needed function has the following prototype:
++ *
++ *	scanline_func (dst_type_t *       dst,
++ *		       const mask_type_ * mask,
++ *		       const src_type_t * src_top,
++ *		       const src_type_t * src_bottom,
++ *		       int32_t            width,
++ *		       int                weight_top,
++ *		       int                weight_bottom,
++ *		       pixman_fixed_t     vx,
++ *		       pixman_fixed_t     unit_x,
++ *		       pixman_fixed_t     max_vx,
++ *		       pixman_bool_t      zero_src)
++ *
++ * Where:
++ *  dst                 - destination scanline buffer for storing results
++ *  mask                - mask buffer (or single value for solid mask)
++ *  src_top, src_bottom - two source scanlines
++ *  width               - number of pixels to process
++ *  weight_top          - weight of the top row for interpolation
++ *  weight_bottom       - weight of the bottom row for interpolation
++ *  vx                  - initial position for fetching the first pair of
++ *                        pixels from the source buffer
++ *  unit_x              - position increment needed to move to the next pair
++ *                        of pixels
++ *  max_vx              - image size as a fixed point value, can be used for
++ *                        implementing NORMAL repeat (when it is supported)
++ *  zero_src            - boolean hint variable, which is set to TRUE when
++ *                        all source pixels are fetched from zero padding
++ *                        zone for NONE repeat
++ *
++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
++ *       but sometimes it may be less than that for NONE repeat when handling
++ *       fuzzy antialiased top or bottom image edges. Also both top and
++ *       bottom weight variables are guaranteed to have value in 0-255
++ *       range and can fit into unsigned byte or be used with 8-bit SIMD
++ *       multiplication instructions.
++ */
++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
++static void											\
++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
++						   pixman_op_t              op,			\
++						   pixman_image_t *         src_image,		\
++						   pixman_image_t *         mask_image,		\
++						   pixman_image_t *         dst_image,		\
++						   int32_t                  src_x,		\
++						   int32_t                  src_y,		\
++						   int32_t                  mask_x,		\
++						   int32_t                  mask_y,		\
++						   int32_t                  dst_x,		\
++						   int32_t                  dst_y,		\
++						   int32_t                  width,		\
++						   int32_t                  height)		\
++{												\
++    dst_type_t *dst_line;									\
++    mask_type_t *mask_line;									\
++    src_type_t *src_first_line;									\
++    int       y1, y2;										\
++    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
++    pixman_vector_t v;										\
++    pixman_fixed_t vx, vy;									\
++    pixman_fixed_t unit_x, unit_y;								\
++    int32_t left_pad, left_tz, right_tz, right_pad;						\
++												\
++    dst_type_t *dst;										\
++    mask_type_t solid_mask;									\
++    const mask_type_t *mask = &solid_mask;							\
++    int src_stride, mask_stride, dst_stride;							\
++												\
++    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);	\
++    if (have_mask)										\
++    {												\
++	if (mask_is_solid)									\
++	{											\
++	    solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);	\
++	    mask_stride = 0;									\
++	}											\
++	else											\
++	{											\
++	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
++				   mask_stride, mask_line, 1);					\
++	}											\
++    }												\
++    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
++     * transformed from destination space to source space */					\
++    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
++												\
++    /* reference point is the center of the pixel */						\
++    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
++    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
++    v.vector[2] = pixman_fixed_1;								\
++												\
++    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
++	return;											\
++												\
++    unit_x = src_image->common.transform->matrix[0][0];						\
++    unit_y = src_image->common.transform->matrix[1][1];						\
++												\
++    v.vector[0] -= pixman_fixed_1 / 2;								\
++    v.vector[1] -= pixman_fixed_1 / 2;								\
++												\
++    vy = v.vector[1];										\
++												\
++    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
++	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
++    {												\
++	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
++					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
++	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
++	{											\
++	    /* PAD repeat does not need special handling for 'transition zones' and */		\
++	    /* they can be combined with 'padding zones' safely */				\
++	    left_pad += left_tz;								\
++	    right_pad += right_tz;								\
++	    left_tz = right_tz = 0;								\
++	}											\
++	v.vector[0] += left_pad * unit_x;							\
++    }												\
++												\
++    while (--height >= 0)									\
++    {												\
++	int weight1, weight2;									\
++	dst = dst_line;										\
++	dst_line += dst_stride;									\
++	vx = v.vector[0];									\
++	if (have_mask && !mask_is_solid)							\
++	{											\
++	    mask = mask_line;									\
++	    mask_line += mask_stride;								\
++	}											\
++												\
++	y1 = pixman_fixed_to_int (vy);								\
++	weight2 = (vy >> 8) & 0xff;								\
++	if (weight2)										\
++	{											\
++	    /* normal case, both row weights are in 0-255 range and fit unsigned byte */	\
++	    y2 = y1 + 1;									\
++	    weight1 = 256 - weight2;								\
++	}											\
++	else											\
++	{											\
++	    /* set both top and bottom row to the same scanline, and weights to 128+128 */	\
++	    y2 = y1;										\
++	    weight1 = weight2 = 128;								\
++	}											\
++	vy += unit_y;										\
++	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
++	{											\
++	    src_type_t *src1, *src2;								\
++	    src_type_t buf1[2];									\
++	    src_type_t buf2[2];									\
++	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
++	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
++	    src1 = src_first_line + src_stride * y1;						\
++	    src2 = src_first_line + src_stride * y2;						\
++												\
++	    if (left_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = src1[0];							\
++		buf2[0] = buf2[1] = src2[0];							\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
++		dst += left_pad;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_pad;								\
++	    }											\
++	    if (width > 0)									\
++	    {											\
++		scanline_func (dst, mask,							\
++			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
++		dst += width;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += width;								\
++	    }											\
++	    if (right_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
++		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
++	    }											\
++	}											\
++	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
++	{											\
++	    src_type_t *src1, *src2;								\
++	    src_type_t buf1[2];									\
++	    src_type_t buf2[2];									\
++	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
++	    if (y1 < 0)										\
++	    {											\
++		weight1 = 0;									\
++		y1 = 0;										\
++	    }											\
++	    if (y1 >= src_image->bits.height)							\
++	    {											\
++		weight1 = 0;									\
++		y1 = src_image->bits.height - 1;						\
++	    }											\
++	    if (y2 < 0)										\
++	    {											\
++		weight2 = 0;									\
++		y2 = 0;										\
++	    }											\
++	    if (y2 >= src_image->bits.height)							\
++	    {											\
++		weight2 = 0;									\
++		y2 = src_image->bits.height - 1;						\
++	    }											\
++	    src1 = src_first_line + src_stride * y1;						\
++	    src2 = src_first_line + src_stride * y2;						\
++												\
++	    if (left_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = 0;								\
++		buf2[0] = buf2[1] = 0;								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
++		dst += left_pad;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_pad;								\
++	    }											\
++	    if (left_tz > 0)									\
++	    {											\
++		buf1[0] = 0;									\
++		buf1[1] = src1[0];								\
++		buf2[0] = 0;									\
++		buf2[1] = src2[0];								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_tz, weight1, weight2,				\
++			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
++		dst += left_tz;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_tz;								\
++		vx += left_tz * unit_x;								\
++	    }											\
++	    if (width > 0)									\
++	    {											\
++		scanline_func (dst, mask,							\
++			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
++		dst += width;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += width;								\
++		vx += width * unit_x;								\
++	    }											\
++	    if (right_tz > 0)									\
++	    {											\
++		buf1[0] = src1[src_image->bits.width - 1];					\
++		buf1[1] = 0;									\
++		buf2[0] = src2[src_image->bits.width - 1];					\
++		buf2[1] = 0;									\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_tz, weight1, weight2,				\
++			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
++		dst += right_tz;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += right_tz;								\
++	    }											\
++	    if (right_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = 0;								\
++		buf2[0] = buf2[1] = 0;								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
++	    }											\
++	}											\
++	else											\
++	{											\
++	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
++			   src_first_line + src_stride * y2, width,				\
++			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
++	}											\
++    }												\
++}
++
++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
++	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
++
++#define SCALED_BILINEAR_FLAGS						\
++    (FAST_PATH_SCALE_TRANSFORM	|					\
++     FAST_PATH_NO_ALPHA_MAP	|					\
++     FAST_PATH_BILINEAR_FILTER	|					\
++     FAST_PATH_NO_ACCESSORS	|					\
++     FAST_PATH_NARROW_FORMAT)
++
++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++/* Prefer the use of 'cover' variant, because it is faster */
++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
++    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
++    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
++    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
++
+ #endif
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch b/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
new file mode 100644
index 0000000..a492a8f
--- /dev/null
+++ b/x11-libs/pixman/files/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
@@ -0,0 +1,136 @@
+From 0df43b8ae5031dd83775d00b57b6bed809db0e89 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 02:07:09 +0200
+Subject: [PATCH 08/22] test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
+
+Individual correctness check for the new bilinear scaling related
+supplementary function. This test program uses a bit wider range
+of input arguments, not covered by other tests.
+---
+ test/Makefile.am            |    2 +
+ test/scaling-helpers-test.c |   93 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 95 insertions(+), 0 deletions(-)
+ create mode 100644 test/scaling-helpers-test.c
+
+diff --git a/test/Makefile.am b/test/Makefile.am
+index 057e9ce..9dc7219 100644
+--- a/test/Makefile.am
++++ b/test/Makefile.am
+@@ -13,6 +13,7 @@ TESTPROGRAMS =			\
+ 	trap-crasher		\
+ 	alpha-loop		\
+ 	scaling-crash-test	\
++	scaling-helpers-test	\
+ 	gradient-crash-test	\
+ 	alphamap		\
+ 	stress-test		\
+@@ -33,6 +34,7 @@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+ composite_SOURCES = composite.c utils.c utils.h
+ gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
+ stress_test_SOURCES = stress-test.c utils.c utils.h
++scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
+ 
+ # Benchmarks
+ 
+diff --git a/test/scaling-helpers-test.c b/test/scaling-helpers-test.c
+new file mode 100644
+index 0000000..c186138
+--- /dev/null
++++ b/test/scaling-helpers-test.c
+@@ -0,0 +1,93 @@
++#include <config.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <stdio.h>
++#include <assert.h>
++#include "utils.h"
++#include "pixman-fast-path.h"
++
++/* A trivial reference implementation for
++ * 'bilinear_pad_repeat_get_scanline_bounds'
++ */
++static void
++bilinear_pad_repeat_get_scanline_bounds_ref (int32_t        source_image_width,
++					     pixman_fixed_t vx_,
++					     pixman_fixed_t unit_x,
++					     int32_t *      left_pad,
++					     int32_t *      left_tz,
++					     int32_t *      width,
++					     int32_t *      right_tz,
++					     int32_t *      right_pad)
++{
++    int w = *width;
++    *left_pad = 0;
++    *left_tz = 0;
++    *width = 0;
++    *right_tz = 0;
++    *right_pad = 0;
++    int64_t vx = vx_;
++    while (--w >= 0)
++    {
++	if (vx < 0)
++	{
++	    if (vx + pixman_fixed_1 < 0)
++		*left_pad += 1;
++	    else
++		*left_tz += 1;
++	}
++	else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width))
++	{
++	    if (vx >= pixman_int_to_fixed (source_image_width))
++		*right_pad += 1;
++	    else
++		*right_tz += 1;
++	}
++	else
++	{
++	    *width += 1;
++	}
++	vx += unit_x;
++    }
++}
++
++int
++main (void)
++{
++    int i;
++    for (i = 0; i < 10000; i++)
++    {
++	int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1;
++	int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2;
++	pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16);
++	int32_t width = lcg_rand_N(10000);
++	int32_t source_image_width = lcg_rand_N(10000) + 1;
++	pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1;
++	width1 = width2 = width;
++
++	bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width,
++						     vx,
++						     unit_x,
++						     &left_pad1,
++						     &left_tz1,
++						     &width1,
++						     &right_tz1,
++						     &right_pad1);
++
++	bilinear_pad_repeat_get_scanline_bounds (source_image_width,
++						 vx,
++						 unit_x,
++						 &left_pad2,
++						 &left_tz2,
++						 &width2,
++						 &right_tz2,
++						 &right_pad2);
++
++	assert (left_pad1 == left_pad2);
++	assert (left_tz1 == left_tz2);
++	assert (width1 == width2);
++	assert (right_tz1 == right_tz2);
++	assert (right_pad1 == right_pad2);
++    }
++
++    return 0;
++}
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch b/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
new file mode 100644
index 0000000..58f8a5b
--- /dev/null
+++ b/x11-libs/pixman/files/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
@@ -0,0 +1,156 @@
+From 350029396d911941591149cc82b5e68a78ad6747 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 20:18:02 +0200
+Subject: [PATCH 09/22] SSE2 optimization for bilinear scaled 'src_8888_8888'
+
+A primitive naive implementation of bilinear scaling using SSE2 intrinsics,
+which only handles one pixel at a time. It is approximately 2x faster than
+pixman general compositing path. Single pass processing without intermediate
+temporary buffer contributes to ~15% and loop unrolling contributes to ~20%
+of this speedup.
+
+Benchmark on Intel Core i7 (x86-64):
+ Using cairo-perf-trace:
+  before: image        firefox-planet-gnome   12.566   12.610   0.23%    6/6
+  after:  image        firefox-planet-gnome   10.961   11.013   0.19%    5/6
+
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
+---
+ pixman/pixman-sse2.c |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 112 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 88287b4..696005f 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5567,6 +5567,114 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ 			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ 			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+ 
++static void
++bilinear_interpolate_line_sse2 (uint32_t *       out,
++                                const uint32_t * top,
++                                const uint32_t * bottom,
++                                int              wt,
++                                int              wb,
++                                pixman_fixed_t   x,
++                                pixman_fixed_t   ux,
++                                int              width)
++{
++    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
++    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
++    const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
++    const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
++    const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
++    const __m128i xmm_zero = _mm_setzero_si128 ();
++    __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x);
++    uint32_t pix1, pix2, pix3, pix4;
++
++    #define INTERPOLATE_ONE_PIXEL(pix)						\
++    do {									\
++	__m128i xmm_wh, xmm_lo, xmm_hi, a;					\
++	/* fetch 2x2 pixel block into sse2 register */				\
++	uint32_t tl = top [pixman_fixed_to_int (x)];				\
++	uint32_t tr = top [pixman_fixed_to_int (x) + 1];			\
++	uint32_t bl = bottom [pixman_fixed_to_int (x)];				\
++	uint32_t br = bottom [pixman_fixed_to_int (x) + 1];			\
++	a = _mm_set_epi32 (tr, tl, br, bl);					\
++        x += ux;								\
++	/* vertical interpolation */						\
++	a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero),	\
++					    xmm_wt),				\
++			   _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero),	\
++					    xmm_wb));				\
++	/* calculate horizontal weights */					\
++	xmm_wh = _mm_add_epi16 (xmm_addc,					\
++				_mm_xor_si128 (xmm_xorc,			\
++					       _mm_srli_epi16 (xmm_x, 8)));	\
++	xmm_x = _mm_add_epi16 (xmm_x, xmm_ux);					\
++	/* horizontal interpolation */						\
++	xmm_lo = _mm_mullo_epi16 (a, xmm_wh);					\
++	xmm_hi = _mm_mulhi_epu16 (a, xmm_wh);					\
++	a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi),			\
++			   _mm_unpackhi_epi16 (xmm_lo, xmm_hi));		\
++	/* shift and pack the result */						\
++	a = _mm_srli_epi32 (a, 16);						\
++	a = _mm_packs_epi32 (a, a);						\
++	a = _mm_packus_epi16 (a, a);						\
++	pix = _mm_cvtsi128_si32 (a);						\
++    } while (0)
++
++    while ((width -= 4) >= 0)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	INTERPOLATE_ONE_PIXEL (pix2);
++	INTERPOLATE_ONE_PIXEL (pix3);
++	INTERPOLATE_ONE_PIXEL (pix4);
++	*out++ = pix1;
++	*out++ = pix2;
++	*out++ = pix3;
++	*out++ = pix4;
++    }
++    if (width & 2)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	INTERPOLATE_ONE_PIXEL (pix2);
++	*out++ = pix1;
++	*out++ = pix2;
++    }
++    if (width & 1)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	*out = pix1;
++    }
++
++    #undef INTERPOLATE_ONE_PIXEL
++}
++
++static force_inline void
++scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t *       dst,
++					     const uint32_t * mask,
++					     const uint32_t * src_top,
++					     const uint32_t * src_bottom,
++					     int32_t          w,
++					     int              wt,
++					     int              wb,
++					     pixman_fixed_t   vx,
++					     pixman_fixed_t   unit_x,
++					     pixman_fixed_t   max_vx,
++					     pixman_bool_t    zero_src)
++{
++    bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,
++				    wt, wb, vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t sse2_fast_paths[] =
+ {
+     /* PIXMAN_OP_OVER */
+@@ -5668,6 +5776,10 @@ static const pixman_fast_path_t sse2_fast_paths[] =
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch b/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
new file mode 100644
index 0000000..e68a0f7
--- /dev/null
+++ b/x11-libs/pixman/files/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
@@ -0,0 +1,288 @@
+From 17feaa9c50bb8521b0366345efe181bd99754957 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 22 Feb 2011 18:45:03 +0200
+Subject: [PATCH 10/22] ARM: NEON optimization for bilinear scaled 'src_8888_8888'
+
+Initial NEON optimization for bilinear scaling. Can be probably
+improved more.
+
+Benchmark on ARM Cortex-A8:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  197 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |   45 ++++++++++
+ 2 files changed, 242 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 47daf45..c168e10 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2391,3 +2391,200 @@ generate_composite_function_nearest_scanline \
+     10,  /* dst_r_basereg */ \
+     8,  /* src_basereg   */ \
+     15  /* mask_basereg  */
++
++/******************************************************************************/
++
++/* Supplementary macro for setting function attributes */
++.macro pixman_asm_function fname
++    .func fname
++    .global fname
++#ifdef __ELF__
++    .hidden fname
++    .type fname, %function
++#endif
++fname:
++.endm
++
++.macro bilinear_interpolate_last_pixel
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vshr.u16  d30, d24, #8
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    /* 5 cycles bubble */
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    /* 5 cycles bubble */
++    vshrn.u32 d0, q0, #16
++    /* 3 cycles bubble */
++    vmovn.u16 d0, q0
++    /* 1 cycle bubble */
++    vst1.32   {d0[0]}, [OUT, :32]!
++.endm
++
++.macro bilinear_interpolate_two_pixels
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d20}, [TMP1]
++    vld1.32   {d21}, [TMP2]
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshrn.u32 d30, q0, #16
++    vshrn.u32 d31, q10, #16
++    vmovn.u16 d0, q15
++    vst1.32   {d0}, [OUT]!
++.endm
++
++.macro bilinear_interpolate_four_pixels
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d20}, [TMP1]
++    vld1.32   {d21}, [TMP2]
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d4}, [TMP1]
++    vld1.32   {d5}, [TMP2]
++    vmull.u8  q3, d4, d28
++    vmlal.u8  q3, d5, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d16}, [TMP1]
++    vld1.32   {d17}, [TMP2]
++    vmull.u8  q9, d16, d28
++    vmlal.u8  q9, d17, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q2, d6, #8
++    vmlsl.u16 q2, d6, d30
++    vmlal.u16 q2, d7, d30
++    vshll.u16 q8, d18, #8
++    vmlsl.u16 q8, d18, d31
++    vmlal.u16 q8, d19, d31
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q10, #16
++    vshrn.u32 d4, q2, #16
++    vshrn.u32 d5, q8, #16
++    vmovn.u16 d0, q0
++    vmovn.u16 d1, q2
++    vst1.32   {d0, d1}, [OUT]!
++.endm
++
++
++/*
++ * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t *       out,
++ *                                                const uint32_t * top,
++ *                                                const uint32_t * bottom,
++ *                                                int              wt,
++ *                                                int              wb,
++ *                                                pixman_fixed_t   x,
++ *                                                pixman_fixed_t   ux,
++ *                                                int              width)
++ */
++
++pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
++    OUT       .req      r0
++    TOP       .req      r1
++    BOTTOM    .req      r2
++    WT        .req      r3
++    WB        .req      r4
++    X         .req      r5
++    UX        .req      r6
++    WIDTH     .req      ip
++    TMP1      .req      r3
++    TMP2      .req      r4
++
++    mov       ip, sp
++    push      {r4, r5, r6, r7}
++    ldmia     ip, {WB, X, UX, WIDTH}
++
++    cmp       WIDTH, #0
++    ble       3f
++    vdup.u16  q12, X
++    vdup.u16  q13, UX
++    vdup.u8   d28, WT
++    vdup.u8   d29, WB
++    vadd.u16  d25, d25, d26
++    vadd.u16  q13, q13, q13
++
++    subs      WIDTH, WIDTH, #4
++    blt       1f
++0:
++    bilinear_interpolate_four_pixels
++    subs      WIDTH, WIDTH, #4
++    bge       0b
++1:
++    tst       WIDTH, #2
++    beq       2f
++    bilinear_interpolate_two_pixels
++2:
++    tst       WIDTH, #1
++    beq       3f
++    bilinear_interpolate_last_pixel
++3:
++    pop       {r4, r5, r6, r7}
++    bx        lr
++
++    .unreq    OUT
++    .unreq    TOP
++    .unreq    BOTTOM
++    .unreq    WT
++    .unreq    WB
++    .unreq    X
++    .unreq    UX
++    .unreq    WIDTH
++    .unreq    TMP1
++    .unreq    TMP2
++.endfunc
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 3e0c0d1..c7c0254 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -232,6 +232,47 @@ pixman_blt_neon (uint32_t *src_bits,
+     }
+ }
+ 
++void
++pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t *       out,
++                                                        const uint32_t * top,
++                                                        const uint32_t * bottom,
++                                                        int              wt,
++                                                        int              wb,
++                                                        pixman_fixed_t   x,
++                                                        pixman_fixed_t   ux,
++                                                        int              width);
++
++static force_inline void
++scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t *       dst,
++					     const uint32_t * mask,
++					     const uint32_t * src_top,
++					     const uint32_t * src_bottom,
++					     int32_t          w,
++					     int              wt,
++					     int              wb,
++					     pixman_fixed_t   vx,
++					     pixman_fixed_t   unit_x,
++					     pixman_fixed_t   max_vx,
++					     pixman_bool_t    zero_src)
++{
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
++                                                            src_bottom, wt, wb,
++                                                            vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
+@@ -343,6 +384,10 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch b/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
new file mode 100644
index 0000000..4370eb0
--- /dev/null
+++ b/x11-libs/pixman/files/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
@@ -0,0 +1,156 @@
+From 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Mon, 7 Mar 2011 13:45:54 -0500
+Subject: [PATCH 11/22] test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
+
+There is no reason to pass in the bpp as an argument; it can be gotten
+directly from the image.
+---
+ test/affine-test.c          |    6 +++---
+ test/blitters-test.c        |    4 ++--
+ test/composite-traps-test.c |    2 +-
+ test/scaling-test.c         |    6 +++---
+ test/utils.c                |    9 +++++++--
+ test/utils.h                |    2 +-
+ 6 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/test/affine-test.c b/test/affine-test.c
+index b7a1fa6..ed8000c 100644
+--- a/test/affine-test.c
++++ b/test/affine-test.c
+@@ -95,8 +95,8 @@ test_composite (int      testnum,
+     dst_img = pixman_image_create_bits (
+         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+ 
+-    image_endian_swap (src_img, src_bpp * 8);
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (src_img);
++    image_endian_swap (dst_img);
+ 
+     pixman_transform_init_identity (&transform);
+     
+@@ -251,7 +251,7 @@ test_composite (int      testnum,
+ 	    dstbuf[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/blitters-test.c b/test/blitters-test.c
+index 42181ef..63e7cb3 100644
+--- a/test/blitters-test.c
++++ b/test/blitters-test.c
+@@ -61,7 +61,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
+ 	pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)]));
+     }
+ 
+-    image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++    image_endian_swap (img);
+ 
+     if (used_fmt) *used_fmt = fmt;
+     return img;
+@@ -101,7 +101,7 @@ free_random_image (uint32_t initcrc,
+ 	/* swap endiannes in order to provide identical results on both big
+ 	 * and litte endian systems
+ 	 */
+-	image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++	image_endian_swap (img);
+ 	crc32 = compute_crc32 (initcrc, data, stride * height);
+     }
+ 
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 8f32778..298537d 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -218,7 +218,7 @@ test_composite (int      testnum,
+ 	    dst_bits[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/scaling-test.c b/test/scaling-test.c
+index dbb9d39..82370f7 100644
+--- a/test/scaling-test.c
++++ b/test/scaling-test.c
+@@ -140,8 +140,8 @@ test_composite (int      testnum,
+     dst_img = pixman_image_create_bits (
+         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+ 
+-    image_endian_swap (src_img, src_bpp * 8);
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (src_img);
++    image_endian_swap (dst_img);
+ 
+     if (lcg_rand_n (4) > 0)
+     {
+@@ -330,7 +330,7 @@ test_composite (int      testnum,
+ 	    dstbuf[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/utils.c b/test/utils.c
+index 2f21398..4bf02e1 100644
+--- a/test/utils.c
++++ b/test/utils.c
+@@ -133,11 +133,12 @@ compute_crc32 (uint32_t    in_crc32,
+ /* perform endian conversion of pixel data
+  */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp)
++image_endian_swap (pixman_image_t *img)
+ {
+     int stride = pixman_image_get_stride (img);
+     uint32_t *data = pixman_image_get_data (img);
+     int height = pixman_image_get_height (img);
++    int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img));
+     int i, j;
+ 
+     /* swap bytes only on big endian systems */
+@@ -145,10 +146,13 @@ image_endian_swap (pixman_image_t *img, int bpp)
+     if (*(volatile uint8_t *)&endian_check_var != 0x12)
+ 	return;
+ 
++    if (bpp == 8)
++	return;
++
+     for (i = 0; i < height; i++)
+     {
+ 	uint8_t *line_data = (uint8_t *)data + stride * i;
+-	/* swap bytes only for 16, 24 and 32 bpp for now */
++	
+ 	switch (bpp)
+ 	{
+ 	case 1:
+@@ -208,6 +212,7 @@ image_endian_swap (pixman_image_t *img, int bpp)
+ 	    }
+ 	    break;
+ 	default:
++	    assert (FALSE);
+ 	    break;
+ 	}
+     }
+diff --git a/test/utils.h b/test/utils.h
+index 9c7bdb1..a5183f7 100644
+--- a/test/utils.h
++++ b/test/utils.h
+@@ -60,7 +60,7 @@ compute_crc32 (uint32_t    in_crc32,
+ /* perform endian conversion of pixel data
+  */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp);
++image_endian_swap (pixman_image_t *img);
+ 
+ /* Allocate memory that is bounded by protected pages,
+  * so that out-of-bounds access will cause segfaults
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch b/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
new file mode 100644
index 0000000..375e253
--- /dev/null
+++ b/x11-libs/pixman/files/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
@@ -0,0 +1,36 @@
+From 84e361c8e357e26f299213fbeefe64c73447b116 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Fri, 4 Mar 2011 15:51:18 -0500
+Subject: [PATCH 12/22] test: Do endian swapping of the source and destination images.
+
+Otherwise the test fails on big endian. Fix for bug 34767, reported by
+Siarhei Siamashka.
+---
+ test/composite-traps-test.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 298537d..cf30281 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -139,6 +139,8 @@ test_composite (int      testnum,
+ 	    pixman_image_set_source_clipping (src_img, 1);
+ 	    pixman_region_fini (&clip);
+ 	}
++
++	image_endian_swap (src_img);
+     }
+ 
+     /* Create destination image */
+@@ -157,6 +159,8 @@ test_composite (int      testnum,
+ 	
+ 	dst_img = pixman_image_create_bits (
+ 	    dst_format, dst_width, dst_height, dst_bits, dst_stride);
++
++	image_endian_swap (dst_img);
+     }
+ 
+     /* Create traps */
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch b/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
new file mode 100644
index 0000000..3a04397
--- /dev/null
+++ b/x11-libs/pixman/files/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
@@ -0,0 +1,77 @@
+From bb3d1b67fd0f42ae00af811c624ea1c44541034d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 16:17:12 +0200
+Subject: [PATCH 13/22] ARM: use prefetch in nearest scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S |   27 +++++++++++++++++++++++++--
+ 1 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index 7567700..dd1366d 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	TMP1	.req	r4
+ 	TMP2	.req	r5
+ 	VXMASK	.req	r6
++	PF_OFFS	.req	r7
+ 
+ 	ldr	UNIT_X, [sp]
+ 	push	{r4, r5, r6, r7}
+@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 		strh	TMP2, [DST], #2
+ 	.endm
+ 
++	/*
++	 * stop prefetch before reaching the end of scanline (a good behaving
++	 * value selected based on some benchmarks with short scanlines)
++	 */
++	#define PREFETCH_BRAKING_DISTANCE 32
++
+ 	/* now do the scaling */
+ 	and	TMP1, VXMASK, VX, lsr #15
+ 	add	VX, VX, UNIT_X
+-	subs	W, #4
++	subs	W, #(8 + PREFETCH_BRAKING_DISTANCE)
++	blt	2f
++	/* set prefetch distance to 80 pixels ahead */
++	add	PF_OFFS, VX, UNIT_X, lsl #6
++	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++1:	/* main loop, process 8 pixels per iteration with prefetch */
++	subs	W, W, #8
++	add	PF_OFFS, UNIT_X, lsl #3
++	scale_2_pixels
++	scale_2_pixels
++	scale_2_pixels
++	scale_2_pixels
++	pld	[SRC, PF_OFFS, lsr #15]
++	bge	1b
++2:
++	subs	W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
+ 	blt	2f
+-1: /* main loop, process 4 pixels per iteration */
++1:	/* process the remaining pixels */
+ 	scale_2_pixels
+ 	scale_2_pixels
+ 	subs	W, W, #4
+@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	.unreq	TMP1
+ 	.unreq	TMP2
+ 	.unreq	VXMASK
++	.unreq	PF_OFFS
+ 	/* return */
+ 	pop	{r4, r5, r6, r7}
+ 	bx	lr
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch b/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
new file mode 100644
index 0000000..d22df37
--- /dev/null
+++ b/x11-libs/pixman/files/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
@@ -0,0 +1,131 @@
+From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 7 Mar 2011 03:10:43 +0200
+Subject: [PATCH 14/22] ARM: common macro for nearest scaling fast paths
+
+The code of nearest scaled 'src_0565_0565' function was generalized
+and moved to a common macro, so that it can be reused for other
+fast paths.
+---
+ pixman/pixman-arm-simd-asm.S |   60 +++++++++++++++++++++++++----------------
+ 1 files changed, 36 insertions(+), 24 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index dd1366d..a9775e2 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ .endfunc
+ 
+ /*
+- * Note: This function is only using armv4t instructions (not even armv6),
++ * Note: This code is only using armv5te instructions (not even armv6),
+  *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+  *       be split into a few variants, tuned for each microarchitecture.
+  *
+  * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+  * have efficient write combining), it needs to be changed to use 16-byte
+  * aligned writes using STM instruction.
++ *
++ * Nearest scanline scaler macro template uses the following arguments:
++ *  fname                     - name of the function to generate
++ *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
++ *  t                         - type suffix for LDR/STR instructions
++ *  prefetch_distance         - prefetch in the source image by that many
++ *                              pixels ahead
++ *  prefetch_braking_distance - stop prefetching when that many pixels are
++ *                              remaining before the end of scanline
+  */
+-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
++
++.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
++                                      prefetch_distance,        \
++                                      prefetch_braking_distance
++
++pixman_asm_function fname
+ 	W	.req	r0
+ 	DST	.req	r1
+ 	SRC	.req	r2
+@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 
+ 	ldr	UNIT_X, [sp]
+ 	push	{r4, r5, r6, r7}
+-	mvn	VXMASK, #1
++	mvn	VXMASK, #((1 << bpp_shift) - 1)
+ 
+ 	/* define helper macro */
+ 	.macro	scale_2_pixels
+-		ldrh	TMP1, [SRC, TMP1]
+-		and	TMP2, VXMASK, VX, lsr #15
++		ldr&t	TMP1, [SRC, TMP1]
++		and	TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
+ 		add	VX, VX, UNIT_X
+-		strh	TMP1, [DST], #2
++		str&t	TMP1, [DST], #(1 << bpp_shift)
+ 
+-		ldrh	TMP2, [SRC, TMP2]
+-		and	TMP1, VXMASK, VX, lsr #15
++		ldr&t	TMP2, [SRC, TMP2]
++		and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ 		add	VX, VX, UNIT_X
+-		strh	TMP2, [DST], #2
++		str&t	TMP2, [DST], #(1 << bpp_shift)
+ 	.endm
+ 
+-	/*
+-	 * stop prefetch before reaching the end of scanline (a good behaving
+-	 * value selected based on some benchmarks with short scanlines)
+-	 */
+-	#define PREFETCH_BRAKING_DISTANCE 32
+-
+ 	/* now do the scaling */
+-	and	TMP1, VXMASK, VX, lsr #15
++	and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ 	add	VX, VX, UNIT_X
+-	subs	W, #(8 + PREFETCH_BRAKING_DISTANCE)
++	subs	W, W, #(8 + prefetch_braking_distance)
+ 	blt	2f
+-	/* set prefetch distance to 80 pixels ahead */
+-	add	PF_OFFS, VX, UNIT_X, lsl #6
+-	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++	/* calculate prefetch offset */
++	mov	PF_OFFS, #prefetch_distance
++	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
+ 1:	/* main loop, process 8 pixels per iteration with prefetch */
+ 	subs	W, W, #8
+ 	add	PF_OFFS, UNIT_X, lsl #3
+@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	scale_2_pixels
+ 	scale_2_pixels
+ 	scale_2_pixels
+-	pld	[SRC, PF_OFFS, lsr #15]
++	pld	[SRC, PF_OFFS, lsr #(16 - bpp_shift)]
+ 	bge	1b
+ 2:
+-	subs	W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
++	subs	W, W, #(4 - 8 - prefetch_braking_distance)
+ 	blt	2f
+ 1:	/* process the remaining pixels */
+ 	scale_2_pixels
+@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	scale_2_pixels
+ 2:
+ 	tst	W, #1
+-	ldrneh	TMP1, [SRC, TMP1]
+-	strneh	TMP1, [DST], #2
++	ldrne&t	TMP1, [SRC, TMP1]
++	strne&t	TMP1, [DST]
+ 	/* cleanup helper macro */
+ 	.purgem	scale_2_pixels
+ 	.unreq	DST
+@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	pop	{r4, r5, r6, r7}
+ 	bx	lr
+ .endfunc
++.endm
++
++generate_nearest_scanline_func \
++    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch b/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
new file mode 100644
index 0000000..99d746e
--- /dev/null
+++ b/x11-libs/pixman/files/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
@@ -0,0 +1,60 @@
+From 5921c17639fe5fdc595c850e3347281c1c8746ba Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 22:16:32 +0200
+Subject: [PATCH 15/22] ARM: assembly optimized nearest scaled 'src_8888_8888'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S |    3 +++
+ pixman/pixman-arm-simd.c     |    9 +++++++++
+ 2 files changed, 12 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index a9775e2..858c690 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -433,3 +433,6 @@ pixman_asm_function fname
+ 
+ generate_nearest_scanline_func \
+     pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
++
++generate_nearest_scanline_func \
++    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index 6bbc109..a66f8df 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -389,6 +389,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
+ 
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+                                         uint16_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
++                                        uint32_t, uint32_t)
+ 
+ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ {
+@@ -411,6 +413,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+ 
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch b/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
new file mode 100644
index 0000000..3131b7c
--- /dev/null
+++ b/x11-libs/pixman/files/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
@@ -0,0 +1,130 @@
+From 66f4ee1b3bccf4516433d61dbf2035551a712fa2 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 10:59:46 +0200
+Subject: [PATCH 16/22] ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
+
+It can be reused in different ARM NEON bilinear scaling fast path functions.
+---
+ pixman/pixman-arm-common.h |   45 ++++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c   |   44 ++----------------------------------------
+ 2 files changed, 48 insertions(+), 41 deletions(-)
+
+diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
+index 9b1322b..c3bf986 100644
+--- a/pixman/pixman-arm-common.h
++++ b/pixman/pixman-arm-common.h
+@@ -361,4 +361,49 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                    \
+     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+     SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+ 
++/*****************************************************************************/
++
++#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op,     \
++                                                src_type, dst_type)           \
++void                                                                          \
++pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
++                                                dst_type *       dst,         \
++                                                const src_type * top,         \
++                                                const src_type * bottom,      \
++                                                int              wt,          \
++                                                int              wb,          \
++                                                pixman_fixed_t   x,           \
++                                                pixman_fixed_t   ux,          \
++                                                int              width);      \
++                                                                              \
++static force_inline void                                                      \
++scaled_bilinear_scanline_##cputype##_##name##_##op (                          \
++                                                dst_type *       dst,         \
++                                                const uint32_t * mask,        \
++                                                const src_type * src_top,     \
++                                                const src_type * src_bottom,  \
++                                                int32_t          w,           \
++                                                int              wt,          \
++                                                int              wb,          \
++                                                pixman_fixed_t   vx,          \
++                                                pixman_fixed_t   unit_x,      \
++                                                pixman_fixed_t   max_vx,      \
++                                                pixman_bool_t    zero_src)    \
++{                                                                             \
++    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
++	return;                                                               \
++    pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
++                            dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
++}                                                                             \
++                                                                              \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, COVER, FALSE, FALSE)     \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, NONE, FALSE, FALSE)      \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
++
+ #endif
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index c7c0254..98ad5f2 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -127,6 +127,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+                                            OVER, uint16_t, uint16_t)
+ 
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
++                                         uint32_t, uint32_t)
++
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+                                    int32_t   h,
+@@ -232,47 +235,6 @@ pixman_blt_neon (uint32_t *src_bits,
+     }
+ }
+ 
+-void
+-pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t *       out,
+-                                                        const uint32_t * top,
+-                                                        const uint32_t * bottom,
+-                                                        int              wt,
+-                                                        int              wb,
+-                                                        pixman_fixed_t   x,
+-                                                        pixman_fixed_t   ux,
+-                                                        int              width);
+-
+-static force_inline void
+-scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t *       dst,
+-					     const uint32_t * mask,
+-					     const uint32_t * src_top,
+-					     const uint32_t * src_bottom,
+-					     int32_t          w,
+-					     int              wt,
+-					     int              wb,
+-					     pixman_fixed_t   vx,
+-					     pixman_fixed_t   unit_x,
+-					     pixman_fixed_t   max_vx,
+-					     pixman_bool_t    zero_src)
+-{
+-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
+-                                                            src_bottom, wt, wb,
+-                                                            vx, unit_x, w);
+-}
+-
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       COVER, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       PAD, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       NONE, FALSE, FALSE)
+-
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch b/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
new file mode 100644
index 0000000..853e221
--- /dev/null
+++ b/x11-libs/pixman/files/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
@@ -0,0 +1,271 @@
+From 34098dba6763afd3636a14f9c2a079ab08f23b2d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:34:15 +0200
+Subject: [PATCH 17/22] ARM: NEON: common macro template for bilinear scanline scalers
+
+This allows to generate bilinear scanline scaling functions targeting
+various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
+and r5g6b5 color formats are supported. More formats can be added if needed.
+---
+ pixman/pixman-arm-neon-asm.S |  222 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon-asm.h |   17 +++
+ 2 files changed, 239 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index c168e10..f3784f5 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2588,3 +2588,225 @@ pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+     .unreq    TMP1
+     .unreq    TMP2
+ .endfunc
++
++.purgem bilinear_interpolate_last_pixel
++.purgem bilinear_interpolate_two_pixels
++.purgem bilinear_interpolate_four_pixels
++
++/*
++ * Bilinear scaling support code which tries to provide pixel fetching, color
++ * format conversion, and interpolation as separate macros which can be used
++ * as the basic building blocks for constructing bilinear scanline functions.
++ */
++
++.macro bilinear_load_8888 reg1, reg2, tmp
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {reg1}, [TMP1]
++    vld1.32   {reg2}, [TMP2]
++.endm
++
++.macro bilinear_load_0565 reg1, reg2, tmp
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    vld1.32   {reg2[0]}, [TMP1]
++    vld1.32   {reg2[1]}, [TMP2]
++    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
++.endm
++
++.macro bilinear_store_8888 numpix, tmp1, tmp2
++.if numpix == 4
++    vst1.32   {d0, d1}, [OUT]!
++.elseif numpix == 2
++    vst1.32   {d0}, [OUT]!
++.elseif numpix == 1
++    vst1.32   {d0[0]}, [OUT, :32]!
++.else
++    .error bilinear_store_8888 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_store_0565 numpix, tmp1, tmp2
++    vuzp.u8 d0, d1
++    vuzp.u8 d2, d3
++    vuzp.u8 d1, d3
++    vuzp.u8 d0, d2
++    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
++.if numpix == 4
++    vst1.16   {d2}, [OUT]!
++.elseif numpix == 2
++    vst1.32   {d2[0]}, [OUT]!
++.elseif numpix == 1
++    vst1.16   {d2[0]}, [OUT]!
++.else
++    .error bilinear_store_0565 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    vshr.u16  d30, d24, #8
++    /* 4 cycles bubble */
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    /* 5 cycles bubble */
++    vshrn.u32 d0, q0, #16
++    /* 3 cycles bubble */
++    vmovn.u16 d0, q0
++    /* 1 cycle bubble */
++    bilinear_store_&dst_fmt 1, q2, q3
++.endm
++
++.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    bilinear_load_&src_fmt d20, d21, d22
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshrn.u32 d30, q0, #16
++    vshrn.u32 d31, q10, #16
++    vmovn.u16 d0, q15
++    bilinear_store_&dst_fmt 2, q2, q3
++.endm
++
++.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    bilinear_load_&src_fmt d20, d21, d22
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    bilinear_load_&src_fmt d4, d5, d6
++    vmull.u8  q3, d4, d28
++    vmlal.u8  q3, d5, d29
++    bilinear_load_&src_fmt d16, d17, d18
++    vmull.u8  q9, d16, d28
++    vmlal.u8  q9, d17, d29
++    pld       [TMP1, PF_OFFS]
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshr.u16  q15, q12, #8
++    vshll.u16 q2, d6, #8
++    vmlsl.u16 q2, d6, d30
++    vmlal.u16 q2, d7, d30
++    vshll.u16 q8, d18, #8
++    pld       [TMP2, PF_OFFS]
++    vmlsl.u16 q8, d18, d31
++    vmlal.u16 q8, d19, d31
++    vadd.u16  q12, q12, q13
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q10, #16
++    vshrn.u32 d4, q2, #16
++    vshrn.u32 d5, q8, #16
++    vmovn.u16 d0, q0
++    vmovn.u16 d1, q2
++    bilinear_store_&dst_fmt 4, q2, q3
++.endm
++
++/*
++ * Main template macro for generating NEON optimized bilinear scanline
++ * functions.
++ *
++ * TODO: use software pipelining and aligned writes to the destination buffer
++ *       in order to improve performance
++ *
++ * Bilinear scanline scaler macro template uses the following arguments:
++ *  fname             - name of the function to generate
++ *  src_fmt           - source color format (8888 or 0565)
++ *  dst_fmt           - destination color format (8888 or 0565)
++ *  bpp_shift         - (1 << bpp_shift) is the size of source pixel in bytes
++ *  prefetch_distance - prefetch in the source image by that many
++ *                      pixels ahead
++ */
++
++.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
++                                       bpp_shift, prefetch_distance
++
++pixman_asm_function fname
++    OUT       .req      r0
++    TOP       .req      r1
++    BOTTOM    .req      r2
++    WT        .req      r3
++    WB        .req      r4
++    X         .req      r5
++    UX        .req      r6
++    WIDTH     .req      ip
++    TMP1      .req      r3
++    TMP2      .req      r4
++    PF_OFFS   .req      r7
++    TMP3      .req      r8
++    TMP4      .req      r9
++
++    mov       ip, sp
++    push      {r4, r5, r6, r7, r8, r9}
++    mov       PF_OFFS, #prefetch_distance
++    ldmia     ip, {WB, X, UX, WIDTH}
++    mul       PF_OFFS, PF_OFFS, UX
++
++    cmp       WIDTH, #0
++    ble       3f
++
++    vdup.u16  q12, X
++    vdup.u16  q13, UX
++    vdup.u8   d28, WT
++    vdup.u8   d29, WB
++    vadd.u16  d25, d25, d26
++    vadd.u16  q13, q13, q13
++
++    subs      WIDTH, WIDTH, #4
++    blt       1f
++    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
++0:
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    subs      WIDTH, WIDTH, #4
++    bge       0b
++1:
++    tst       WIDTH, #2
++    beq       2f
++    bilinear_interpolate_two_pixels src_fmt, dst_fmt
++2:
++    tst       WIDTH, #1
++    beq       3f
++    bilinear_interpolate_last_pixel src_fmt, dst_fmt
++3:
++    pop       {r4, r5, r6, r7, r8, r9}
++    bx        lr
++
++    .unreq    OUT
++    .unreq    TOP
++    .unreq    BOTTOM
++    .unreq    WT
++    .unreq    WB
++    .unreq    X
++    .unreq    UX
++    .unreq    WIDTH
++    .unreq    TMP1
++    .unreq    TMP2
++    .unreq    PF_OFFS
++    .unreq    TMP3
++    .unreq    TMP4
++.endfunc
++
++.endm
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index 24fa361..97adc6a 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -1158,3 +1158,20 @@ fname:
+     vsri.u16    out, tmp1, #5
+     vsri.u16    out, tmp2, #11
+ .endm
++
++/*
++ * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
++ * returned in (out0, out1) registers pair. Requires one temporary
++ * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
++ * value from 'in' is lost
++ */
++.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
++    vshl.u16    out0, in,   #5  /* G top 6 bits */
++    vshl.u16    tmp,  in,   #11 /* B top 5 bits */
++    vsri.u16    in,   in,   #5  /* R is ready in top bits */
++    vsri.u16    out0, out0, #6  /* G is ready in top bits */
++    vsri.u16    tmp,  tmp,  #5  /* B is ready in top bits */
++    vshr.u16    out1, in,   #8  /* R is in place */
++    vsri.u16    out0, tmp,  #8  /* G & B is in place */
++    vzip.u16    out0, out1      /* everything is in place */
++.endm
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch b/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
new file mode 100644
index 0000000..2913568
--- /dev/null
+++ b/x11-libs/pixman/files/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
@@ -0,0 +1,226 @@
+From 11a0c5badbc59ce967707ef836313cc98f8aec4e Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:46:48 +0200
+Subject: [PATCH 18/22] ARM: use common macro template for bilinear scaled 'src_8888_8888'
+
+This is a cleanup for old and now duplicated code. The performance improvement
+is mostly coming from the enabled use of software prefetch, but instructions
+scheduling is also slightly better.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  191 +-----------------------------------------
+ 1 files changed, 3 insertions(+), 188 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f3784f5..52dc444 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2405,194 +2405,6 @@ generate_composite_function_nearest_scanline \
+ fname:
+ .endm
+ 
+-.macro bilinear_interpolate_last_pixel
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vshr.u16  d30, d24, #8
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    /* 5 cycles bubble */
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    /* 5 cycles bubble */
+-    vshrn.u32 d0, q0, #16
+-    /* 3 cycles bubble */
+-    vmovn.u16 d0, q0
+-    /* 1 cycle bubble */
+-    vst1.32   {d0[0]}, [OUT, :32]!
+-.endm
+-
+-.macro bilinear_interpolate_two_pixels
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d20}, [TMP1]
+-    vld1.32   {d21}, [TMP2]
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    vshll.u16 q10, d22, #8
+-    vmlsl.u16 q10, d22, d31
+-    vmlal.u16 q10, d23, d31
+-    vshrn.u32 d30, q0, #16
+-    vshrn.u32 d31, q10, #16
+-    vmovn.u16 d0, q15
+-    vst1.32   {d0}, [OUT]!
+-.endm
+-
+-.macro bilinear_interpolate_four_pixels
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d20}, [TMP1]
+-    vld1.32   {d21}, [TMP2]
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    vshll.u16 q10, d22, #8
+-    vmlsl.u16 q10, d22, d31
+-    vmlal.u16 q10, d23, d31
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d4}, [TMP1]
+-    vld1.32   {d5}, [TMP2]
+-    vmull.u8  q3, d4, d28
+-    vmlal.u8  q3, d5, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d16}, [TMP1]
+-    vld1.32   {d17}, [TMP2]
+-    vmull.u8  q9, d16, d28
+-    vmlal.u8  q9, d17, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q2, d6, #8
+-    vmlsl.u16 q2, d6, d30
+-    vmlal.u16 q2, d7, d30
+-    vshll.u16 q8, d18, #8
+-    vmlsl.u16 q8, d18, d31
+-    vmlal.u16 q8, d19, d31
+-    vshrn.u32 d0, q0, #16
+-    vshrn.u32 d1, q10, #16
+-    vshrn.u32 d4, q2, #16
+-    vshrn.u32 d5, q8, #16
+-    vmovn.u16 d0, q0
+-    vmovn.u16 d1, q2
+-    vst1.32   {d0, d1}, [OUT]!
+-.endm
+-
+-
+-/*
+- * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t *       out,
+- *                                                const uint32_t * top,
+- *                                                const uint32_t * bottom,
+- *                                                int              wt,
+- *                                                int              wb,
+- *                                                pixman_fixed_t   x,
+- *                                                pixman_fixed_t   ux,
+- *                                                int              width)
+- */
+-
+-pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+-    OUT       .req      r0
+-    TOP       .req      r1
+-    BOTTOM    .req      r2
+-    WT        .req      r3
+-    WB        .req      r4
+-    X         .req      r5
+-    UX        .req      r6
+-    WIDTH     .req      ip
+-    TMP1      .req      r3
+-    TMP2      .req      r4
+-
+-    mov       ip, sp
+-    push      {r4, r5, r6, r7}
+-    ldmia     ip, {WB, X, UX, WIDTH}
+-
+-    cmp       WIDTH, #0
+-    ble       3f
+-    vdup.u16  q12, X
+-    vdup.u16  q13, UX
+-    vdup.u8   d28, WT
+-    vdup.u8   d29, WB
+-    vadd.u16  d25, d25, d26
+-    vadd.u16  q13, q13, q13
+-
+-    subs      WIDTH, WIDTH, #4
+-    blt       1f
+-0:
+-    bilinear_interpolate_four_pixels
+-    subs      WIDTH, WIDTH, #4
+-    bge       0b
+-1:
+-    tst       WIDTH, #2
+-    beq       2f
+-    bilinear_interpolate_two_pixels
+-2:
+-    tst       WIDTH, #1
+-    beq       3f
+-    bilinear_interpolate_last_pixel
+-3:
+-    pop       {r4, r5, r6, r7}
+-    bx        lr
+-
+-    .unreq    OUT
+-    .unreq    TOP
+-    .unreq    BOTTOM
+-    .unreq    WT
+-    .unreq    WB
+-    .unreq    X
+-    .unreq    UX
+-    .unreq    WIDTH
+-    .unreq    TMP1
+-    .unreq    TMP2
+-.endfunc
+-
+-.purgem bilinear_interpolate_last_pixel
+-.purgem bilinear_interpolate_two_pixels
+-.purgem bilinear_interpolate_four_pixels
+-
+ /*
+  * Bilinear scaling support code which tries to provide pixel fetching, color
+  * format conversion, and interpolation as separate macros which can be used
+@@ -2810,3 +2622,6 @@ pixman_asm_function fname
+ .endfunc
+ 
+ .endm
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch b/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
new file mode 100644
index 0000000..56fd9b7
--- /dev/null
+++ b/x11-libs/pixman/files/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
@@ -0,0 +1,51 @@
+From 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:53:04 +0200
+Subject: [PATCH 19/22] ARM: NEON optimization for bilinear scaled 'src_8888_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
+  after:  op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    5 +++++
+ 2 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 52dc444..f0b42ca 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2625,3 +2625,6 @@ pixman_asm_function fname
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 98ad5f2..ba6de66 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -129,6 +129,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ 
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                          uint32_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
++                                         uint32_t, uint16_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -350,6 +352,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch b/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
new file mode 100644
index 0000000..17af7c5
--- /dev/null
+++ b/x11-libs/pixman/files/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
@@ -0,0 +1,50 @@
+From 29003c3befe2159396d181ef9ac1caaadcabf382 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:21:53 +0200
+Subject: [PATCH 20/22] ARM: NEON optimization for bilinear scaled 'src_0565_x888'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
+  after:  op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 7 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f0b42ca..9245db9 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2628,3 +2628,6 @@ generate_bilinear_scanline_func \
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index ba6de66..18e26eb 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -131,6 +131,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                          uint32_t, uint32_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                          uint32_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
++                                         uint16_t, uint32_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -355,6 +357,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch b/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
new file mode 100644
index 0000000..24275a8
--- /dev/null
+++ b/x11-libs/pixman/files/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
@@ -0,0 +1,49 @@
+From fe99673719091d4a880d031add1369332a75731b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:27:41 +0200
+Subject: [PATCH 21/22] ARM: NEON optimization for bilinear scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    3 +++
+ 2 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9245db9..2b6875b 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 18e26eb..0a10ca1 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                          uint32_t, uint16_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+                                          uint16_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
++                                         uint16_t, uint16_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ 
+     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+ 
+     { PIXMAN_OP_NONE },
+ };
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch b/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
new file mode 100644
index 0000000..24f9652
--- /dev/null
+++ b/x11-libs/pixman/files/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
@@ -0,0 +1,166 @@
+From 70a923882ca24664344ba91a649e7aa12c3063f7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:55:48 +0200
+Subject: [PATCH 22/22] ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
+
+Instructions scheduling improved in the code responsible for fetching r5g6b5
+pixels and converting them to the intermediate x8r8g8b8 color format used in
+the interpolation part of code. Still a lot of NEON stalls are remaining,
+which can be resolved later by the use of pipelining.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+          op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
+          op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  118 +++++++++++++++++++++++++++++++++++------
+ 1 files changed, 100 insertions(+), 18 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2b6875b..71b30ac 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2430,6 +2430,101 @@ fname:
+     convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ .endm
+ 
++.macro bilinear_load_and_vertical_interpolate_two_8888 \
++                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
++
++    bilinear_load_8888 reg1, reg2, tmp1
++    vmull.u8  acc1, reg1, d28
++    vmlal.u8  acc1, reg2, d29
++    bilinear_load_8888 reg3, reg4, tmp2
++    vmull.u8  acc2, reg3, d28
++    vmlal.u8  acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_8888 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++    bilinear_load_and_vertical_interpolate_two_8888 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
++    bilinear_load_and_vertical_interpolate_two_8888 \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_two_0565 \
++                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
++
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {acc2lo[0]}, [TMP1]
++    vld1.32   {acc2hi[0]}, [TMP3]
++    vld1.32   {acc2lo[1]}, [TMP2]
++    vld1.32   {acc2hi[1]}, [TMP4]
++    convert_0565_to_x888 acc2, reg3, reg2, reg1
++    vzip.u8   reg1, reg3
++    vzip.u8   reg2, reg4
++    vzip.u8   reg3, reg4
++    vzip.u8   reg1, reg2
++    vmull.u8  acc1, reg1, d28
++    vmlal.u8  acc1, reg2, d29
++    vmull.u8  acc2, reg3, d28
++    vmlal.u8  acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_0565 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {xacc2lo[0]}, [TMP1]
++    vld1.32   {xacc2hi[0]}, [TMP3]
++    vld1.32   {xacc2lo[1]}, [TMP2]
++    vld1.32   {xacc2hi[1]}, [TMP4]
++    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {yacc2lo[0]}, [TMP1]
++    vzip.u8   xreg1, xreg3
++    vld1.32   {yacc2hi[0]}, [TMP3]
++    vzip.u8   xreg2, xreg4
++    vld1.32   {yacc2lo[1]}, [TMP2]
++    vzip.u8   xreg3, xreg4
++    vld1.32   {yacc2hi[1]}, [TMP4]
++    vzip.u8   xreg1, xreg2
++    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
++    vmull.u8  xacc1, xreg1, d28
++    vzip.u8   yreg1, yreg3
++    vmlal.u8  xacc1, xreg2, d29
++    vzip.u8   yreg2, yreg4
++    vmull.u8  xacc2, xreg3, d28
++    vzip.u8   yreg3, yreg4
++    vmlal.u8  xacc2, xreg4, d29
++    vzip.u8   yreg1, yreg2
++    vmull.u8  yacc1, yreg1, d28
++    vmlal.u8  yacc1, yreg2, d29
++    vmull.u8  yacc2, yreg3, d28
++    vmlal.u8  yacc2, yreg4, d29
++.endm
++
+ .macro bilinear_store_8888 numpix, tmp1, tmp2
+ .if numpix == 4
+     vst1.32   {d0, d1}, [OUT]!
+@@ -2477,12 +2572,8 @@ fname:
+ .endm
+ 
+ .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
+-    bilinear_load_&src_fmt d0, d1, d2
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    bilinear_load_&src_fmt d20, d21, d22
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
++    bilinear_load_and_vertical_interpolate_two_&src_fmt \
++                q1, q11, d0, d1, d20, d21, d22, d23
+     vshr.u16  q15, q12, #8
+     vadd.u16  q12, q12, q13
+     vshll.u16 q0, d2, #8
+@@ -2498,18 +2589,9 @@ fname:
+ .endm
+ 
+ .macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
+-    bilinear_load_&src_fmt d0, d1, d2
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    bilinear_load_&src_fmt d20, d21, d22
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    bilinear_load_&src_fmt d4, d5, d6
+-    vmull.u8  q3, d4, d28
+-    vmlal.u8  q3, d5, d29
+-    bilinear_load_&src_fmt d16, d17, d18
+-    vmull.u8  q9, d16, d28
+-    vmlal.u8  q9, d17, d29
++    bilinear_load_and_vertical_interpolate_four_&src_fmt \
++                q1, q11, d0, d1, d20, d21, d22, d23 \
++                q3, q9,  d4, d5, d16, d17, d18, d19
+     pld       [TMP1, PF_OFFS]
+     vshr.u16  q15, q12, #8
+     vadd.u16  q12, q12, q13
+-- 
+1.7.3.4
+
diff --git a/x11-libs/pixman/pixman-0.21.6.ebuild b/x11-libs/pixman/pixman-0.21.6.ebuild
new file mode 100644
index 0000000..63738aa
--- /dev/null
+++ b/x11-libs/pixman/pixman-0.21.6.ebuild
@@ -0,0 +1,74 @@
+# Copyright 1999-2011 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: $
+
+EAPI=3
+inherit xorg-2 toolchain-funcs versionator
+
+EGIT_REPO_URI="git://anongit.freedesktop.org/git/pixman"
+DESCRIPTION="Low-level pixel manipulation routines"
+
+KEYWORDS="~arm"
+IUSE="altivec mmx sse2 simd neon"
+
+# Derive CONFIGURE_OPTIONS from the altivec/simd/neon/mmx/sse2 USE flags.
+pkg_setup() {
+	xorg-2_pkg_setup
+	CONFIGURE_OPTIONS="
+		$(use_enable altivec vmx)
+		$(use_enable simd arm-simd)
+		$(use_enable neon arm-neon)
+		--disable-gtk"
+
+	local enable_mmx="$(use mmx && echo 1 || echo 0)"
+	local enable_sse2="$(use sse2 && echo 1 || echo 0)"
+
+	# this block fixes bug #260287
+	# (the helper is called directly so that only its exit status is tested)
+	if use x86 && use sse2 && ! version_is_at_least "4.2" "$(gcc-version)"; then
+		ewarn "SSE2 instructions require GCC 4.2 or higher."
+		ewarn "pixman will be built *without* SSE2 support"
+		enable_sse2="0"
+	fi
+
+	# this block fixes bug #236558
+	case "$enable_mmx,$enable_sse2" in
+	'1,1')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --enable-sse2" ;;
+	'1,0')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --enable-mmx --disable-sse2" ;;
+	'0,1')
+		ewarn "You enabled SSE2 but have MMX disabled. This is an invalid configuration."
+		ewarn "pixman will be built *without* MMX/SSE2 support."
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+	'0,0')
+		CONFIGURE_OPTIONS="${CONFIGURE_OPTIONS} --disable-mmx --disable-sse2" ;;
+	esac
+}
+
+# Apply the bundled patches 0002-0022 from FILESDIR in order, then eautoreconf.
+src_prepare() {
+	epatch "${FILESDIR}"/0002-Fix-compilation-on-Win32.patch
+	epatch "${FILESDIR}"/0003-test-Fix-tests-for-compilation-on-Windows.patch
+	epatch "${FILESDIR}"/0004-test-Add-Makefile-for-Win32.patch
+	epatch "${FILESDIR}"/0005-Do-not-include-unused-headers.patch
+	epatch "${FILESDIR}"/0006-test-Silence-MSVC-warnings.patch
+	epatch "${FILESDIR}"/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
+	epatch "${FILESDIR}"/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
+	epatch "${FILESDIR}"/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
+	epatch "${FILESDIR}"/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
+	epatch "${FILESDIR}"/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
+	epatch "${FILESDIR}"/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
+	epatch "${FILESDIR}"/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
+	epatch "${FILESDIR}"/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
+	epatch "${FILESDIR}"/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
+	epatch "${FILESDIR}"/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
+	epatch "${FILESDIR}"/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
+	epatch "${FILESDIR}"/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
+	epatch "${FILESDIR}"/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
+	epatch "${FILESDIR}"/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
+	epatch "${FILESDIR}"/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
+	epatch "${FILESDIR}"/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
+	# We patch Makefile.am and such, so eautoreconf!
+	eautoreconf
+}