From 47faab80d76c7e60eb9f6fdcf4395ac4f47ab7ee Mon Sep 17 00:00:00 2001 From: Josh Coalson Date: Tue, 9 Nov 2004 01:50:35 +0000 Subject: [PATCH] split PPC asm code into two directories, one holding asm compatible with OS X native 'as', and one for 'gas' --- src/libFLAC/Makefile.am | 3 +- src/libFLAC/Makefile.lite | 2 +- src/libFLAC/ppc/Makefile.am | 15 +- src/libFLAC/ppc/as/Makefile.am | 42 +++ src/libFLAC/ppc/{ => as}/lpc_asm.s | 0 src/libFLAC/ppc/gas/Makefile.am | 42 +++ src/libFLAC/ppc/gas/lpc_asm.s | 431 +++++++++++++++++++++++++++++ 7 files changed, 521 insertions(+), 14 deletions(-) create mode 100644 src/libFLAC/ppc/as/Makefile.am rename src/libFLAC/ppc/{ => as}/lpc_asm.s (100%) create mode 100644 src/libFLAC/ppc/gas/Makefile.am create mode 100644 src/libFLAC/ppc/gas/lpc_asm.s diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index 4538f484..30e2f672 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -44,7 +44,8 @@ endif endif if FLaC__CPU_PPC ARCH_SUBDIRS = ppc -libFLAC_la_LIBADD = ppc/libFLAC-asm.la +#@@@@@@ need to conditionalize this to gas or as depending on configure +libFLAC_la_LIBADD = ppc/as/libFLAC-asm.la LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning" endif endif diff --git a/src/libFLAC/Makefile.lite b/src/libFLAC/Makefile.lite index eb286c0c..e6b01e34 100644 --- a/src/libFLAC/Makefile.lite +++ b/src/libFLAC/Makefile.lite @@ -49,7 +49,7 @@ DEBUG_CFLAGS = -DFLAC__OVERFLOW_DETECT ifeq ($(DARWIN_BUILD),yes) SRCS_S = \ - ppc/lpc_asm.s + ppc/as/lpc_asm.s else ifeq ($(SOLARIS_BUILD),yes) else diff --git a/src/libFLAC/ppc/Makefile.am b/src/libFLAC/ppc/Makefile.am index 181dbaeb..c88ac7e4 100644 --- a/src/libFLAC/ppc/Makefile.am +++ b/src/libFLAC/ppc/Makefile.am @@ -28,15 +28,6 @@ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-SUFFIXES = .s .lo - -STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh - -# For some unknown reason libtool can't figure out the tag for 'as', so -# we fake it with --tag=CC and strip out unwanted options. -.s.lo: - $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $< - -noinst_LTLIBRARIES = libFLAC-asm.la -libFLAC_asm_la_SOURCES = \ - lpc_asm.s +#@@@@@@ We actually want to only compile one based on some configuration, +#@@@@@@ so we'll eventually have to conditionalize the sub-Makefile.am's +SUBDIRS = as gas diff --git a/src/libFLAC/ppc/as/Makefile.am b/src/libFLAC/ppc/as/Makefile.am new file mode 100644 index 00000000..181dbaeb --- /dev/null +++ b/src/libFLAC/ppc/as/Makefile.am @@ -0,0 +1,42 @@ +# libFLAC - Free Lossless Audio Codec library +# Copyright (C) 2004 Josh Coalson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the Xiph.org Foundation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SUFFIXES = .s .lo + +STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh + +# For some unknown reason libtool can't figure out the tag for 'as', so +# we fake it with --tag=CC and strip out unwanted options. +.s.lo: + $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $< + +noinst_LTLIBRARIES = libFLAC-asm.la +libFLAC_asm_la_SOURCES = \ + lpc_asm.s diff --git a/src/libFLAC/ppc/lpc_asm.s b/src/libFLAC/ppc/as/lpc_asm.s similarity index 100% rename from src/libFLAC/ppc/lpc_asm.s rename to src/libFLAC/ppc/as/lpc_asm.s diff --git a/src/libFLAC/ppc/gas/Makefile.am b/src/libFLAC/ppc/gas/Makefile.am new file mode 100644 index 00000000..bc6fc2c7 --- /dev/null +++ b/src/libFLAC/ppc/gas/Makefile.am @@ -0,0 +1,42 @@ +# libFLAC - Free Lossless Audio Codec library +# Copyright (C) 2004 Josh Coalson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# - Neither the name of the Xiph.org Foundation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +SUFFIXES = .s .lo + +STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh + +# libtool can't figure out the tag for 'gas', so we fake it with --tag=CC and +# strip unwanted options; -mregnames/-maltivec accept rN/vN names and AltiVec ops. +.s.lo: + $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) gas -mregnames -maltivec -o $@ $< + +noinst_LTLIBRARIES = libFLAC-asm.la +libFLAC_asm_la_SOURCES = \ + lpc_asm.s diff --git a/src/libFLAC/ppc/gas/lpc_asm.s b/src/libFLAC/ppc/gas/lpc_asm.s new file mode 100644 index 00000000..485902ef --- /dev/null +++ b/src/libFLAC/ppc/gas/lpc_asm.s @@ -0,0 +1,431 @@ +# libFLAC - Free Lossless Audio Codec library +# Copyright (C) 2004 Josh Coalson +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# - Neither the name of the Xiph.org Foundation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +.text + .align 2 +.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16 +.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16, @function + +.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8 +.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, @function + +_FLAC__lpc_restore_signal_asm_ppc_altivec_16: +# r3: residual[] +# r4: data_len +# r5: qlp_coeff[] +# r6: order +# r7: lp_quantization +# r8: data[] + +# see src/libFLAC/lpc.c:FLAC__lpc_restore_signal() +# this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual +# bps<=15 for mid-side coding, since that uses an extra bit) + +# these should be fast; the inner loop is unrolled (it takes no more than +# 3*(order%4) instructions, all of which are arithmetic), and all of the +# coefficients and all relevant history stay in registers, so the outer loop +# has only one load from memory (the residual) + +# I have not yet run this through simg4, so there may be some avoidable stalls, +# and there may be a somewhat more clever way to do the outer loop + +# the branch mechanism may prevent dynamic loading; I still need to examine +# this issue, and there may be a more elegant method + + stmw r31,-4(r1) + + addi r9,r1,-28 + li r31,0xf + andc r9,r9,r31 # for quadword-aligned stack data + + slwi r6,r6,2 # adjust for word size + slwi r4,r4,2 + add r4,r4,r8 # r4 = data+data_len + + mfspr r0,256 # cache old vrsave + addis r31,0,0xffff + ori r31,r31,0xfc00 + mtspr 256,r31 # declare VRs in vrsave + + cmplw cr0,r8,r4 # i> lp_quantization + + lvewx v21,0,r3 # v21[n]: *residual + vperm v21,v21,v21,v18 # v21[3]: *residual + vaddsws v20,v21,v20 # v20[3]: *residual + (sum >> lp_quantization) + vsldoi v18,v18,v18,4 # increment shift vector + + vperm v21,v20,v20,v17 # v21[n]: shift for storage + vsldoi v17,v17,v17,12 # increment shift vector + stvewx v21,0,r8 + + vsldoi v20,v20,v20,12 + vsldoi v8,v8,v20,4 # insert value onto history + + addi r3,r3,4 + addi r8,r8,4 + cmplw cr0,r8,r4 # i> lp_quantization + 
+ lvewx v9,0,r3 # v9[n]: *residual + vperm v9,v9,v9,v6 # v9[3]: *residual + vaddsws v8,v9,v8 # v8[3]: *residual + (sum >> lp_quantization) + vsldoi v6,v6,v6,4 # increment shift vector + + vperm v9,v8,v8,v5 # v9[n]: shift for storage + vsldoi v5,v5,v5,12 # increment shift vector + stvewx v9,0,r8 + + vsldoi v8,v8,v8,12 + vsldoi v2,v2,v8,4 # insert value onto history + + addi r3,r3,4 + addi r8,r8,4 + cmplw cr0,r8,r4 # i