mirror of
https://github.com/claunia/flac.git
synced 2025-12-16 18:54:26 +00:00
Remove old/broken PPC/Altivec code.
* Removes FLAC__lpc_restore_signal_asm_ppc_altivec_16* from lpc.h and stream_decoder.c
* Removes PPC-specific code from cpu.c and cpu.h
* Removes PPC stuff from libFLAC/Makefile.lite and build/*.mk
* Removes as/gas/PPC-specific stuff from configure.ac and libFLAC/Makefile.am*
* Removes libFLAC/ppc folder and removes "src/libFLAC/ppc*/Makefile" lines from configure.ac
Patch-from: lvqcl <lvqcl.mail@gmail.com>
This commit is contained in:
@@ -39,21 +39,6 @@
|
|||||||
%.debug.i %.release.i : %.cpp
|
%.debug.i %.release.i : %.cpp
|
||||||
$(CCC) $(CXXFLAGS) -E $< -o $@
|
$(CCC) $(CXXFLAGS) -E $< -o $@
|
||||||
|
|
||||||
%.debug.o %.release.o : %.s
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
#$(CC) -c -arch $(PROC) -Wall -force_cpusubtype_ALL $< -o $@
|
|
||||||
$(AS) -arch $(PROC) -force_cpusubtype_ALL $< -o $@
|
|
||||||
else
|
|
||||||
$(AS) $< -o $@
|
|
||||||
endif
|
|
||||||
%.debug.pic.o %.release.pic.o : %.s
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
#$(CC) -c -arch $(PROC) -Wall -force_cpusubtype_ALL $< -o $@
|
|
||||||
$(AS) -arch $(PROC) -force_cpusubtype_ALL $< -o $@
|
|
||||||
else
|
|
||||||
$(AS) $< -o $@
|
|
||||||
endif
|
|
||||||
|
|
||||||
%.debug.o : %.nasm
|
%.debug.o : %.nasm
|
||||||
$(NASM) -f elf -d OBJ_FORMAT_elf -i ia32/ -g $< -o $@
|
$(NASM) -f elf -d OBJ_FORMAT_elf -i ia32/ -g $< -o $@
|
||||||
%.release.o : %.nasm
|
%.release.o : %.nasm
|
||||||
|
|||||||
@@ -51,11 +51,11 @@ CXXFLAGS = $(CFLAGS)
|
|||||||
|
|
||||||
LFLAGS = -L$(LIBPATH)
|
LFLAGS = -L$(LIBPATH)
|
||||||
|
|
||||||
DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) $(SRCS_S:%.s=%.debug.o)
|
DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o)
|
||||||
RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) $(SRCS_S:%.s=%.release.o)
|
RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o)
|
||||||
ifeq ($(PROC),x86_64)
|
ifeq ($(PROC),x86_64)
|
||||||
DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) $(SRCS_S:%.s=%.debug.pic.o)
|
DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o)
|
||||||
RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) $(SRCS_S:%.s=%.release.pic.o)
|
RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
debug : $(DEBUG_PROGRAM)
|
debug : $(DEBUG_PROGRAM)
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ else
|
|||||||
CC = gcc
|
CC = gcc
|
||||||
CCC = g++
|
CCC = g++
|
||||||
endif
|
endif
|
||||||
AS = as
|
|
||||||
NASM = nasm
|
NASM = nasm
|
||||||
LINK = ar cru
|
LINK = ar cru
|
||||||
OBJPATH = $(topdir)/objs
|
OBJPATH = $(topdir)/objs
|
||||||
@@ -71,11 +70,11 @@ CXXFLAGS = $(CFLAGS)
|
|||||||
|
|
||||||
LFLAGS = -L$(LIBPATH)
|
LFLAGS = -L$(LIBPATH)
|
||||||
|
|
||||||
DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) $(SRCS_S:%.s=%.debug.o)
|
DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o)
|
||||||
RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) $(SRCS_S:%.s=%.release.o)
|
RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o)
|
||||||
ifeq ($(PROC),x86_64)
|
ifeq ($(PROC),x86_64)
|
||||||
DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) $(SRCS_S:%.s=%.debug.pic.o)
|
DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o)
|
||||||
RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) $(SRCS_S:%.s=%.release.pic.o)
|
RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
debug : $(DEBUG_STATIC_LIB) $(DEBUG_DYNAMIC_LIB)
|
debug : $(DEBUG_STATIC_LIB) $(DEBUG_DYNAMIC_LIB)
|
||||||
|
|||||||
27
configure.ac
27
configure.ac
@@ -358,25 +358,6 @@ AC_DEFINE(FLAC__HAS_NASM)
|
|||||||
AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler])
|
AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# only matters for PowerPC
|
|
||||||
AC_CHECK_PROGS(AS, as, as)
|
|
||||||
AC_CHECK_PROGS(GAS, gas, gas)
|
|
||||||
|
|
||||||
# try -v (apple as) and --version (gas) at the same time
|
|
||||||
test "$AS" = "as" && as --version -v < /dev/null 2>&1 | grep Apple >/dev/null || AS=gas
|
|
||||||
|
|
||||||
AM_CONDITIONAL(FLaC__HAS_AS, test "$AS" = "as")
|
|
||||||
AM_CONDITIONAL(FLaC__HAS_GAS, test "$AS" = "gas")
|
|
||||||
if test "$AS" = "as" ; then
|
|
||||||
AC_DEFINE(FLAC__HAS_AS)
|
|
||||||
AH_TEMPLATE(FLAC__HAS_AS, [define if you are compiling for PowerPC and have the 'as' assembler])
|
|
||||||
fi
|
|
||||||
if test "$AS" = "gas" ; then
|
|
||||||
# funniest. macro. ever.
|
|
||||||
AC_DEFINE(FLAC__HAS_GAS)
|
|
||||||
AH_TEMPLATE(FLAC__HAS_GAS, [define if you are compiling for PowerPC and have the 'gas' assembler])
|
|
||||||
fi
|
|
||||||
|
|
||||||
if test "x$debug" = xtrue; then
|
if test "x$debug" = xtrue; then
|
||||||
CPPFLAGS="-DDEBUG $CPPFLAGS"
|
CPPFLAGS="-DDEBUG $CPPFLAGS"
|
||||||
CFLAGS=$(echo "$CFLAGS" | sed 's/-g//')
|
CFLAGS=$(echo "$CFLAGS" | sed 's/-g//')
|
||||||
@@ -431,25 +412,17 @@ if test x$enable_werror = "xyes" ; then
|
|||||||
AC_LANG_POP([C++])
|
AC_LANG_POP([C++])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
if test x$enable_stack_smash_protection = "xyes" ; then
|
if test x$enable_stack_smash_protection = "xyes" ; then
|
||||||
XIPH_GCC_STACK_PROTECTOR
|
XIPH_GCC_STACK_PROTECTOR
|
||||||
XIPH_GXX_STACK_PROTECTOR
|
XIPH_GXX_STACK_PROTECTOR
|
||||||
fi
|
fi
|
||||||
|
|
||||||
#@@@
|
|
||||||
AM_CONDITIONAL(FLaC__HAS_AS__TEMPORARILY_DISABLED, test "yes" = "no")
|
|
||||||
AM_CONDITIONAL(FLaC__HAS_GAS__TEMPORARILY_DISABLED, test "yes" = "no")
|
|
||||||
|
|
||||||
AC_CONFIG_FILES([ \
|
AC_CONFIG_FILES([ \
|
||||||
Makefile \
|
Makefile \
|
||||||
src/Makefile \
|
src/Makefile \
|
||||||
src/libFLAC/Makefile \
|
src/libFLAC/Makefile \
|
||||||
src/libFLAC/flac.pc \
|
src/libFLAC/flac.pc \
|
||||||
src/libFLAC/ia32/Makefile \
|
src/libFLAC/ia32/Makefile \
|
||||||
src/libFLAC/ppc/Makefile \
|
|
||||||
src/libFLAC/ppc/as/Makefile \
|
|
||||||
src/libFLAC/ppc/gas/Makefile \
|
|
||||||
src/libFLAC/include/Makefile \
|
src/libFLAC/include/Makefile \
|
||||||
src/libFLAC/include/private/Makefile \
|
src/libFLAC/include/private/Makefile \
|
||||||
src/libFLAC/include/protected/Makefile \
|
src/libFLAC/include/protected/Makefile \
|
||||||
|
|||||||
@@ -39,19 +39,13 @@ endif
|
|||||||
# FIXME: The following logic should be part of configure, not of Makefile.am
|
# FIXME: The following logic should be part of configure, not of Makefile.am
|
||||||
|
|
||||||
if FLaC__CPU_PPC
|
if FLaC__CPU_PPC
|
||||||
# The -force_cpusubtype_ALL is needed to insert a ppc64 instruction
|
|
||||||
# into cpu.c with an asm().
|
|
||||||
if FLaC__SYS_DARWIN
|
if FLaC__SYS_DARWIN
|
||||||
#@@@ PPC optimizations temporarily disabled
|
CPUCFLAGS = -faltivec
|
||||||
CPUCFLAGS = -faltivec -force_cpusubtype_ALL -DFLAC__NO_ASM
|
|
||||||
else
|
else
|
||||||
# Linux-gcc for PPC does not have -force_cpusubtype_ALL, it is Darwin-specific
|
|
||||||
CPUCFLAGS =
|
CPUCFLAGS =
|
||||||
if FLaC__USE_ALTIVEC
|
if FLaC__USE_ALTIVEC
|
||||||
CPUCFLAGS += -maltivec -mabi=altivec
|
CPUCFLAGS += -maltivec -mabi=altivec
|
||||||
endif
|
endif
|
||||||
#@@@ PPC optimizations temporarily disabled
|
|
||||||
CPUCFLAGS += -DFLAC__NO_ASM
|
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@@ -65,21 +59,6 @@ ARCH_SUBDIRS = ia32
|
|||||||
LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la
|
LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
if FLaC__CPU_PPC
|
|
||||||
ARCH_SUBDIRS = ppc
|
|
||||||
if FLaC__HAS_AS__TEMPORARILY_DISABLED
|
|
||||||
if FLaC__CPU_PPC_SPE
|
|
||||||
else
|
|
||||||
LOCAL_EXTRA_LIBADD = ppc/as/libFLAC-asm.la
|
|
||||||
LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning"
|
|
||||||
else
|
|
||||||
if FLaC__HAS_GAS__TEMPORARILY_DISABLED
|
|
||||||
LOCAL_EXTRA_LIBADD = ppc/gas/libFLAC-asm.la
|
|
||||||
LOCAL_EXTRA_LDFLAGS = ""
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
if OS_IS_WINDOWS
|
if OS_IS_WINDOWS
|
||||||
|
|||||||
@@ -62,17 +62,12 @@ endif
|
|||||||
INCLUDES = -I./include -I$(topdir)/include $(OGG_INCLUDES)
|
INCLUDES = -I./include -I$(topdir)/include $(OGG_INCLUDES)
|
||||||
DEBUG_CFLAGS = -DFLAC__OVERFLOW_DETECT
|
DEBUG_CFLAGS = -DFLAC__OVERFLOW_DETECT
|
||||||
|
|
||||||
ifeq ($(OS),Darwin)
|
|
||||||
SRCS_S = \
|
|
||||||
ppc/as/lpc_asm.s
|
|
||||||
else
|
|
||||||
ifeq ($(PROC),i386)
|
ifeq ($(PROC),i386)
|
||||||
SRCS_NASM = \
|
SRCS_NASM = \
|
||||||
ia32/cpu_asm.nasm \
|
ia32/cpu_asm.nasm \
|
||||||
ia32/fixed_asm.nasm \
|
ia32/fixed_asm.nasm \
|
||||||
ia32/lpc_asm.nasm
|
ia32/lpc_asm.nasm
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
OGG_SRCS_C = \
|
OGG_SRCS_C = \
|
||||||
ogg_decoder_aspect.c \
|
ogg_decoder_aspect.c \
|
||||||
|
|||||||
@@ -45,37 +45,8 @@ static void disable_sse(FLAC__CPUInfo *info)
|
|||||||
{
|
{
|
||||||
info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
|
info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
|
||||||
}
|
}
|
||||||
#elif defined FLAC__CPU_PPC
|
|
||||||
# if !defined FLAC__NO_ASM
|
|
||||||
# if defined FLAC__SYS_DARWIN
|
|
||||||
# include <sys/sysctl.h>
|
|
||||||
# include <mach/mach.h>
|
|
||||||
# include <mach/mach_host.h>
|
|
||||||
# include <mach/host_info.h>
|
|
||||||
# include <mach/machine.h>
|
|
||||||
# ifndef CPU_SUBTYPE_POWERPC_970
|
|
||||||
# define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100)
|
|
||||||
# endif
|
|
||||||
# else /* FLAC__SYS_DARWIN */
|
|
||||||
|
|
||||||
# include <signal.h>
|
#endif
|
||||||
# include <setjmp.h>
|
|
||||||
|
|
||||||
static sigjmp_buf jmpbuf;
|
|
||||||
static volatile sig_atomic_t canjump = 0;
|
|
||||||
|
|
||||||
static void sigill_handler (int sig)
|
|
||||||
{
|
|
||||||
if (!canjump) {
|
|
||||||
signal (sig, SIG_DFL);
|
|
||||||
raise (sig);
|
|
||||||
}
|
|
||||||
canjump = 0;
|
|
||||||
siglongjmp (jmpbuf, 1);
|
|
||||||
}
|
|
||||||
# endif /* FLAC__SYS_DARWIN */
|
|
||||||
# endif /* FLAC__NO_ASM */
|
|
||||||
#endif /* FLAC__CPU_PPC */
|
|
||||||
|
|
||||||
#if defined (__NetBSD__) || defined(__OpenBSD__)
|
#if defined (__NetBSD__) || defined(__OpenBSD__)
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
@@ -359,71 +330,6 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
|
|||||||
info->use_asm = false;
|
info->use_asm = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* PPC-specific
|
|
||||||
*/
|
|
||||||
#elif defined FLAC__CPU_PPC
|
|
||||||
info->type = FLAC__CPUINFO_TYPE_PPC;
|
|
||||||
# if !defined FLAC__NO_ASM
|
|
||||||
info->use_asm = true;
|
|
||||||
# ifdef FLAC__USE_ALTIVEC
|
|
||||||
# if defined FLAC__SYS_DARWIN
|
|
||||||
{
|
|
||||||
int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT };
|
|
||||||
size_t len = sizeof(val);
|
|
||||||
info->ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
host_basic_info_data_t hostInfo;
|
|
||||||
mach_msg_type_number_t infoCount;
|
|
||||||
|
|
||||||
infoCount = HOST_BASIC_INFO_COUNT;
|
|
||||||
host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount);
|
|
||||||
|
|
||||||
info->ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970);
|
|
||||||
}
|
|
||||||
# else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */
|
|
||||||
{
|
|
||||||
/* no Darwin, do it the brute-force way */
|
|
||||||
/* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */
|
|
||||||
info->ppc.altivec = 0;
|
|
||||||
info->ppc.ppc64 = 0;
|
|
||||||
|
|
||||||
signal (SIGILL, sigill_handler);
|
|
||||||
canjump = 0;
|
|
||||||
if (!sigsetjmp (jmpbuf, 1)) {
|
|
||||||
canjump = 1;
|
|
||||||
|
|
||||||
asm volatile (
|
|
||||||
"mtspr 256, %0\n\t"
|
|
||||||
"vand %%v0, %%v0, %%v0"
|
|
||||||
:
|
|
||||||
: "r" (-1)
|
|
||||||
);
|
|
||||||
|
|
||||||
info->ppc.altivec = 1;
|
|
||||||
}
|
|
||||||
canjump = 0;
|
|
||||||
if (!sigsetjmp (jmpbuf, 1)) {
|
|
||||||
int x = 0;
|
|
||||||
canjump = 1;
|
|
||||||
|
|
||||||
/* PPC64 hardware implements the cntlzd instruction */
|
|
||||||
asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) );
|
|
||||||
|
|
||||||
info->ppc.ppc64 = 1;
|
|
||||||
}
|
|
||||||
signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */
|
|
||||||
}
|
|
||||||
# endif
|
|
||||||
# else /* !FLAC__USE_ALTIVEC */
|
|
||||||
info->ppc.altivec = 0;
|
|
||||||
info->ppc.ppc64 = 0;
|
|
||||||
# endif
|
|
||||||
# else
|
|
||||||
info->use_asm = false;
|
|
||||||
# endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* unknown CPU
|
* unknown CPU
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -83,7 +83,6 @@
|
|||||||
typedef enum {
|
typedef enum {
|
||||||
FLAC__CPUINFO_TYPE_IA32,
|
FLAC__CPUINFO_TYPE_IA32,
|
||||||
FLAC__CPUINFO_TYPE_X86_64,
|
FLAC__CPUINFO_TYPE_X86_64,
|
||||||
FLAC__CPUINFO_TYPE_PPC,
|
|
||||||
FLAC__CPUINFO_TYPE_UNKNOWN
|
FLAC__CPUINFO_TYPE_UNKNOWN
|
||||||
} FLAC__CPUInfo_Type;
|
} FLAC__CPUInfo_Type;
|
||||||
|
|
||||||
@@ -111,11 +110,6 @@ typedef struct {
|
|||||||
FLAC__bool sse41;
|
FLAC__bool sse41;
|
||||||
FLAC__bool sse42;
|
FLAC__bool sse42;
|
||||||
} FLAC__CPUInfo_x86_64;
|
} FLAC__CPUInfo_x86_64;
|
||||||
#elif defined FLAC__CPU_PPC
|
|
||||||
typedef struct {
|
|
||||||
FLAC__bool altivec;
|
|
||||||
FLAC__bool ppc64;
|
|
||||||
} FLAC__CPUInfo_PPC;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -125,8 +119,6 @@ typedef struct {
|
|||||||
FLAC__CPUInfo_IA32 ia32;
|
FLAC__CPUInfo_IA32 ia32;
|
||||||
#elif defined FLAC__CPU_X86_64
|
#elif defined FLAC__CPU_X86_64
|
||||||
FLAC__CPUInfo_x86_64 x86_64;
|
FLAC__CPUInfo_x86_64 x86_64;
|
||||||
#elif defined FLAC__CPU_PPC
|
|
||||||
FLAC__CPUInfo_PPC ppc;
|
|
||||||
#endif
|
#endif
|
||||||
} FLAC__CPUInfo;
|
} FLAC__CPUInfo;
|
||||||
|
|
||||||
|
|||||||
@@ -196,10 +196,7 @@ void FLAC__lpc_restore_signal_asm_ia32(const FLAC__int32 residual[], unsigned da
|
|||||||
void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
# endif /* FLAC__HAS_NASM */
|
# endif /* FLAC__HAS_NASM */
|
||||||
# elif defined FLAC__CPU_PPC
|
# endif /* FLAC__CPU_IA32 */
|
||||||
void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
|
||||||
void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
|
||||||
# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */
|
|
||||||
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
|
||||||
# ifdef FLAC__SSE2_SUPPORTED
|
# ifdef FLAC__SSE2_SUPPORTED
|
||||||
void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
|
|||||||
@@ -1,32 +0,0 @@
|
|||||||
# libFLAC - Free Lossless Audio Codec library
|
|
||||||
# Copyright (C) 2004-2009 Josh Coalson
|
|
||||||
# Copyright (C) 2011-2013 Xiph.Org Foundation
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
#
|
|
||||||
# - Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# - Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
SUBDIRS = as gas
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
# libFLAC - Free Lossless Audio Codec library
|
|
||||||
# Copyright (C) 2004-2009 Josh Coalson
|
|
||||||
# Copyright (C) 2011-2013 Xiph.Org Foundation
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
#
|
|
||||||
# - Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# - Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
#@@@
|
|
||||||
if FLaC__HAS_AS__TEMPORARILY_DISABLED
|
|
||||||
|
|
||||||
SUFFIXES = .s .lo
|
|
||||||
|
|
||||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
|
||||||
|
|
||||||
# For some unknown reason libtool can't figure out the tag for 'as', so
|
|
||||||
# we fake it with --tag=CC and strip out unwanted options.
|
|
||||||
.s.lo:
|
|
||||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $<
|
|
||||||
|
|
||||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
|
||||||
libFLAC_asm_la_SOURCES = \
|
|
||||||
lpc_asm.s
|
|
||||||
|
|
||||||
else
|
|
||||||
|
|
||||||
EXTRA_DIST = \
|
|
||||||
lpc_asm.s
|
|
||||||
|
|
||||||
endif
|
|
||||||
@@ -1,430 +0,0 @@
|
|||||||
; libFLAC - Free Lossless Audio Codec library
|
|
||||||
; Copyright (C) 2004-2009 Josh Coalson
|
|
||||||
; Copyright (C) 2011-2013 Xiph.Org Foundation
|
|
||||||
;
|
|
||||||
; Redistribution and use in source and binary forms, with or without
|
|
||||||
; modification, are permitted provided that the following conditions
|
|
||||||
; are met:
|
|
||||||
;
|
|
||||||
; - Redistributions of source code must retain the above copyright
|
|
||||||
; notice, this list of conditions and the following disclaimer.
|
|
||||||
;
|
|
||||||
; - Redistributions in binary form must reproduce the above copyright
|
|
||||||
; notice, this list of conditions and the following disclaimer in the
|
|
||||||
; documentation and/or other materials provided with the distribution.
|
|
||||||
;
|
|
||||||
; - Neither the name of the Xiph.org Foundation nor the names of its
|
|
||||||
; contributors may be used to endorse or promote products derived from
|
|
||||||
; this software without specific prior written permission.
|
|
||||||
;
|
|
||||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
||||||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
.text
|
|
||||||
.align 2
|
|
||||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
|
|
||||||
|
|
||||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
|
|
||||||
|
|
||||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
|
|
||||||
; r3: residual[]
|
|
||||||
; r4: data_len
|
|
||||||
; r5: qlp_coeff[]
|
|
||||||
; r6: order
|
|
||||||
; r7: lp_quantization
|
|
||||||
; r8: data[]
|
|
||||||
|
|
||||||
; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
|
|
||||||
; this is a PowerPC/Altivec assembly version which requires bps<=16 (or actual
|
|
||||||
; bps<=15 for mid-side coding, since that uses an extra bit)
|
|
||||||
|
|
||||||
; these should be fast; the inner loop is unrolled (it takes no more than
|
|
||||||
; 3*(order%4) instructions, all of which are arithmetic), and all of the
|
|
||||||
; coefficients and all relevant history stay in registers, so the outer loop
|
|
||||||
; has only one load from memory (the residual)
|
|
||||||
|
|
||||||
; I have not yet run this through simg4, so there may be some avoidable stalls,
|
|
||||||
; and there may be a somewhat more clever way to do the outer loop
|
|
||||||
|
|
||||||
; the branch mechanism may prevent dynamic loading; I still need to examine
|
|
||||||
; this issue, and there may be a more elegant method
|
|
||||||
|
|
||||||
stmw r31,-4(r1)
|
|
||||||
|
|
||||||
addi r9,r1,-28
|
|
||||||
li r31,0xf
|
|
||||||
andc r9,r9,r31 ; for quadword-aligned stack data
|
|
||||||
|
|
||||||
slwi r6,r6,2 ; adjust for word size
|
|
||||||
slwi r4,r4,2
|
|
||||||
add r4,r4,r8 ; r4 = data+data_len
|
|
||||||
|
|
||||||
mfspr r0,256 ; cache old vrsave
|
|
||||||
addis r31,0,hi16(0xfffffc00)
|
|
||||||
ori r31,r31,lo16(0xfffffc00)
|
|
||||||
mtspr 256,r31 ; declare VRs in vrsave
|
|
||||||
|
|
||||||
cmplw cr0,r8,r4 ; i<data_len
|
|
||||||
bc 4,0,L1400
|
|
||||||
|
|
||||||
; load coefficients into v0-v7 and initial history into v8-v15
|
|
||||||
li r31,0xf
|
|
||||||
and r31,r8,r31 ; r31: data%4
|
|
||||||
li r11,16
|
|
||||||
subf r31,r31,r11 ; r31: 4-(data%4)
|
|
||||||
slwi r31,r31,3 ; convert to bits for vsro
|
|
||||||
li r10,-4
|
|
||||||
stw r31,-4(r9)
|
|
||||||
lvewx v0,r10,r9
|
|
||||||
vspltisb v18,-1
|
|
||||||
vsro v18,v18,v0 ; v18: mask vector
|
|
||||||
|
|
||||||
li r31,0x8
|
|
||||||
lvsl v0,0,r31
|
|
||||||
vsldoi v0,v0,v0,12
|
|
||||||
li r31,0xc
|
|
||||||
lvsl v1,0,r31
|
|
||||||
vspltisb v2,0
|
|
||||||
vspltisb v3,-1
|
|
||||||
vmrglw v2,v2,v3
|
|
||||||
vsel v0,v1,v0,v2 ; v0: reversal permutation vector
|
|
||||||
|
|
||||||
add r10,r5,r6
|
|
||||||
lvsl v17,0,r5 ; v17: coefficient alignment permutation vector
|
|
||||||
vperm v17,v17,v17,v0 ; v17: reversal coefficient alignment permutation vector
|
|
||||||
|
|
||||||
mr r11,r8
|
|
||||||
lvsl v16,0,r11 ; v16: history alignment permutation vector
|
|
||||||
|
|
||||||
lvx v0,0,r5
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v1,0,r5
|
|
||||||
vperm v0,v0,v1,v17
|
|
||||||
lvx v8,0,r11
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v9,0,r11
|
|
||||||
vperm v8,v9,v8,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1101
|
|
||||||
vand v0,v0,v18
|
|
||||||
addis r31,0,hi16(L1307)
|
|
||||||
ori r31,r31,lo16(L1307)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1101:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v2,0,r5
|
|
||||||
vperm v1,v1,v2,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v10,0,r11
|
|
||||||
vperm v9,v10,v9,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1102
|
|
||||||
vand v1,v1,v18
|
|
||||||
addis r31,0,hi16(L1306)
|
|
||||||
ori r31,r31,lo16(L1306)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1102:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v3,0,r5
|
|
||||||
vperm v2,v2,v3,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v11,0,r11
|
|
||||||
vperm v10,v11,v10,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1103
|
|
||||||
vand v2,v2,v18
|
|
||||||
addis r31,0,hi16(L1305)
|
|
||||||
ori r31,r31,lo16(L1305)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1103:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v4,0,r5
|
|
||||||
vperm v3,v3,v4,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v12,0,r11
|
|
||||||
vperm v11,v12,v11,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1104
|
|
||||||
vand v3,v3,v18
|
|
||||||
addis r31,0,hi16(L1304)
|
|
||||||
ori r31,r31,lo16(L1304)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1104:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v5,0,r5
|
|
||||||
vperm v4,v4,v5,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v13,0,r11
|
|
||||||
vperm v12,v13,v12,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1105
|
|
||||||
vand v4,v4,v18
|
|
||||||
addis r31,0,hi16(L1303)
|
|
||||||
ori r31,r31,lo16(L1303)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1105:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v6,0,r5
|
|
||||||
vperm v5,v5,v6,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v14,0,r11
|
|
||||||
vperm v13,v14,v13,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1106
|
|
||||||
vand v5,v5,v18
|
|
||||||
addis r31,0,hi16(L1302)
|
|
||||||
ori r31,r31,lo16(L1302)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1106:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v7,0,r5
|
|
||||||
vperm v6,v6,v7,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v15,0,r11
|
|
||||||
vperm v14,v15,v14,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1107
|
|
||||||
vand v6,v6,v18
|
|
||||||
addis r31,0,hi16(L1301)
|
|
||||||
ori r31,r31,lo16(L1301)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1107:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v19,0,r5
|
|
||||||
vperm v7,v7,v19,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v19,0,r11
|
|
||||||
vperm v15,v19,v15,v16
|
|
||||||
vand v7,v7,v18
|
|
||||||
addis r31,0,hi16(L1300)
|
|
||||||
ori r31,r31,lo16(L1300)
|
|
||||||
|
|
||||||
L1199:
|
|
||||||
mtctr r31
|
|
||||||
|
|
||||||
; set up invariant vectors
|
|
||||||
vspltish v16,0 ; v16: zero vector
|
|
||||||
|
|
||||||
li r10,-12
|
|
||||||
lvsr v17,r10,r8 ; v17: result shift vector
|
|
||||||
lvsl v18,r10,r3 ; v18: residual shift back vector
|
|
||||||
|
|
||||||
li r10,-4
|
|
||||||
stw r7,-4(r9)
|
|
||||||
lvewx v19,r10,r9 ; v19: lp_quantization vector
|
|
||||||
|
|
||||||
L1200:
|
|
||||||
vmulosh v20,v0,v8 ; v20: sum vector
|
|
||||||
bcctr 20,0
|
|
||||||
|
|
||||||
L1300:
|
|
||||||
vmulosh v21,v7,v15
|
|
||||||
vsldoi v15,v15,v14,4 ; increment history
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1301:
|
|
||||||
vmulosh v21,v6,v14
|
|
||||||
vsldoi v14,v14,v13,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1302:
|
|
||||||
vmulosh v21,v5,v13
|
|
||||||
vsldoi v13,v13,v12,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1303:
|
|
||||||
vmulosh v21,v4,v12
|
|
||||||
vsldoi v12,v12,v11,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1304:
|
|
||||||
vmulosh v21,v3,v11
|
|
||||||
vsldoi v11,v11,v10,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1305:
|
|
||||||
vmulosh v21,v2,v10
|
|
||||||
vsldoi v10,v10,v9,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1306:
|
|
||||||
vmulosh v21,v1,v9
|
|
||||||
vsldoi v9,v9,v8,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1307:
|
|
||||||
vsumsws v20,v20,v16 ; v20[3]: sum
|
|
||||||
vsraw v20,v20,v19 ; v20[3]: sum >> lp_quantization
|
|
||||||
|
|
||||||
lvewx v21,0,r3 ; v21[n]: *residual
|
|
||||||
vperm v21,v21,v21,v18 ; v21[3]: *residual
|
|
||||||
vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization)
|
|
||||||
vsldoi v18,v18,v18,4 ; increment shift vector
|
|
||||||
|
|
||||||
vperm v21,v20,v20,v17 ; v21[n]: shift for storage
|
|
||||||
vsldoi v17,v17,v17,12 ; increment shift vector
|
|
||||||
stvewx v21,0,r8
|
|
||||||
|
|
||||||
vsldoi v20,v20,v20,12
|
|
||||||
vsldoi v8,v8,v20,4 ; insert value onto history
|
|
||||||
|
|
||||||
addi r3,r3,4
|
|
||||||
addi r8,r8,4
|
|
||||||
cmplw cr0,r8,r4 ; i<data_len
|
|
||||||
bc 12,0,L1200
|
|
||||||
|
|
||||||
L1400:
|
|
||||||
mtspr 256,r0 ; restore old vrsave
|
|
||||||
lmw r31,-4(r1)
|
|
||||||
blr
|
|
||||||
|
|
||||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
|
|
||||||
; r3: residual[]
|
|
||||||
; r4: data_len
|
|
||||||
; r5: qlp_coeff[]
|
|
||||||
; r6: order
|
|
||||||
; r7: lp_quantization
|
|
||||||
; r8: data[]
|
|
||||||
|
|
||||||
; see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
|
|
||||||
; this version assumes order<=8; it uses fewer vector registers, which should
|
|
||||||
; save time in context switches, and has less code, which may improve
|
|
||||||
; instruction caching
|
|
||||||
|
|
||||||
stmw r31,-4(r1)
|
|
||||||
|
|
||||||
addi r9,r1,-28
|
|
||||||
li r31,0xf
|
|
||||||
andc r9,r9,r31 ; for quadword-aligned stack data
|
|
||||||
|
|
||||||
slwi r6,r6,2 ; adjust for word size
|
|
||||||
slwi r4,r4,2
|
|
||||||
add r4,r4,r8 ; r4 = data+data_len
|
|
||||||
|
|
||||||
mfspr r0,256 ; cache old vrsave
|
|
||||||
addis r31,0,hi16(0xffc00000)
|
|
||||||
ori r31,r31,lo16(0xffc00000)
|
|
||||||
mtspr 256,r31 ; declare VRs in vrsave
|
|
||||||
|
|
||||||
cmplw cr0,r8,r4 ; i<data_len
|
|
||||||
bc 4,0,L2400
|
|
||||||
|
|
||||||
; load coefficients into v0-v1 and initial history into v2-v3
|
|
||||||
li r31,0xf
|
|
||||||
and r31,r8,r31 ; r31: data%16 (byte offset of data within its quadword)
|
|
||||||
li r11,16
|
|
||||||
subf r31,r31,r11 ; r31: 16-(data%16), in bytes
|
|
||||||
slwi r31,r31,3 ; convert to bits for vsro
|
|
||||||
li r10,-4
|
|
||||||
stw r31,-4(r9)
|
|
||||||
lvewx v0,r10,r9
|
|
||||||
vspltisb v6,-1
|
|
||||||
vsro v6,v6,v0 ; v6: mask vector
|
|
||||||
|
|
||||||
li r31,0x8
|
|
||||||
lvsl v0,0,r31
|
|
||||||
vsldoi v0,v0,v0,12
|
|
||||||
li r31,0xc
|
|
||||||
lvsl v1,0,r31
|
|
||||||
vspltisb v2,0
|
|
||||||
vspltisb v3,-1
|
|
||||||
vmrglw v2,v2,v3
|
|
||||||
vsel v0,v1,v0,v2 ; v0: reversal permutation vector
|
|
||||||
|
|
||||||
add r10,r5,r6
|
|
||||||
lvsl v5,0,r5 ; v5: coefficient alignment permutation vector
|
|
||||||
vperm v5,v5,v5,v0 ; v5: reversal coefficient alignment permutation vector
|
|
||||||
|
|
||||||
mr r11,r8
|
|
||||||
lvsl v4,0,r11 ; v4: history alignment permutation vector
|
|
||||||
|
|
||||||
lvx v0,0,r5
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v1,0,r5
|
|
||||||
vperm v0,v0,v1,v5
|
|
||||||
lvx v2,0,r11
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v3,0,r11
|
|
||||||
vperm v2,v3,v2,v4
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L2101
|
|
||||||
vand v0,v0,v6
|
|
||||||
addis r31,0,hi16(L2301)
|
|
||||||
ori r31,r31,lo16(L2301)
|
|
||||||
b L2199
|
|
||||||
|
|
||||||
L2101:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v7,0,r5
|
|
||||||
vperm v1,v1,v7,v5
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v7,0,r11
|
|
||||||
vperm v3,v7,v3,v4
|
|
||||||
vand v1,v1,v6
|
|
||||||
addis r31,0,hi16(L2300)
|
|
||||||
ori r31,r31,lo16(L2300)
|
|
||||||
|
|
||||||
L2199:
|
|
||||||
mtctr r31
|
|
||||||
|
|
||||||
; set up invariant vectors
|
|
||||||
vspltish v4,0 ; v4: zero vector
|
|
||||||
|
|
||||||
li r10,-12
|
|
||||||
lvsr v5,r10,r8 ; v5: result shift vector
|
|
||||||
lvsl v6,r10,r3 ; v6: residual shift back vector
|
|
||||||
|
|
||||||
li r10,-4
|
|
||||||
stw r7,-4(r9)
|
|
||||||
lvewx v7,r10,r9 ; v7: lp_quantization vector
|
|
||||||
|
|
||||||
L2200:
|
|
||||||
vmulosh v8,v0,v2 ; v8: sum vector
|
|
||||||
bcctr 20,0
|
|
||||||
|
|
||||||
L2300:
|
|
||||||
vmulosh v9,v1,v3
|
|
||||||
vsldoi v3,v3,v2,4
|
|
||||||
vaddsws v8,v8,v9
|
|
||||||
|
|
||||||
L2301:
|
|
||||||
vsumsws v8,v8,v4 ; v8[3]: sum
|
|
||||||
vsraw v8,v8,v7 ; v8[3]: sum >> lp_quantization
|
|
||||||
|
|
||||||
lvewx v9,0,r3 ; v9[n]: *residual
|
|
||||||
vperm v9,v9,v9,v6 ; v9[3]: *residual
|
|
||||||
vaddsws v8,v9,v8 ; v8[3]: *residual + (sum >> lp_quantization)
|
|
||||||
vsldoi v6,v6,v6,4 ; increment shift vector
|
|
||||||
|
|
||||||
vperm v9,v8,v8,v5 ; v9[n]: shift for storage
|
|
||||||
vsldoi v5,v5,v5,12 ; increment shift vector
|
|
||||||
stvewx v9,0,r8
|
|
||||||
|
|
||||||
vsldoi v8,v8,v8,12
|
|
||||||
vsldoi v2,v2,v8,4 ; insert value onto history
|
|
||||||
|
|
||||||
addi r3,r3,4
|
|
||||||
addi r8,r8,4
|
|
||||||
cmplw cr0,r8,r4 ; i<data_len
|
|
||||||
bc 12,0,L2200
|
|
||||||
|
|
||||||
L2400:
|
|
||||||
mtspr 256,r0 ; restore old vrsave
|
|
||||||
lmw r31,-4(r1)
|
|
||||||
blr
|
|
||||||
@@ -1,53 +0,0 @@
|
|||||||
# libFLAC - Free Lossless Audio Codec library
|
|
||||||
# Copyright (C) 2004-2009 Josh Coalson
|
|
||||||
# Copyright (C) 2011-2013 Xiph.Org Foundation
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
#
|
|
||||||
# - Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# - Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
#@@@
|
|
||||||
if FLaC__HAS_GAS__TEMPORARILY_DISABLED
|
|
||||||
|
|
||||||
SUFFIXES = .s .lo
|
|
||||||
|
|
||||||
STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
|
|
||||||
|
|
||||||
# For some unknown reason libtool can't figure out the tag for 'gas', so
|
|
||||||
# we fake it with --tag=CC and strip out unwanted options.
|
|
||||||
.s.lo:
|
|
||||||
$(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) gas -force_cpusubtype_ALL -o $@ $<
|
|
||||||
|
|
||||||
noinst_LTLIBRARIES = libFLAC-asm.la
|
|
||||||
libFLAC_asm_la_SOURCES = \
|
|
||||||
lpc_asm.s
|
|
||||||
|
|
||||||
else
|
|
||||||
|
|
||||||
EXTRA_DIST = \
|
|
||||||
lpc_asm.s
|
|
||||||
|
|
||||||
endif
|
|
||||||
@@ -1,432 +0,0 @@
|
|||||||
# libFLAC - Free Lossless Audio Codec library
|
|
||||||
# Copyright (C) 2004-2009 Josh Coalson
|
|
||||||
# Copyright (C) 2011-2013 Xiph.Org Foundation
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
#
|
|
||||||
# - Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# - Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# - Neither the name of the Xiph.org Foundation nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
||||||
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
.text
|
|
||||||
.align 2
|
|
||||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
|
|
||||||
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16, @function
|
|
||||||
|
|
||||||
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
|
|
||||||
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, @function
|
|
||||||
|
|
||||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
|
|
||||||
# r3: residual[]
|
|
||||||
# r4: data_len
|
|
||||||
# r5: qlp_coeff[]
|
|
||||||
# r6: order
|
|
||||||
# r7: lp_quantization
|
|
||||||
# r8: data[]
|
|
||||||
|
|
||||||
# see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
|
|
||||||
# these are PowerPC/Altivec assembly versions which require bps<=16 (or actual
|
|
||||||
# bps<=15 for mid-side coding, since that uses an extra bit)
|
|
||||||
|
|
||||||
# these should be fast; the inner loop is unrolled (it takes no more than
|
|
||||||
# 3*ceil(order/4) instructions, all of which are arithmetic), and all of the
|
|
||||||
# coefficients and all relevant history stay in registers, so the outer loop
|
|
||||||
# has only one load from memory (the residual)
|
|
||||||
|
|
||||||
# I have not yet run this through simg4, so there may be some avoidable stalls,
|
|
||||||
# and there may be a somewhat more clever way to do the outer loop
|
|
||||||
|
|
||||||
# the branch mechanism may prevent dynamic loading; I still need to examine
|
|
||||||
# this issue, and there may be a more elegant method
|
|
||||||
|
|
||||||
stmw r31,-4(r1)
|
|
||||||
|
|
||||||
addi r9,r1,-28
|
|
||||||
li r31,0xf
|
|
||||||
andc r9,r9,r31 # for quadword-aligned stack data
|
|
||||||
|
|
||||||
slwi r6,r6,2 # adjust for word size
|
|
||||||
slwi r4,r4,2
|
|
||||||
add r4,r4,r8 # r4 = data+data_len
|
|
||||||
|
|
||||||
mfspr r0,256 # cache old vrsave
|
|
||||||
addis r31,0,0xffff
|
|
||||||
ori r31,r31,0xfc00
|
|
||||||
mtspr 256,r31 # declare VRs in vrsave
|
|
||||||
|
|
||||||
cmplw cr0,r8,r4 # i<data_len
|
|
||||||
bc 4,0,L1400
|
|
||||||
|
|
||||||
# load coefficients into v0-v7 and initial history into v8-v15
|
|
||||||
li r31,0xf
|
|
||||||
and r31,r8,r31 # r31: data%16 (byte offset of data within its quadword)
|
|
||||||
li r11,16
|
|
||||||
subf r31,r31,r11 # r31: 16-(data%16), in bytes
|
|
||||||
slwi r31,r31,3 # convert to bits for vsro
|
|
||||||
li r10,-4
|
|
||||||
stw r31,-4(r9)
|
|
||||||
lvewx v0,r10,r9
|
|
||||||
vspltisb v18,-1
|
|
||||||
vsro v18,v18,v0 # v18: mask vector
|
|
||||||
|
|
||||||
li r31,0x8
|
|
||||||
lvsl v0,0,r31
|
|
||||||
vsldoi v0,v0,v0,12
|
|
||||||
li r31,0xc
|
|
||||||
lvsl v1,0,r31
|
|
||||||
vspltisb v2,0
|
|
||||||
vspltisb v3,-1
|
|
||||||
vmrglw v2,v2,v3
|
|
||||||
vsel v0,v1,v0,v2 # v0: reversal permutation vector
|
|
||||||
|
|
||||||
add r10,r5,r6
|
|
||||||
lvsl v17,0,r5 # v17: coefficient alignment permutation vector
|
|
||||||
vperm v17,v17,v17,v0 # v17: reversal coefficient alignment permutation vector
|
|
||||||
|
|
||||||
mr r11,r8
|
|
||||||
lvsl v16,0,r11 # v16: history alignment permutation vector
|
|
||||||
|
|
||||||
lvx v0,0,r5
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v1,0,r5
|
|
||||||
vperm v0,v0,v1,v17
|
|
||||||
lvx v8,0,r11
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v9,0,r11
|
|
||||||
vperm v8,v9,v8,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1101
|
|
||||||
vand v0,v0,v18
|
|
||||||
addis r31,0,L1307@ha
|
|
||||||
ori r31,r31,L1307@l
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1101:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v2,0,r5
|
|
||||||
vperm v1,v1,v2,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v10,0,r11
|
|
||||||
vperm v9,v10,v9,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1102
|
|
||||||
vand v1,v1,v18
|
|
||||||
addis r31,0,L1306@ha
|
|
||||||
ori r31,r31,L1306@l
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1102:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v3,0,r5
|
|
||||||
vperm v2,v2,v3,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v11,0,r11
|
|
||||||
vperm v10,v11,v10,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1103
|
|
||||||
vand v2,v2,v18
|
|
||||||
lis r31,L1305@ha
|
|
||||||
la r31,L1305@l(r31)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1103:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v4,0,r5
|
|
||||||
vperm v3,v3,v4,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v12,0,r11
|
|
||||||
vperm v11,v12,v11,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1104
|
|
||||||
vand v3,v3,v18
|
|
||||||
lis r31,L1304@ha
|
|
||||||
la r31,L1304@l(r31)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1104:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v5,0,r5
|
|
||||||
vperm v4,v4,v5,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v13,0,r11
|
|
||||||
vperm v12,v13,v12,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1105
|
|
||||||
vand v4,v4,v18
|
|
||||||
lis r31,L1303@ha
|
|
||||||
la r31,L1303@l(r31)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1105:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v6,0,r5
|
|
||||||
vperm v5,v5,v6,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v14,0,r11
|
|
||||||
vperm v13,v14,v13,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1106
|
|
||||||
vand v5,v5,v18
|
|
||||||
lis r31,L1302@ha
|
|
||||||
la r31,L1302@l(r31)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1106:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v7,0,r5
|
|
||||||
vperm v6,v6,v7,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v15,0,r11
|
|
||||||
vperm v14,v15,v14,v16
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L1107
|
|
||||||
vand v6,v6,v18
|
|
||||||
lis r31,L1301@ha
|
|
||||||
la r31,L1301@l(r31)
|
|
||||||
b L1199
|
|
||||||
|
|
||||||
L1107:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v19,0,r5
|
|
||||||
vperm v7,v7,v19,v17
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v19,0,r11
|
|
||||||
vperm v15,v19,v15,v16
|
|
||||||
vand v7,v7,v18
|
|
||||||
lis r31,L1300@ha
|
|
||||||
la r31,L1300@l(r31)
|
|
||||||
|
|
||||||
L1199:
|
|
||||||
mtctr r31
|
|
||||||
|
|
||||||
# set up invariant vectors
|
|
||||||
vspltish v16,0 # v16: zero vector
|
|
||||||
|
|
||||||
li r10,-12
|
|
||||||
lvsr v17,r10,r8 # v17: result shift vector
|
|
||||||
lvsl v18,r10,r3 # v18: residual shift back vector
|
|
||||||
|
|
||||||
li r10,-4
|
|
||||||
stw r7,-4(r9)
|
|
||||||
lvewx v19,r10,r9 # v19: lp_quantization vector
|
|
||||||
|
|
||||||
L1200:
|
|
||||||
vmulosh v20,v0,v8 # v20: sum vector
|
|
||||||
bcctr 20,0
|
|
||||||
|
|
||||||
L1300:
|
|
||||||
vmulosh v21,v7,v15
|
|
||||||
vsldoi v15,v15,v14,4 # increment history
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1301:
|
|
||||||
vmulosh v21,v6,v14
|
|
||||||
vsldoi v14,v14,v13,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1302:
|
|
||||||
vmulosh v21,v5,v13
|
|
||||||
vsldoi v13,v13,v12,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1303:
|
|
||||||
vmulosh v21,v4,v12
|
|
||||||
vsldoi v12,v12,v11,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1304:
|
|
||||||
vmulosh v21,v3,v11
|
|
||||||
vsldoi v11,v11,v10,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1305:
|
|
||||||
vmulosh v21,v2,v10
|
|
||||||
vsldoi v10,v10,v9,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1306:
|
|
||||||
vmulosh v21,v1,v9
|
|
||||||
vsldoi v9,v9,v8,4
|
|
||||||
vaddsws v20,v20,v21
|
|
||||||
|
|
||||||
L1307:
|
|
||||||
vsumsws v20,v20,v16 # v20[3]: sum
|
|
||||||
vsraw v20,v20,v19 # v20[3]: sum >> lp_quantization
|
|
||||||
|
|
||||||
lvewx v21,0,r3 # v21[n]: *residual
|
|
||||||
vperm v21,v21,v21,v18 # v21[3]: *residual
|
|
||||||
vaddsws v20,v21,v20 # v20[3]: *residual + (sum >> lp_quantization)
|
|
||||||
vsldoi v18,v18,v18,4 # increment shift vector
|
|
||||||
|
|
||||||
vperm v21,v20,v20,v17 # v21[n]: shift for storage
|
|
||||||
vsldoi v17,v17,v17,12 # increment shift vector
|
|
||||||
stvewx v21,0,r8
|
|
||||||
|
|
||||||
vsldoi v20,v20,v20,12
|
|
||||||
vsldoi v8,v8,v20,4 # insert value onto history
|
|
||||||
|
|
||||||
addi r3,r3,4
|
|
||||||
addi r8,r8,4
|
|
||||||
cmplw cr0,r8,r4 # i<data_len
|
|
||||||
bc 12,0,L1200
|
|
||||||
|
|
||||||
L1400:
|
|
||||||
mtspr 256,r0 # restore old vrsave
|
|
||||||
lmw r31,-4(r1)
|
|
||||||
blr
|
|
||||||
|
|
||||||
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
|
|
||||||
# r3: residual[]
|
|
||||||
# r4: data_len
|
|
||||||
# r5: qlp_coeff[]
|
|
||||||
# r6: order
|
|
||||||
# r7: lp_quantization
|
|
||||||
# r8: data[]
|
|
||||||
|
|
||||||
# see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
|
|
||||||
# this version assumes order<=8; it uses fewer vector registers, which should
|
|
||||||
# save time in context switches, and has less code, which may improve
|
|
||||||
# instruction caching
|
|
||||||
|
|
||||||
stmw r31,-4(r1)
|
|
||||||
|
|
||||||
addi r9,r1,-28
|
|
||||||
li r31,0xf
|
|
||||||
andc r9,r9,r31 # for quadword-aligned stack data
|
|
||||||
|
|
||||||
slwi r6,r6,2 # adjust for word size
|
|
||||||
slwi r4,r4,2
|
|
||||||
add r4,r4,r8 # r4 = data+data_len
|
|
||||||
|
|
||||||
mfspr r0,256 # cache old vrsave
|
|
||||||
addis r31,0,0xffc0
|
|
||||||
ori r31,r31,0x0000
|
|
||||||
mtspr 256,r31 # declare VRs in vrsave
|
|
||||||
|
|
||||||
cmplw cr0,r8,r4 # i<data_len
|
|
||||||
bc 4,0,L2400
|
|
||||||
|
|
||||||
# load coefficients into v0-v1 and initial history into v2-v3
|
|
||||||
li r31,0xf
|
|
||||||
and r31,r8,r31 # r31: data%16 (byte offset of data within its quadword)
|
|
||||||
li r11,16
|
|
||||||
subf r31,r31,r11 # r31: 16-(data%16), in bytes
|
|
||||||
slwi r31,r31,3 # convert to bits for vsro
|
|
||||||
li r10,-4
|
|
||||||
stw r31,-4(r9)
|
|
||||||
lvewx v0,r10,r9
|
|
||||||
vspltisb v6,-1
|
|
||||||
vsro v6,v6,v0 # v6: mask vector
|
|
||||||
|
|
||||||
li r31,0x8
|
|
||||||
lvsl v0,0,r31
|
|
||||||
vsldoi v0,v0,v0,12
|
|
||||||
li r31,0xc
|
|
||||||
lvsl v1,0,r31
|
|
||||||
vspltisb v2,0
|
|
||||||
vspltisb v3,-1
|
|
||||||
vmrglw v2,v2,v3
|
|
||||||
vsel v0,v1,v0,v2 # v0: reversal permutation vector
|
|
||||||
|
|
||||||
add r10,r5,r6
|
|
||||||
lvsl v5,0,r5 # v5: coefficient alignment permutation vector
|
|
||||||
vperm v5,v5,v5,v0 # v5: reversal coefficient alignment permutation vector
|
|
||||||
|
|
||||||
mr r11,r8
|
|
||||||
lvsl v4,0,r11 # v4: history alignment permutation vector
|
|
||||||
|
|
||||||
lvx v0,0,r5
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v1,0,r5
|
|
||||||
vperm v0,v0,v1,v5
|
|
||||||
lvx v2,0,r11
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v3,0,r11
|
|
||||||
vperm v2,v3,v2,v4
|
|
||||||
cmplw cr0,r5,r10
|
|
||||||
bc 12,0,L2101
|
|
||||||
vand v0,v0,v6
|
|
||||||
lis r31,L2301@ha
|
|
||||||
la r31,L2301@l(r31)
|
|
||||||
b L2199
|
|
||||||
|
|
||||||
L2101:
|
|
||||||
addi r5,r5,16
|
|
||||||
lvx v7,0,r5
|
|
||||||
vperm v1,v1,v7,v5
|
|
||||||
addi r11,r11,-16
|
|
||||||
lvx v7,0,r11
|
|
||||||
vperm v3,v7,v3,v4
|
|
||||||
vand v1,v1,v6
|
|
||||||
lis r31,L2300@ha
|
|
||||||
la r31,L2300@l(r31)
|
|
||||||
|
|
||||||
L2199:
|
|
||||||
mtctr r31
|
|
||||||
|
|
||||||
# set up invariant vectors
|
|
||||||
vspltish v4,0 # v4: zero vector
|
|
||||||
|
|
||||||
li r10,-12
|
|
||||||
lvsr v5,r10,r8 # v5: result shift vector
|
|
||||||
lvsl v6,r10,r3 # v6: residual shift back vector
|
|
||||||
|
|
||||||
li r10,-4
|
|
||||||
stw r7,-4(r9)
|
|
||||||
lvewx v7,r10,r9 # v7: lp_quantization vector
|
|
||||||
|
|
||||||
L2200:
|
|
||||||
vmulosh v8,v0,v2 # v8: sum vector
|
|
||||||
bcctr 20,0
|
|
||||||
|
|
||||||
L2300:
|
|
||||||
vmulosh v9,v1,v3
|
|
||||||
vsldoi v3,v3,v2,4
|
|
||||||
vaddsws v8,v8,v9
|
|
||||||
|
|
||||||
L2301:
|
|
||||||
vsumsws v8,v8,v4 # v8[3]: sum
|
|
||||||
vsraw v8,v8,v7 # v8[3]: sum >> lp_quantization
|
|
||||||
|
|
||||||
lvewx v9,0,r3 # v9[n]: *residual
|
|
||||||
vperm v9,v9,v9,v6 # v9[3]: *residual
|
|
||||||
vaddsws v8,v9,v8 # v8[3]: *residual + (sum >> lp_quantization)
|
|
||||||
vsldoi v6,v6,v6,4 # increment shift vector
|
|
||||||
|
|
||||||
vperm v9,v8,v8,v5 # v9[n]: shift for storage
|
|
||||||
vsldoi v5,v5,v5,12 # increment shift vector
|
|
||||||
stvewx v9,0,r8
|
|
||||||
|
|
||||||
vsldoi v8,v8,v8,12
|
|
||||||
vsldoi v2,v2,v8,4 # insert value onto history
|
|
||||||
|
|
||||||
addi r3,r3,4
|
|
||||||
addi r8,r8,4
|
|
||||||
cmplw cr0,r8,r4 # i<data_len
|
|
||||||
bc 12,0,L2200
|
|
||||||
|
|
||||||
L2400:
|
|
||||||
mtspr 256,r0 # restore old vrsave
|
|
||||||
lmw r31,-4(r1)
|
|
||||||
blr
|
|
||||||
@@ -142,8 +142,6 @@ typedef struct FLAC__StreamDecoderPrivate {
|
|||||||
void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */
|
/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */
|
||||||
void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
||||||
/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */
|
|
||||||
void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
|
|
||||||
void *client_data;
|
void *client_data;
|
||||||
FILE *file; /* only used if FLAC__stream_decoder_init_FILE()/FLAC__stream_decoder_init_file() called, else NULL */
|
FILE *file; /* only used if FLAC__stream_decoder_init_FILE()/FLAC__stream_decoder_init_file() called, else NULL */
|
||||||
FLAC__BitReader *input;
|
FLAC__BitReader *input;
|
||||||
@@ -391,7 +389,6 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
|
|||||||
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
|
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
|
||||||
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
|
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
|
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
|
|
||||||
/* now override with asm where appropriate */
|
/* now override with asm where appropriate */
|
||||||
#ifndef FLAC__NO_ASM
|
#ifndef FLAC__NO_ASM
|
||||||
if(decoder->private_->cpuinfo.use_asm) {
|
if(decoder->private_->cpuinfo.use_asm) {
|
||||||
@@ -402,19 +399,16 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
|
|||||||
if(decoder->private_->cpuinfo.ia32.mmx) {
|
if(decoder->private_->cpuinfo.ia32.mmx) {
|
||||||
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
|
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
|
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32_mmx;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
|
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
|
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef FLAC__HAS_X86INTRIN
|
#ifdef FLAC__HAS_X86INTRIN
|
||||||
# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not faster than ASM/MMX code */
|
# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not faster than ASM/MMX code */
|
||||||
if(decoder->private_->cpuinfo.ia32.sse2) {
|
if(decoder->private_->cpuinfo.ia32.sse2) {
|
||||||
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
|
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
|
|
||||||
}
|
}
|
||||||
# endif
|
# endif
|
||||||
# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT_SSE: faster than asm; TODO: more tests */
|
# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT_SSE: faster than asm; TODO: more tests */
|
||||||
@@ -422,12 +416,6 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
|
|||||||
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
|
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
#elif defined FLAC__CPU_PPC
|
|
||||||
FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
|
|
||||||
if(decoder->private_->cpuinfo.ppc.altivec) {
|
|
||||||
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16;
|
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -1320,9 +1308,6 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
|
|||||||
memset(tmp, 0, sizeof(FLAC__int32)*4);
|
memset(tmp, 0, sizeof(FLAC__int32)*4);
|
||||||
decoder->private_->output[i] = tmp + 4;
|
decoder->private_->output[i] = tmp + 4;
|
||||||
|
|
||||||
/* WATCHOUT:
|
|
||||||
* minimum of quadword alignment for PPC vector optimizations is REQUIRED:
|
|
||||||
*/
|
|
||||||
if(!FLAC__memory_alloc_aligned_int32_array(size, &decoder->private_->residual_unaligned[i], &decoder->private_->residual[i])) {
|
if(!FLAC__memory_alloc_aligned_int32_array(size, &decoder->private_->residual_unaligned[i], &decoder->private_->residual[i])) {
|
||||||
decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
|
decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
|
||||||
return false;
|
return false;
|
||||||
@@ -2664,12 +2649,8 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un
|
|||||||
if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) )
|
if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) )
|
||||||
*/
|
*/
|
||||||
if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
|
if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
|
||||||
if(bps <= 16 && subframe->qlp_coeff_precision <= 16) {
|
if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
|
||||||
if(order <= 8)
|
decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
|
||||||
decoder->private_->local_lpc_restore_signal_16bit_order8(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
|
|
||||||
else
|
|
||||||
decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
|
decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
|
||||||
else
|
else
|
||||||
|
|||||||
Reference in New Issue
Block a user