// Reed-Solomon encoding/erasure decoding
// Fast version on GF(2^n_field) using Walsh transform

// Implementation of the algorithms described in
// "Efficient erasure decoding of Reed-Solomon codes"
// http://arxiv.org/abs/0901.1886v1

// (c) 2009 Frederic Didier.
// Any feedback is very welcome. For any questions or comments,
// see http://algo.epfl.ch/~didier/reed_solomon.html or email
// frederic.didier@epfl.ch

// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above
//    copyright notice, this list of conditions and the following
//    disclaimer in the documentation and/or other materials
//    provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS
// BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
// OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
// OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
// OF SUCH DAMAGE.
//

// ************************************************
// ************************************************

#include "reed_solomon.h"
|
|
#include "stdlib.h"
|
|
#include "stdio.h"
|
|
|
|
// ************************************************
// ************************************************

int n_field;
int n_walsh;
int N_field;
int N_walsh;
int modulo;

// GF tables
symbol *log_table;
symbol *exp_table;

// Encoding/Decoding product
symbol *product;
symbol *product_enc;

// Used to compute product
symbol *log_walsh;
symbol *pos;

// extra memory needed for fast evaluation
symbol *coeff, *t_coeff;
symbol *inverse, *t_inverse;

void memory_init()
{
    log_table = (symbol *)malloc(sizeof(symbol)*N_field);
    exp_table = (symbol *)malloc(sizeof(symbol)*2*N_field);

    product = (symbol *)malloc(sizeof(symbol)*N_walsh);
    product_enc = (symbol *)malloc(sizeof(symbol)*N_walsh);

    log_walsh = (symbol *)malloc(sizeof(symbol)*N_walsh);
    pos = (symbol *)malloc(sizeof(symbol)*N_walsh);

    inverse = t_inverse = (symbol *) malloc(sizeof(symbol) * N_walsh * n_field + 8);
    coeff = t_coeff = (symbol *) malloc(sizeof(symbol) * N_walsh * n_field + 8);

    // align memory for SSE operations
    // (uintptr_t avoids truncating the pointer on 64-bit targets)
    while ((uintptr_t)(inverse) & 0xf) inverse++;
    while ((uintptr_t)(coeff) & 0xf) coeff++;
}

void memory_clear()
{
    free(log_table);
    free(exp_table);
    free(product);
    free(product_enc);
    free(log_walsh);
    free(pos);
    free(t_inverse);
    free(t_coeff);
}


// ************************************************
// ************************************************

// compute the tables for the finite field operations
// exp_table is twice the needed size so we do not need to
// perform a modulo each time.

// list of some primitive polynomials
int primitive[] = {
    0,1,6,
    0,3,7,
    0,2,3,4,8,
    0,4,9,
    0,3,10,
    0,2,11,
    0,1,4,6,12,
    0,1,3,4,13,
    0,1,11,12,14,
    0,1,15,
    0,1,3,12,16,
    0,3,17,
    0,3,20,
    0,3,25,
    100 // fallback
};

// will be set to the primitive poly of the field
// used only by the fast algorithm version
int *poly;
int weight;

void fill_table()
{
    int i;

    // put the primitive poly in mask
    int temp=0;
    int mask=0;
    int index=0;
    while (1) {
        if (primitive[index]==0) {
            mask=0;
            temp=index;
        }
        mask ^= 1 << primitive[index];
        if (primitive[index]>=n_field) break;
        index++;
    }

    // used for the fast version only
    poly = &primitive[temp];
    weight = index-temp;

    if (primitive[index]!=n_field) {
        printf("primitive poly for GF %d not found !\n", n_field);
    }

    // clock the lfsr (multiply by X)
    // fill the table
    int state=1;
    for (i=0; i<modulo; i++)
    {
        if (log_table[state]!=0) {
            printf("polynomial is not primitive\n");
        }

        log_table[state]=i;
        exp_table[i]=state;
        exp_table[modulo+i]=state;

        state <<=1;
        if (state>>n_field) state^=mask;
        if (state>>n_field!=0) exit(0);
    }

    // small change so we have bijection
    log_table[0]=0;
    log_table[1]=modulo;
    exp_table[2*modulo]=1;
}
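
// Illustration (added; not part of the original source): exp_table is
// allocated with 2*N_field entries so that a log/exp product can skip the
// reduction modulo (2^n_field - 1), since log_a + log_b never exceeds
// 2*modulo. gf_mul below is a hypothetical helper shown only to make this
// table layout explicit; the encoder and decoder inline the same pattern.
static inline symbol gf_mul(symbol a, symbol b)
{
    if (a == 0 || b == 0) return 0;
    // the index stays in [0, 2*modulo], so no "% modulo" is needed
    return exp_table[log_table[a] + log_table[b]];
}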

// ************************************************
// ************************************************

void walsh_mod(symbol *vect, int size);
void compute_product();
void walsh_field(symbol *vect);

// ************************************************
// ************************************************

void log_walsh_init()
{
    int i;
    for (i=0; i<N_walsh; i++)
        log_walsh[i] = log_table[i];
    log_walsh[1]=0;
    walsh_mod(log_walsh, N_walsh);

    // multiply it by the correct value,
    // so computations are easier later
    int shift = n_field - n_walsh;
    for (i=0; i<N_walsh; i++)
        log_walsh[i] = ((int) log_walsh[i] << shift) % modulo;
}

void fast_init()
{
    int i,j;

    // clear coeff
    // It needs to be done only once
    for (i=0; i<N_walsh*n_field; i++)
        coeff[i]=0;

    // fill the inverse table
    for (i=0; i<N_walsh; i++) {
        int t = exp_table[(modulo-log_table[i]) % modulo];
        if(i==0) t=0;
        for (j=0; j<n_field; j++)
            inverse[i*n_field + j] = (t>>j)&1;
    }

    // precompute the Walsh transforms of the inverse
    walsh_field(inverse);

    // everything is even if n_walsh=16
    // divide by two so computations fit in 16 bits!
    // otherwise, multiply by a power of two, so
    // the shift factor is always 15.
    if (n_walsh==16)
        for (i=0; i<n_field*N_walsh; i++) inverse[i] >>= 1;
    else {
        for (i=0; i<n_field*N_walsh; i++) inverse[i] <<= (15-n_walsh);
    }
}


// *******************************************************
// *******************************************************

void code_init(int nf, int nw)
{
    if (nf>31 || nw > nf) {
        printf("incorrect field parameters\n");
        exit(0);
    }

    n_field = nf;
    n_walsh = nw;
    N_field = 1<<n_field;
    N_walsh = 1<<n_walsh;
    modulo = N_field-1;

    memory_init();
    fill_table();
    log_walsh_init();
    fast_init();
}

void code_clear()
{
    memory_clear();
}

// *******************************************************
// *******************************************************

// for encoding, we can precompute the product once
void encode_init(int K)
{
    int i;

    // fill pos
    for (i=0; i<N_walsh; i++) pos[i]=0;
    for (i=0; i<K; i++) pos[i]=1;

    // compute product
    compute_product();

    // save it in product_enc
    // so it is not overwritten by any decoding
    for (i=0; i<N_walsh; i++)
        product_enc[i] = product[i];
}

// ************************************************
// ************************************************

// this can be optimized a bit...
// But it is not critical at all.

// Perform a Walsh transform and keep the coeffs mod (modulo)
// The transformation is involutive if n_walsh = n_field.
void walsh_mod(symbol *vect, int size)
{
    int i,j,step;
    step=1;
    while (step<size) {
        i=0;
        while (i<size) {
            j = step;
            while (j--)
            {
                int t=vect[i];
                int b=vect[i+step];
                int a=t+b;

                b = t + modulo - b;

                // modulo 2^m - 1
                // a = a_1 2^m + a_0
                // and 2^m = 1 mod (2^m - 1)
                // remark : result can be 2^m-1 = 0

                // a = (a & modulo) + (a>>n_field);
                // b = (b & modulo) + (b>>n_field);

                // on GF 2^16
                a = a + (a>>16);
                b = b + (b>>16);

                vect[i]=a;
                vect[i+step]=b;

                i++;
            }
            i+=step;
        }
        step<<=1;
    }
}

// ************************************************
// ************************************************

// compute the product (3) of the paper
// return in product the logarithm of the product
void compute_product()
{
    int i;
    // initialisation
    for (i=0; i<N_walsh; i++)
        product[i]=pos[i];

    // Walsh transform
    walsh_mod(product,N_walsh);

    // multiplication
    // need long long here if n_field > 16
    // otherwise int is ok.
    for (i=0; i<N_walsh; i++) {
        product[i] = ((unsigned int)product[i] * (unsigned int)log_walsh[i]) % modulo;
    }

    // inverse Walsh transform
    // it is not involutive if n_field != n_walsh,
    // But we multiplied the log_walsh by (1<<(n_field-n_walsh))
    // so that it works anyway !
    walsh_mod(product,N_walsh);
}
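
// Note (added; not in the original source): with the usual dyadic
// convolution identity W(f (*) g) = W(f) . W(g), the three steps above
// compute, for every position x,
//
//     product[x] = sum over received y of log_table[x ^ y]   (mod 2^n_field - 1)
//
// i.e. the logarithm of the product of (x ^ y) over received y != x, the
// y == x term contributing log_table[0] = 0. The factor (1<<(n_field-n_walsh))
// folded into log_walsh compensates for W(W(f)) = 2^n_walsh * f, since
// 2^n_field = 1 mod (2^n_field - 1).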

// Same but quadratic version
// Here only for debugging purposes
void compute_product_quadratic(int K, int *positions)
{
    int i,j;

    for (j=0; j<N_walsh; j++)
        product[j] = log_table[j ^ positions[0]];

    for (i=1; i<K; i++) {
        for (j=0; j<N_walsh; j++) {
            int t = product[j] + log_table[j ^ positions[i]];
            if (t>modulo) t-= modulo;
            product[j] = t;
        }
    }
}

// *******************************************************
// *******************************************************

// Perform a Walsh transform
// Nothing special this time, except that we do not
// have to worry about any overflow since in our use,
// only the bit n_walsh will be important
//
// special version that does n_field transforms at once.
// one of the limiting parts in the complexity

void walsh_field_generic(symbol *vect)
{
    int i,j;
    int step=n_field;
    int size=n_field * N_walsh;
    while (step<size)
    {
        symbol *p=vect;
        symbol *q=vect+step;
        i=size/(2*step);
        while (i--) {
            j=step;
            while (j--) {
                symbol a = *p;
                symbol b = *q;
                *(p++) = a + b;
                *(q++) = a - b;
            }
            p+=step;
            q+=step;
        }
        step<<=1;
    }
}

// In some critical parts of the code, we gain a lot
// using a constant instead of n_field.
#define NFIELD 16

#ifdef SSE

// Perform a Walsh transform on 4 expanded field elements,
// each taking 16 x 16 bits
void walsh_end(symbol *p) {
    __asm__ __volatile__ (
        "movdqa (%%esi), %%xmm0\n"
        "movdqa 16(%%esi), %%xmm1\n"
        "movdqa 32(%%esi), %%xmm2\n"
        "movdqa 48(%%esi), %%xmm3\n"
        "movdqa 64(%%esi), %%xmm4\n"
        "movdqa 80(%%esi), %%xmm5\n"
        "movdqa 96(%%esi), %%xmm6\n"
        "movdqa 112(%%esi), %%xmm7\n"

        "psubw %%xmm2, %%xmm0\n"
        "psubw %%xmm3, %%xmm1\n"
        "psubw %%xmm6, %%xmm4\n"
        "psubw %%xmm7, %%xmm5\n"

        "psllw $1,%%xmm2\n"
        "psllw $1,%%xmm3\n"
        "psllw $1,%%xmm6\n"
        "psllw $1,%%xmm7\n"

        "paddw %%xmm0, %%xmm2\n"
        "paddw %%xmm1, %%xmm3\n"
        "paddw %%xmm4, %%xmm6\n"
        "paddw %%xmm5, %%xmm7\n"

        "psubw %%xmm4, %%xmm0\n"
        "psubw %%xmm5, %%xmm1\n"
        "psubw %%xmm6, %%xmm2\n"
        "psubw %%xmm7, %%xmm3\n"

        "psllw $1,%%xmm4\n"
        "psllw $1,%%xmm5\n"
        "psllw $1,%%xmm6\n"
        "psllw $1,%%xmm7\n"

        "paddw %%xmm0, %%xmm4\n"
        "paddw %%xmm1, %%xmm5\n"
        "paddw %%xmm2, %%xmm6\n"
        "paddw %%xmm3, %%xmm7\n"

        "movdqa %%xmm6, (%%esi)\n"
        "movdqa %%xmm7, 16(%%esi)\n"
        "movdqa %%xmm4, 32(%%esi)\n"
        "movdqa %%xmm5, 48(%%esi)\n"
        "movdqa %%xmm2, 64(%%esi)\n"
        "movdqa %%xmm3, 80(%%esi)\n"
        "movdqa %%xmm0, 96(%%esi)\n"
        "movdqa %%xmm1, 112(%%esi)\n"
        : : "S"(p) : "memory");
}

// Perform a Walsh step (A,B) -> (A+B, A-B)
// where sizeof A = sizeof B = s*32 bits
void walsh_step(symbol *p, symbol *q, int s) {
    while (s--) {
        __asm__ __volatile__ (
            "movdqa (%%esi), %%xmm0\n"
            "movdqa 16(%%esi), %%xmm1\n"
            "movdqa 32(%%esi), %%xmm2\n"
            "movdqa 48(%%esi), %%xmm3\n"

            "movdqa (%%edi), %%xmm4\n"
            "movdqa 16(%%edi), %%xmm5\n"
            "movdqa 32(%%edi), %%xmm6\n"
            "movdqa 48(%%edi), %%xmm7\n"

            "psubw %%xmm4, %%xmm0\n"
            "psubw %%xmm5, %%xmm1\n"
            "psubw %%xmm6, %%xmm2\n"
            "psubw %%xmm7, %%xmm3\n"

            "psllw $1,%%xmm4\n"
            "psllw $1,%%xmm5\n"
            "psllw $1,%%xmm6\n"
            "psllw $1,%%xmm7\n"

            "paddw %%xmm0, %%xmm4\n"
            "paddw %%xmm1, %%xmm5\n"
            "paddw %%xmm2, %%xmm6\n"
            "paddw %%xmm3, %%xmm7\n"

            "movdqa %%xmm4, (%%esi)\n"
            "movdqa %%xmm5, 16(%%esi)\n"
            "movdqa %%xmm6, 32(%%esi)\n"
            "movdqa %%xmm7, 48(%%esi)\n"

            "movdqa %%xmm0, (%%edi)\n"
            "movdqa %%xmm1, 16(%%edi)\n"
            "movdqa %%xmm2, 32(%%edi)\n"
            "movdqa %%xmm3, 48(%%edi)\n"
            : : "S"(p), "D"(q) : "memory");

        p += 32;
        q += 32;
    }
}

// Perform a Walsh step (A,B) -> (A+B, A-B)
// where sizeof A = sizeof B = s*16 bits
void walsh_step_simple(symbol *p, symbol *q, int s) {
    while (s--) {
        __asm__ __volatile__ (
            "movdqa (%%esi), %%xmm0\n"
            "movdqa 16(%%esi), %%xmm1\n"

            "movdqa (%%edi), %%xmm4\n"
            "movdqa 16(%%edi), %%xmm5\n"

            "psubw %%xmm4, %%xmm0\n"
            "psubw %%xmm5, %%xmm1\n"

            "psllw $1,%%xmm4\n"
            "psllw $1,%%xmm5\n"

            "paddw %%xmm0, %%xmm4\n"
            "paddw %%xmm1, %%xmm5\n"

            "movdqa %%xmm4, (%%esi)\n"
            "movdqa %%xmm5, 16(%%esi)\n"

            "movdqa %%xmm0, (%%edi)\n"
            "movdqa %%xmm1, 16(%%edi)\n"
            : : "S"(p), "D"(q) : "memory");

        p += 16;
        q += 16;
    }
}

// iterative DFS
void walsh_field_iter(symbol *p, int size)
{
    int i;
    size/=4;
    for (i=0; i<size; i++) {
        // start of A
        symbol *q = p;

        // size of the Walsh transform in 16-bit words
        int taille=2;

        // first Walsh transform
        // alternative with size/=2: walsh_step_simple(q, q + NFIELD*taille, taille);
        walsh_end(q);

        // More steps depending on the trailing zeros in the binary
        // representation of i+1
        int t=i+1;
        while ((t&1)==0) {
            taille <<= 1;
            q -= NFIELD*taille;
            walsh_step(q, q + NFIELD*taille, taille/2);
            t >>= 1;
        }
        // next block
        p += 4*NFIELD;
    }
}

// Recursive version
void walsh_field_rec(symbol *p, int size)
{
    // if (size==2) return walsh_step_simple(p,p+n_field,1);
    if (size==4) { walsh_end(p); return; }

    size /= 2;
    symbol *q = p + size*NFIELD;
    walsh_field_rec(p,size);
    walsh_field_rec(q,size);

    // walsh_step_simple(p,q,size);
    walsh_step(p,q,size/2);
}

void walsh_field(symbol *vect)
{
    walsh_field_iter(vect, N_walsh);
    return;

    // alternative implementations, kept below for reference (unreachable):
    walsh_field_rec(vect, N_walsh);
    return;

    int i,j;
    int step=n_field;
    int size=n_field * N_walsh;
    while (step<size)
    {
        symbol *p=vect;
        symbol *q=vect+step;
        i=size/(2*step);
        while (i--) {
            j=step/16;
            while (j--) {
                __asm__ __volatile__ (
                    "movdqa (%%esi), %%xmm0\n"
                    "movdqa 16(%%esi), %%xmm1\n"

                    "movdqa (%%edi), %%xmm2\n"
                    "movdqa 16(%%edi), %%xmm3\n"

                    "movdqa %%xmm0, %%xmm4\n"
                    "movdqa %%xmm1, %%xmm5\n"

                    "paddw %%xmm2, %%xmm0\n"
                    "paddw %%xmm3, %%xmm1\n"

                    "psubw %%xmm2, %%xmm4\n"
                    "psubw %%xmm3, %%xmm5\n"

                    "movdqa %%xmm0, (%%esi)\n"
                    "movdqa %%xmm1, 16(%%esi)\n"

                    "movdqa %%xmm4, (%%edi)\n"
                    "movdqa %%xmm5, 16(%%edi)\n"
                    : : "S"(p), "D"(q) : "memory");

                p += 16;
                q += 16;
            }
            p+=step;
            q+=step;
        }
        step<<=1;
    }
}


#else

void walsh_field(symbol *vect) {
    walsh_field_generic(vect);
}

#endif

// *******************************************************
// *******************************************************

// compute the field product of the vectors coeff and inverse:
// for each [n_field] consecutive symbols of [inverse] and [coeff],
// multiply them as if they were elements of the field in the
// standard basis.
//
// the difference is that instead of coefficients in GF(2),
// the coefficients are in Z ...
//
// The other limiting part in the complexity (with walsh_field).
// The multiplication is quadratic in n_field
// and is performed N_walsh times.


// works with any field (using n_field) but is slow
// look at the GF(2^16) version below for a fast one
void field_product_generic()
{
    int i,j,k;
    symbol s[64];

    symbol *inv = inverse;

    for (k=0; k<N_walsh; k++)
    {
        symbol *c = &coeff[k*n_field];
        symbol *base = &s[n_field];

        // put [c] in [s]
        for (i=0; i<n_field; i++) {
            base[i] = c[i];
            c[i]=0;
            s[i]=0;
        }

        // product
        for (i=0; i<n_field; i++)
        {
            // add inv[i]*[s] to [c]
            symbol t = *(inv++);
            for (j=0; j<n_field; j++) {
                c[j] += t * base[j];
            }
            // multiply [s] by x
            base--;
            t = base[n_field];
            for (j=0; j<weight; j++) {
                base[poly[j]] += t;
            }
        }
    }
}

#ifdef SSE

static inline void ssse_1(void)
{
    __asm__ __volatile__ (
        // multiply [base] by x

        // left shift of xmm3,xmm2
        // word out is in xmm7
        "movapd %%xmm2, %%xmm6\n"
        "movapd %%xmm3, %%xmm7\n"
        "pslldq $2,%%xmm2\n"
        "pslldq $2,%%xmm3\n"
        "psrldq $14,%%xmm6\n"
        "psrldq $14,%%xmm7\n"
        "pxor %%xmm6,%%xmm3\n"

        // expand xmm7 to pos 0, 1, 3
        // add it to xmm2
        "pshuflw $16,%%xmm7,%%xmm6\n"
        "paddw %%xmm6,%%xmm2\n"

        // put value in pos 4 (8+4)
        // add it to xmm3
        // "psrldq $6,%%xmm7\n"
        "pslldq $8,%%xmm7\n"
        "paddw %%xmm7,%%xmm3\n"

        // next inv ...
        "psrldq $2, %%xmm4\n"

        // copy 8 times low word in xmm4
        "pshuflw $0, %%xmm4, %%xmm6\n"
        "pshufd $0, %%xmm6, %%xmm6\n"
        "movapd %%xmm6, %%xmm7\n"

        // multiply constant by base
        "pmullw %%xmm2, %%xmm6\n"
        "pmullw %%xmm3, %%xmm7\n"

        // add it to [c]
        "paddw %%xmm6, %%xmm0\n"
        "paddw %%xmm7, %%xmm1\n"
        : );
}

static inline void ssse_2(void)
{
    __asm__ __volatile__ (
        // multiply [base] by x

        // left shift of xmm3,xmm2
        // word out is in xmm7
        "movapd %%xmm2, %%xmm6\n"
        "movapd %%xmm3, %%xmm7\n"
        "pslldq $2,%%xmm2\n"
        "pslldq $2,%%xmm3\n"
        "psrldq $14,%%xmm6\n"
        "psrldq $14,%%xmm7\n"
        "pxor %%xmm6,%%xmm3\n"

        // expand xmm7 to pos 0, 1, 3
        // add it to xmm2
        "pshuflw $16,%%xmm7,%%xmm6\n"
        "paddw %%xmm6,%%xmm2\n"

        // put value in pos 4 (8+4=12)
        // add it to xmm3
        // "psrldq $6,%%xmm7\n"
        "pslldq $8,%%xmm7\n"
        "paddw %%xmm7,%%xmm3\n"

        // copy 8 times low word in xmm5
        "pshuflw $0, %%xmm5, %%xmm6\n"
        "pshufd $0, %%xmm6, %%xmm6\n"
        "movapd %%xmm6, %%xmm7\n"

        // next inv ...
        "psrldq $2, %%xmm5\n"

        // multiply constant by base
        "pmullw %%xmm2, %%xmm6\n"
        "pmullw %%xmm3, %%xmm7\n"

        // add it to [c]
        "paddw %%xmm6, %%xmm0\n"
        "paddw %%xmm7, %%xmm1\n"
        : );
}

static inline void sse_loop(void)
{
    int i;
    for (i=1; i<NFIELD; i++)
    {
        __asm__ __volatile__ (
            // multiply [base] by x

            // left shift of xmm3,xmm2
            // word out is in xmm7
            "movapd %%xmm2, %%xmm6\n"
            "movapd %%xmm3, %%xmm7\n"
            "pslldq $2,%%xmm2\n"
            "pslldq $2,%%xmm3\n"
            "psrldq $14,%%xmm6\n"
            "psrldq $14,%%xmm7\n"
            "pxor %%xmm6,%%xmm3\n"

            // expand xmm7 to pos 0, 1, 3
            // add it to xmm2
            "pshuflw $16,%%xmm7,%%xmm6\n"
            "paddw %%xmm6,%%xmm2\n"

            // put value in pos 4 (8+4)
            // add it to xmm3
            // "psrldq $6,%%xmm7\n"
            "pslldq $8,%%xmm7\n"
            "paddw %%xmm7,%%xmm3\n"

            // next inv ...
            "psrldq $2, %%xmm4\n"
            "movapd %%xmm5, %%xmm6\n"
            "pslldq $14, %%xmm6\n"
            "pxor %%xmm6, %%xmm4\n"
            "psrldq $2, %%xmm5\n"

            // copy 8 times low word in xmm4
            "pshuflw $0, %%xmm4, %%xmm6\n"
            "pshufd $0, %%xmm6, %%xmm6\n"
            "movapd %%xmm6, %%xmm7\n"

            // multiply constant by base
            "pmullw %%xmm2, %%xmm6\n"
            "pmullw %%xmm3, %%xmm7\n"

            // add it to [c]
            "paddw %%xmm6, %%xmm0\n"
            "paddw %%xmm7, %%xmm1\n"
            : );
    }
}

void field_product_16()
{
    int k;

    symbol *inv = inverse;
    symbol *c = coeff;

    for (k=0; k<N_walsh; k++)
    {
        __asm__ __volatile__ (
            // load c into xmm0,xmm1
            "movdqa (%%edi), %%xmm0\n"
            "movdqa 16(%%edi), %%xmm1\n"

            // init base into xmm2,xmm3
            "movdqa %%xmm0, %%xmm2\n"
            "movdqa %%xmm1, %%xmm3\n"

            // load inv into xmm4,xmm5
            "movdqa (%%esi), %%xmm4\n"
            "movdqa 16(%%esi), %%xmm5\n"

            // make 8 copies of inv[0]
            "pshuflw $0, %%xmm4, %%xmm6\n"
            "pshufd $0, %%xmm6, %%xmm6\n"

            // multiply [c] by constant
            "pmullw %%xmm6, %%xmm0\n"
            "pmullw %%xmm6, %%xmm1\n"

            : : "S"(inv), "D"(c) : "memory");

        // product with loop
        // sse_loop();

        // product without loop
        // the last one can be simplified a bit, but the gain is really small.
        ssse_1();ssse_1();ssse_1();ssse_1();ssse_1();ssse_1();ssse_1();
        ssse_2();ssse_2();ssse_2();ssse_2();ssse_2();ssse_2();ssse_2();ssse_2();

        // put result back in [c]
        __asm__ __volatile__ (
            "movdqa %%xmm0, (%%edi)\n"
            "movdqa %%xmm1, 16(%%edi)\n"
            : : "D"(c) : "memory");

        // next c
        c += NFIELD;
        inv += NFIELD;
    }
}

#else

void field_product_16()
{
    int i,j,k;
    symbol s[32];

    symbol *inv = inverse;
    symbol *c = coeff;
    symbol t;

    for (k=0; k<N_walsh; k++)
    {
        symbol *base = s + NFIELD;

        // put [c] in [base]
        // multiply [c] by inv[0]
        t = *(inv++);
        for (i=0; i<NFIELD; i++) {
            base[i] = c[i];
            c[i] *=t;
        }

        // product
        for (i=1; i<NFIELD; i++)
        {
            // multiply [base] by x
            base--;
            t = base[0] = base[NFIELD];
            base[1] += t;
            base[3] += t;
            base[12] += t;

            // add inv[i]*[base] to [c]
            t = *(inv++);
            for (j=0; j<NFIELD; j++) {
                c[j] += t * base[j];
            }
        }

        // next c
        c += NFIELD;
    }
}

#endif

void (* field_product)() = field_product_16;


// *******************************************************
// *******************************************************

// In the case where k and n-k are both smaller than or equal to 2^i,
// we could gain a factor 2 for encoding by doing the convolution on
// length 2^i rather than 2^{i+1} ...
// Not implemented here.
// It requires changing the parity positions ...

void fast_encode(int N, int K, int S, symbol *data, symbol *parity)
{
    int i,j,k;

    // no need to clear coeff afterwards
    // because after the second walsh transform, the
    // least significant bits are 0 anyway...
    // memset(coeff, 0, N_walsh*n_field*sizeof(symbol));

    // encode codeword by codeword
    for (j=0; j<S; j++)
    {
        // put the data symbols in place
        symbol *temp = coeff;
        for (i=0; i<K; i++)
        {
            int t=data[i*S+j];
            if (t) t=exp_table[log_table[t] + modulo - product_enc[i]];

            for (k=0; k<NFIELD; k++) {
                *(temp++) = t;
                t >>=1;
                // we are interested only in the parity
                // so no need to add &1;
            }
        }

        // compute the Walsh transforms of the coefficients
        walsh_field(coeff);

        // multiply with the precomputed Walsh transform of the inverse
        field_product();

        // walsh transform again
        walsh_field(coeff);

        // put the parity symbols in place
        // final multiplication by the product
        temp = coeff + K*n_field;
        for (i=0; i<N-K; i++) {
            // reconstruct the field element from
            // its shift coordinates
            int t=*(temp++);
            for (k=1; k<NFIELD; k++) {
                t >>= 1;
                t ^= *(temp++);
            }

            if (t) t = exp_table[log_table[t] + product_enc[K+i]];
            parity[i*S + j] = t;
        }
    }
}

void fast_decode(int K, int S, int *positions, symbol *data, symbol *packets)
{
    int i,j,k;

    // copy the systematic pieces in place
    for (i=0; i<K; i++)
        if (positions[i]<K)
            for (j=0; j<S; j++)
                packets[positions[i]*S + j] = data[i*S +j];

    // precomputation
    for (i=0; i<N_walsh; i++) pos[i]=0;
    for (i=0; i<K; i++) pos[positions[i]]=1;
    compute_product();

    // decode codeword by codeword
    for (j=0; j<S; j++)
    {
        // put received symbols in place
        for (i=0; i<K; i++)
        {
            symbol *temp = coeff + positions[i]*NFIELD;
            int t=data[i*S+j];
            if (t)
                t=exp_table[log_table[t] + modulo - product[positions[i]]];

            for (k=0; k<NFIELD; k++) {
                *(temp++) = t;
                t >>=1;
                // we are interested only in the parity
                // so no need to add &1;
            }
        }

        // compute the Walsh transforms of the coefficients
        walsh_field(coeff);

        // multiply with precomputed Walsh transform of the inverse
        field_product();

        // walsh transform again
        walsh_field(coeff);

        // put decoded symbol in place
        // final multiplication by the product
        for (i=0; i<K; i++) if (pos[i]==0)
        {
            symbol *temp = coeff + i*NFIELD;

            // reconstruct the field element from
            // its shift coordinates
            // only the 15 bits of *temp is non-zero
            int t=*(temp++);
            for (k=1; k<NFIELD; k++) {
                t >>= 1;
                t ^= *(temp++);
            }

            if (t) t = exp_table[log_table[t] + product[i]];
            packets[i*S + j] = t;
        }
    }
}
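
// *******************************************************
// *******************************************************

// Usage sketch (added; not part of the original source). It shows one way
// the entry points above could be chained together. The EXAMPLE_USAGE
// guard, the K/N/S values and the loss pattern are illustrative assumptions
// only; code_init(16, 16) is used because field_product_16 hard-codes the
// GF(2^16) polynomial (NFIELD).
#ifdef EXAMPLE_USAGE
#include <string.h>

int main(void)
{
    // GF(2^16) symbols, Walsh transform over the whole field
    code_init(16, 16);

    enum { K = 4, N = 6, S = 8 };   // 4 data packets, 2 parity packets, 8 symbols each
    symbol data[K*S], parity[(N-K)*S], out[K*S];
    memset(data, 0x11, sizeof(data));
    memset(out, 0, sizeof(out));

    encode_init(K);
    fast_encode(N, K, S, data, parity);

    // pretend data packets 1 and 3 were lost; decode from packets 0, 2
    // and the two parity packets (positions K and K+1)
    int positions[K] = { 0, 2, K, K + 1 };
    symbol received[K*S];
    memcpy(&received[0*S], &data[0*S],   S * sizeof(symbol));
    memcpy(&received[1*S], &data[2*S],   S * sizeof(symbol));
    memcpy(&received[2*S], &parity[0*S], S * sizeof(symbol));
    memcpy(&received[3*S], &parity[1*S], S * sizeof(symbol));

    fast_decode(K, S, positions, received, out);
    // out[1*S .. 2*S-1] and out[3*S .. 4*S-1] now hold the recovered packets

    code_clear();
    return 0;
}
#endif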