optimized

2025-12-16 18:14:25 +00:00 · 2008-11-05 21:01:12 +00:00
parent bea1e66230
commit 12c3a0721b
1 changed files with 145 additions and 264 deletions
--- a/ALACDotNet/ALACDotNet.cs
+++ b/ALACDotNet/ALACDotNet.cs
@@ -286,12 +286,12 @@ namespace ALACDotNet
 		}

 		/* supports reading 1 to 16 bits, in big endian format */
-		private unsafe uint peekbits_16(byte* buff, int pos, int bits)
+		private unsafe uint peekbits_9(byte* buff, int pos)
 		{
-			uint result = (((uint)buff[pos]) << 16) | (((uint)buff[pos + 1]) << 8) | ((uint)buff[pos + 2]);
+			uint result = (((uint)buff[pos]) << 8) | (((uint)buff[pos + 1]));
 			result <<= _bitaccumulator;
-			result &= 0x00ffffff;
-			result >>= 24 - bits;
+			result &= 0x0000ffff;
+			result >>= 7;
 			return result;
 		}

@@ -363,35 +363,34 @@ namespace ALACDotNet
 				_bitaccumulator *= -1;
 		}

-		private static int count_leading_zeros(uint input, int first)
+		private static int count_leading_zeroes(uint input)
 		{
 			int zeroes = 1;
-			int check = first;
-			while (check > 0)
-			{
-				uint shifted_input = input >> check;
-				if (shifted_input == 0) 
-					zeroes += check; 
-				else
-					input = shifted_input;
-				check >>= 1;
-			}
-			return zeroes - (int)input;
-		}
-
-		private static int count_leading_zeros(uint input)
-		{
-			int zeroes = 1;
-			int check = 16;
-			while (check > 0)
-			{
-				uint shifted_input = input >> check;
-				if (shifted_input == 0)
-					zeroes += check;
-				else
-					input = shifted_input;
-				check >>= 1;
-			}
+			uint shifted_input = input >> 16;
+			if (shifted_input == 0)
+				zeroes += 16;
+			else
+				input = shifted_input;
+			shifted_input = input >> 8;
+			if (shifted_input == 0)
+				zeroes += 8;
+			else
+				input = shifted_input;
+			shifted_input = input >> 4;
+			if (shifted_input == 0)
+				zeroes += 4;
+			else
+				input = shifted_input;
+			shifted_input = input >> 2;
+			if (shifted_input == 0)
+				zeroes += 2;
+			else
+				input = shifted_input;
+			shifted_input = input >> 1;
+			if (shifted_input == 0)
+				zeroes ++;
+			else
+				input = shifted_input;
 			return zeroes - (int)input;
 		}

@@ -405,9 +404,11 @@ namespace ALACDotNet
 				pr->predictor_coef_num = (int)readbits(_framesBuffer, ref pos, 5);

 				/* read the predictor table */
-				for (int i = 0; i < pr->predictor_coef_num; i++)
+				pr->predictor_coef_table_sum = 0;
+				for (int i = pr->predictor_coef_num - 1; i >= 0; i--)
 				{
 					pr->predictor_coef_table[i] = (short)readbits(_framesBuffer, ref pos, 16);
+					pr->predictor_coef_table_sum += pr->predictor_coef_table[i];
 				}
 			}
 		}
@@ -415,14 +416,30 @@ namespace ALACDotNet
 		private unsafe int decode_scalar(byte * buff, ref int pos, int k, int limit, int readsamplesize)
 		{
 			int x = 0;
-			//while (x <= 8 && readbit(_framesBuffer, ref pos) != 0)
-			//    x++;
-			uint next = peekbits_16(buff, pos, 9);
-			for (x = 0; (next & 0x100) != 0; x++)
+			uint next = peekbits_9(buff, pos);
+			if (next == 0x1ff) /* RICE THRESHOLD 9 bits */
+			{
+				skipbits(ref pos, 9);
+				return (int)readbits(buff, ref pos, readsamplesize);
+			}
+			if ((next & 0x1e0) == 0x1e0)
+			{
+				x += 4;
+				next <<= 4;
+			}
+			if ((next & 0x180) == 0x180)
+			{
+				x += 2;
+				next <<= 2;
+			}
+			if ((next & 0x100) == 0x100)
+			{
+				x += 1;
 				next <<= 1;
-			skipbits(ref pos, x >= 9 ? 9 : x + 1);
-			if (x > 8) /* RICE THRESHOLD */
-				return (int) readbits(buff, ref pos, readsamplesize);
+			}
+			x += (int) (next & 0x100) >> 8;
+			//x = count_leading_zeroes((~next) & 0x1ff) - 23;
+			skipbits(ref pos, x + 1);
 			if (k >= limit)
 				k = limit;
 			if (k == 1)
@@ -448,31 +465,21 @@ namespace ALACDotNet

 				for (int output_count = 0; output_count < output_size; output_count++)
 				{
-					int x = 0;
-					int x_modified;
-					int final_val;
-
-					x = decode_scalar(buff, ref pos, 31 - count_leading_zeros((history >> 9) + 3), rice_kmodifier, readsamplesize);
-
-					x_modified = sign_modifier + x;
-					final_val = (x_modified + 1) / 2;
-					if ((x_modified & 1) != 0) final_val *= -1;
-
-					output_buffer[output_count] = final_val;
-
+					int x = sign_modifier + decode_scalar(buff, ref pos, 31 - count_leading_zeroes((history >> 9) + 3), rice_kmodifier, readsamplesize);
+					output_buffer[output_count] = ((x + 1) >> 1) * (1 - ((x & 1) << 1));
 					sign_modifier = 0;

 					/* now update the history */
-					history = (uint)(history + (x_modified * rice_historymult)
+					history = (uint)(history + (x * rice_historymult)
 							 - ((history * rice_historymult) >> 9));

-					if (x_modified > 0xffff)
+					if (x > 0xffff)
 						history = 0xffff;

 					/* special case: there may be compressed blocks of 0 */
 					if ((history < 128) && (output_count + 1 < output_size))
 					{
-						int k = 7 - (31 - count_leading_zeros(history)) + (((int)history + 16) >> 6);
+						int k = 7 - (31 - count_leading_zeroes(history)) + (((int)history + 16) >> 6);
 						int block_size = decode_scalar(buff, ref pos, k, rice_kmodifier, 16);
 						if (block_size > 0)
 						{
@@ -495,153 +502,102 @@ namespace ALACDotNet
 			return (val << (32 - bits)) >> (32 - bits);
 		}

-		private static int sign_only(int v)
+		private static short sign_only(int v)
 		{
-			return v == 0 ? 0 : v > 0 ? 1 : -1;
+			return (short)(1 - ((v >> 30) & 2));
 		}

 		private unsafe void predictor_decompress_fir_adapt(uint output_size, ref predictor_t predictor_info, ref int[] error_buffer, ref int[] buffer_out, int readsamplesize)
 		{
 			int i;

-			/* first sample always copies */
-			buffer_out[0] = error_buffer[0];
-
-			if (predictor_info.predictor_coef_num == 0)
-			{
-				if (output_size <= 1)
-					return;
-				for (i = 1; i < output_size; i++)
-					buffer_out[i] = error_buffer[i];
-				return;
-			}
-
-			if (predictor_info.predictor_coef_num == 0x1f)
-			{ /* 11111 - max value of predictor_coef_num */
-				/* second-best case scenario for fir decompression,
-				 * error describes a small difference from the previous sample only
-				 */
-				if (output_size <= 1)
-					return;
-				for (i = 0; i < output_size - 1; i++)
-				{
-					int prev_value;
-					int error_value;
-
-					prev_value = buffer_out[i];
-					error_value = error_buffer[i + 1];
-					buffer_out[i + 1] =
-						extend_sign32((prev_value + error_value), readsamplesize);
-				}
-				return;
-			}
-
-			/* read warm-up samples */
-			if (predictor_info.predictor_coef_num > 0)
-				for (i = 0; i < predictor_info.predictor_coef_num; i++)
-				{
-					int val;
-
-					val = buffer_out[i] + error_buffer[i + 1];
-					val = extend_sign32(val, readsamplesize);
-					buffer_out[i + 1] = val;
-				}
-
-			//#if 0
-			//            /* 4 and 8 are very common cases (the only ones i've seen). these
-			//             * should be unrolled and optimized
-			//             */
-			//            if (predictor_coef_num == 4) {
-			//                /* FIXME: optimized general case */
-			//                return;
-			//            }
-
-			//            if (predictor_coef_table == 8) {
-			//                /* FIXME: optimized general case */
-			//                return;
-			//            }
-			//#endif
-
-			/* general case */
-			if (predictor_info.predictor_coef_num > 0)
 			fixed (predictor_t* pr = &predictor_info)
-			fixed (int * buf_out = &buffer_out[0], buf_err = &error_buffer[0])
+			fixed (int* buf_out = &buffer_out[0], buf_err = &error_buffer[0])
 			{
-				int pos = 0;
+				if (pr->predictor_coef_num == 0)
+				{
+					for (i = 0; i < output_size; i++)
+						buf_out[i] = buf_err[i];
+					return;
+				}

-				for (i = (int) predictor_info.predictor_coef_num + 1; i < output_size; i++)
+				int sample = 0;
+
+				if (pr->predictor_coef_num == 0x1f)
+				{ /* 11111 - max value of predictor_coef_num */
+					/* second-best case scenario for fir decompression,
+					 * error describes a small difference from the previous sample only
+					 */
+					if (output_size <= 1)
+						return;
+					for (i = 0; i < output_size; i++)
+					{
+						sample += buf_err[i];
+						buf_out[i] = sample;
+					}
+					return;
+				}
+
+				if (output_size <= predictor_info.predictor_coef_num || pr->predictor_coef_num < 0)
+					throw new Exception("invalid output size");
+
+				/* read warm-up samples */
+				for (i = 0; i <= predictor_info.predictor_coef_num; i++)
+				{
+					sample += buf_err[i];
+					buf_out[i] = sample;
+				}
+
+				/* general case */
+				int* buf_pos = buf_out;
+				int predictor_coef_table_sum = pr->predictor_coef_table_sum;
+				for (i = (int)pr->predictor_coef_num + 1; i < output_size; i++)
 				{
 					int j;
 					int sum = 0;
 					int outval;
 					int error_val = buf_err[i];
+					int sample_val = *(buf_pos++);

-					for (j = 0; j < predictor_info.predictor_coef_num; j++)
+					for (j = 0; j < pr->predictor_coef_num; j++)
+						sum += buf_pos[j] * pr->predictor_coef_table[j];
+					sum -= predictor_coef_table_sum * sample_val;
+					outval = (1 << (pr->prediction_quantitization - 1)) + sum;
+					outval >>= pr->prediction_quantitization;
+					outval += sample_val + error_val;
+
+					buf_pos[pr->predictor_coef_num] = outval;
+
+					if (error_val != 0)
 					{
-						sum += (buf_out[pos + predictor_info.predictor_coef_num - j] - buf_out[pos]) *
-							   pr->predictor_coef_table[j];
-					}
-
-					outval = (1 << (predictor_info.prediction_quantitization - 1)) + sum;
-					outval = outval >> predictor_info.prediction_quantitization;
-					outval = outval + buf_out[pos] + error_val;
-					outval = extend_sign32(outval, readsamplesize);
-
-					buf_out[pos + pr->predictor_coef_num + 1] = outval;
-
-					if (error_val > 0)
-					{
-						int predictor_num = pr->predictor_coef_num - 1;
-
-						while (predictor_num >= 0 && error_val > 0)
+						short error_sign = sign_only(error_val);
+						for (j = 0; j < pr->predictor_coef_num; j++)
 						{
-							int val = buf_out[pos] - buf_out[pos + predictor_info.predictor_coef_num - predictor_num];
-							short sign = (short) sign_only(val);
-
-							pr->predictor_coef_table[predictor_num] -= sign;
-
-							val *= sign; /* absolute value */
-
-							error_val -= ((val >> predictor_info.prediction_quantitization) *
-										  (predictor_info.predictor_coef_num - predictor_num));
-
-							predictor_num--;
+							int val = sample_val - buf_pos[j];
+							if (val == 0) 
+								continue;
+							short sign = sign_only(error_sign * val);
+							pr->predictor_coef_table[j] -= sign;
+							predictor_coef_table_sum -= sign;
+							val *= sign; /* absolute value with same sign as error */
+							error_val -= (val >> pr->prediction_quantitization) * (j + 1);
+							if (error_val * error_sign <= 0)
+								break;
 						}
 					}
-					else if (error_val < 0)
-					{
-						int predictor_num = predictor_info.predictor_coef_num - 1;
-
-						while (predictor_num >= 0 && error_val < 0)
-						{
-							int val = buf_out[pos] - buf_out[pos + predictor_info.predictor_coef_num - predictor_num];
-							short sign = (short) sign_only(- val);
-
-							pr->predictor_coef_table[predictor_num] -= sign;
-
-							val *= sign; /* neg value */
-
-							error_val -= ((val >> predictor_info.prediction_quantitization) *
-										  (predictor_info.predictor_coef_num - predictor_num));
-
-							predictor_num--;
-						}
-					}
-
-					pos++;
 				}
+				pr->predictor_coef_table_sum = predictor_coef_table_sum;
 			}
 		}

-		private unsafe void deinterlace_16(uint numsamples, byte interlacing_shift, byte interlacing_leftweight)
+		private unsafe void deinterlace(uint numsamples, byte interlacing_shift, byte interlacing_leftweight)
 		{
-			int i;
-
 			if (numsamples <= 0)
 				return;

+			int i;
 			fixed (int* buf_a = &_outputsamples_buffer_a[0], buf_b = _outputsamples_buffer_b)
-			fixed (byte * buf_s = &_samplesBuffer[0])
+			fixed (byte* buf_s = &_samplesBuffer[0])
 			{
 				/* weighted interlacing */
 				if (interlacing_leftweight != 0)
@@ -735,16 +691,11 @@ namespace ALACDotNet
 			if (_bitsPerSample != 16)
 				throw new Exception("Not 16 bit");

-			deinterlace_16(outputSamples, interlacing_shift, interlacing_leftweight);
+			deinterlace(outputSamples, interlacing_shift, interlacing_leftweight);

 			_samplesInBuffer = outputSamples;
 		}

-		private void skip_chunk(UInt32 chunk_len)
-		{
-			stream_skip(chunk_len - 8); /* FIXME WRONG */
-		}
-
 		/* 'mvhd' movie header atom */
 		private void qtmovie_read_chunk_mvhd(string path, uint length, object param)
 		{
@@ -794,11 +745,11 @@ namespace ALACDotNet
 			}
 		}

-		private void read_chunk_stsd(UInt32 chunk_len)
+		private void qtmovie_read_chunk_stsd(string path, uint length, object param)
 		{
 			uint i;
 			UInt32 numentries;
-			UInt32 size_remaining = chunk_len - 8; /* FIXME WRONG */
+			UInt32 size_remaining = length;

 			/* version */
 			stream_read_uint8();
@@ -905,11 +856,11 @@ namespace ALACDotNet
 			}
 		}

-		private void read_chunk_stts(UInt32 chunk_len)
+		private void qtmovie_read_chunk_stts(string path, uint length, object param)
 		{
 			uint i;
 			UInt32 numentries;
-			UInt32 size_remaining = chunk_len - 8; /* FIXME WRONG */
+			UInt32 size_remaining = length;

 			/* version */
 			stream_read_uint8();
@@ -940,11 +891,11 @@ namespace ALACDotNet
 			}
 		}

-		private void read_chunk_stsz(UInt32 chunk_len)
+		private void qtmovie_read_chunk_stsz(string path, uint length, object param)
 		{
 			uint i;
 			UInt32 numentries;
-			UInt32 size_remaining = chunk_len - 8; /* FIXME WRONG */
+			UInt32 size_remaining = length;

 			/* version */
 			stream_read_uint8();
@@ -983,77 +934,6 @@ namespace ALACDotNet
 			}
 		}

-		private void read_chunk_stbl(UInt32 chunk_len)
-		{
-			UInt32 size_remaining = chunk_len - 8; /* FIXME WRONG */
-			while (size_remaining > 0)
-			{
-				UInt32 sub_chunk_len = stream_read_uint32();
-				if (sub_chunk_len <= 1 || sub_chunk_len > size_remaining)
-					throw new Exception("strange size for chunk inside stbl.");
-
-				UInt32 sub_chunk_id = stream_read_uint32();
-				switch (sub_chunk_id)
-				{
-					case FCC_STSD:
-						read_chunk_stsd(sub_chunk_len);
-						break;
-					case FCC_STTS:
-						read_chunk_stts(sub_chunk_len);
-						break;
-					case FCC_STSZ:
-						read_chunk_stsz(sub_chunk_len);
-						break;
-					case FCC_STSC:
-					case FCC_STCO:
-						/* skip these, no indexing for us! */
-						stream_skip(sub_chunk_len - 8);
-						break;
-					default:
-						throw new Exception(String.Format("(stbl) unknown chunk id: {0}.", sub_chunk_id));
-				}
-				size_remaining -= sub_chunk_len;
-			}
-		}
-
-		private void qtmovie_read_chunk_minf(string path, uint length, object param)
-		{
-			UInt32 dinf_size, stbl_size;
-			UInt32 size_remaining = length; /* FIXME WRONG */
-
-			/**** SOUND HEADER CHUNK ****/
-			if (stream_read_uint32() != 16)
-				throw new Exception("unexpected size in media info\n");
-			if (stream_read_uint32() != FCC_SMHD)
-				throw new Exception("not a sound header! can't handle this.\n");
-			/* now skip the rest */
-			stream_skip(16 - 8);
-			size_remaining -= 16;
-			/****/
-
-			/**** DINF CHUNK ****/
-			dinf_size = stream_read_uint32();
-			if (stream_read_uint32() != FCC_DINF)
-				throw new Exception("expected dinf, didn't get it.");
-			/* skip it */
-			stream_skip(dinf_size - 8);
-			size_remaining -= dinf_size;
-			/****/
-
-			/**** SAMPLE TABLE ****/
-			stbl_size = stream_read_uint32();
-			if (stream_read_uint32() != FCC_STBL)
-				throw new Exception("expected stbl, didn't get it.");
-			read_chunk_stbl(stbl_size);
-			size_remaining -= stbl_size;
-
-			if (size_remaining > 0)
-			{
-				throw new Exception("oops\n");
-				//stream_skip(size_remaining);
-			}
-		}
-
 		/* media handler inside mdia */
 		private void qtmovie_read_chunk_hdlr(string path, uint length, object param)
 		{
@@ -1278,7 +1158,15 @@ namespace ALACDotNet
 			qtmovie_add_nul_parser("top.moov.trak.edts");
 			qtmovie_add_lst_parser("top.moov.trak.mdia", null);
 			qtmovie_add_any_parser("top.moov.trak.mdia.hdlr", new qtmovie_read_atom(qtmovie_read_chunk_hdlr), null);
-			qtmovie_add_any_parser("top.moov.trak.mdia.minf", new qtmovie_read_atom(qtmovie_read_chunk_minf), null);
+			qtmovie_add_lst_parser("top.moov.trak.mdia.minf", null); 
+			qtmovie_add_nul_parser("top.moov.trak.mdia.minf.smhd"); // required, unused
+			qtmovie_add_nul_parser("top.moov.trak.mdia.minf.dinf"); // required, unused
+			qtmovie_add_lst_parser("top.moov.trak.mdia.minf.stbl", null); // SAMPLE TABLE, required
+			qtmovie_add_any_parser("top.moov.trak.mdia.minf.stbl.stsd", new qtmovie_read_atom(qtmovie_read_chunk_stsd), null); // _codecData
+			qtmovie_add_any_parser("top.moov.trak.mdia.minf.stbl.stts", new qtmovie_read_atom(qtmovie_read_chunk_stts), null); // _time_to_sample_*
+			qtmovie_add_any_parser("top.moov.trak.mdia.minf.stbl.stsz", new qtmovie_read_atom(qtmovie_read_chunk_stsz), null); // _sample_byte_size
+			qtmovie_add_nul_parser("top.moov.trak.mdia.minf.stbl.stsc"); /* skip these, no indexing for us! */
+			qtmovie_add_nul_parser("top.moov.trak.mdia.minf.stbl.stco"); /* skip these, no indexing for us! */
 			qtmovie_add_lst_parser("top.moov.udta", null);
 			qtmovie_add_lst_parser("top.moov.udta.meta", (uint)4);
 			qtmovie_add_lst_parser("top.moov.udta.meta.ilst", null);
@@ -1376,6 +1264,7 @@ namespace ALACDotNet
 		unsafe struct predictor_t
 		{
 			public fixed short predictor_coef_table[32];
+			public int predictor_coef_table_sum;
 			public int predictor_coef_num;
 			public int prediction_type;
 			public int prediction_quantitization;
@@ -1387,14 +1276,6 @@ namespace ALACDotNet
 		const UInt32 FCC_MDAT = ('m' << 24) + ('d' << 16) + ('a' << 8) + ('t' << 0);
 		const UInt32 FCC_FREE = ('f' << 24) + ('r' << 16) + ('e' << 8) + ('e' << 0);
 		const UInt32 FCC_M4A = ('M' << 24) + ('4' << 16) + ('A' << 8) + (' ' << 0);
-		const UInt32 FCC_SMHD = ('s' << 24) + ('m' << 16) + ('h' << 8) + ('d' << 0);
-		const UInt32 FCC_DINF = ('d' << 24) + ('i' << 16) + ('n' << 8) + ('f' << 0);
-		const UInt32 FCC_STBL = ('s' << 24) + ('t' << 16) + ('b' << 8) + ('l' << 0);
-		const UInt32 FCC_STSD = ('s' << 24) + ('t' << 16) + ('s' << 8) + ('d' << 0);
-		const UInt32 FCC_STTS = ('s' << 24) + ('t' << 16) + ('t' << 8) + ('s' << 0);
-		const UInt32 FCC_STSZ = ('s' << 24) + ('t' << 16) + ('s' << 8) + ('z' << 0);
-		const UInt32 FCC_STSC = ('s' << 24) + ('t' << 16) + ('s' << 8) + ('c' << 0);
-		const UInt32 FCC_STCO = ('s' << 24) + ('t' << 16) + ('c' << 8) + ('o' << 0);
 		const UInt32 FCC_ALAC = ('a' << 24) + ('l' << 16) + ('a' << 8) + ('c' << 0);
 	}
 }