From 1e89dc890917d8181be1192d949fa8ecab69cacc Mon Sep 17 00:00:00 2001 From: rocky Date: Sat, 15 Oct 2005 03:18:42 +0000 Subject: [PATCH] Analysis and comments courtesy of Peter J. Creath, again. (I believe this will be the last commit I'll make on his behalf.) --- lib/paranoia/isort.c | 196 ++++++++++++++++++++++++++++++++++++++----- lib/paranoia/isort.h | 65 +++++++++++++- 2 files changed, 235 insertions(+), 26 deletions(-) diff --git a/lib/paranoia/isort.c b/lib/paranoia/isort.c index ccbd5d08..f0ab73f1 100644 --- a/lib/paranoia/isort.c +++ b/lib/paranoia/isort.c @@ -1,5 +1,5 @@ /* - $Id: isort.c,v 1.5 2005/10/08 09:08:10 rocky Exp $ + $Id: isort.c,v 1.6 2005/10/15 03:18:42 rocky Exp $ Copyright (C) 2004, 2005 Rocky Bernstein Copyright (C) 1998 Monty xiphmont@mit.edu @@ -23,6 +23,18 @@ /* Old isort got a bit complex. This re-constrains complexity to give a go at speed through a more alpha-6-like mechanism. */ + +/* "Sort" is a bit of a misnomer in this implementation. It's actually + * basically a hash table of sample values (with a linked-list collision + * resolution), which lets you quickly determine where in a vector a + * particular sample value occurs. + * + * Collisions aren't due to hash collisions, as the table has one bucket + * for each possible sample value. Instead, the "collisions" represent + * multiple occurrences of a given value. + */ + + #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -36,6 +48,14 @@ #include "p_block.h" #include "isort.h" + +/* =========================================================================== + * sort_alloc() + * + * Allocates and initializes a new, empty sort_info object, which can be + * used to index up to (size) samples from a vector. 
+ */ + sort_info_t * sort_alloc(long size) { @@ -54,21 +74,47 @@ sort_alloc(long size) return(ret); } + +/* =========================================================================== + * sort_unsortall() (internal) + * + * This function resets the index for further use with a different vector + * or range, without the overhead of an unnecessary free/alloc. + */ + void sort_unsortall(sort_info_t *i) { - if(i->lastbucket>2000){ /* a guess */ + /* If there were few enough different samples encountered (and hence few + * enough buckets used), we can just zero out those buckets. If there + * were many (2000 is picked somewhat arbitrarily), it's faster simply to + * zero out all buckets with a memset() rather than walking the data + * structure and zeroing them out one by one. + */ + if (i->lastbucket>2000) { /* a guess */ memset(i->head,0,65536*sizeof(sort_link_t *)); - }else{ + } else { long b; - for(b=0;b<i->lastbucket;b++) + for (b=0; b<i->lastbucket; b++) i->head[i->bucketusage[b]]=NULL; } i->lastbucket=0; i->sortbegin=-1; + + /* Curiously, this function preserves the vector association created + * by sort_setup(), but it is used only internally by sort_setup, so + * preserving this association is unnecessary. + */ } + +/* =========================================================================== + * sort_free() + * + * Releases all memory consumed by a sort_info object. + */ + void sort_free(sort_info_t *i) { @@ -77,60 +123,150 @@ sort_free(sort_info_t *i) free(i->bucketusage); free(i); } - + + +/* =========================================================================== + * sort_sort() (internal) + * + * This function builds the index to allow for fast searching for sample + * values within a portion (sortlo - sorthi) of the object's associated + * vector. It is called internally and only when needed.
+ */ + static void sort_sort(sort_info_t *i,long sortlo,long sorthi) { long j; - for(j=sorthi-1;j>=sortlo;j--){ - sort_link_t **hv=i->head+i->vector[j]+32768; - sort_link_t *l=i->revindex+j; + /* We walk backward through the range to index because we insert new + * samples at the head of each bucket's list. At the end, they'll be + * sorted from first to last occurrence. + */ + for (j=sorthi-1; j>=sortlo; j--) { + /* i->vector[j] = the signed 16-bit sample to index. + * hv = pointer to the head of the sorted list of occurrences + * of this sample + * l = the node to associate with this sample + * + * We add 32768 to convert the signed 16-bit integer to an unsigned + * range from 0 to 65535. + * + * Note that l is located within i->revindex at a position + * corresponding to the sample's position in the vector. This allows + * ipos() to determine the sample position from a returned sort_link. + */ + sort_link_t **hv = i->head+i->vector[j]+32768; + sort_link_t *l = i->revindex+j; + /* If this is the first time we've encountered this sample, add its + * bucket to the list of buckets used. This list is used only for + * resetting the index quickly. + */ if(*hv==NULL){ - i->bucketusage[i->lastbucket]=i->vector[j]+32768; + i->bucketusage[i->lastbucket] = i->vector[j]+32768; i->lastbucket++; } + + /* Point the new node at the old head, then assign the new node as + * the new head. + */ l->next=*hv; *hv=l; } + + /* Mark the index as initialized. + */ i->sortbegin=0; } -/* size *must* be less than i->maxsize */ + +/* =========================================================================== + * sort_setup() + * + * This function initializes a previously allocated sort_info_t. The + * sort_info_t is associated with a vector of samples of length + * (size), whose position begins at (*abspos) within the CD's stream + * of samples. Only the range of samples between (sortlo, sorthi) + * will eventually be indexed for fast searching.
(sortlo, sorthi) + * are absolute sample positions. + * + * ???: Why is abspos a pointer? Why not just store a copy? + * + * Note: size *must* be <= the size given to the preceding sort_alloc(), + * but no error checking is done here. + */ + void -sort_setup(sort_info_t *i, int16_t *vector, long *abspos, long size, - long sortlo, long sorthi) +sort_setup(sort_info_t *i, int16_t *vector, long int *abspos, + long int size, long int sortlo, long int sorthi) { - if(i->sortbegin!=-1)sort_unsortall(i); + /* Reset the index if it has already been built. + */ + if (i->sortbegin!=-1) + sort_unsortall(i); i->vector=vector; i->size=size; i->abspos=abspos; - i->lo=min(size,max(sortlo-*abspos,0)); - i->hi=max(0,min(sorthi-*abspos,size)); + /* Convert the absolute (sortlo, sorthi) to offsets within the vector. + * Note that the index will not be built until sort_getmatch() is called. + * Here we're simply hanging on to the range to index until then. + */ + i->lo = min(size, max(sortlo - *abspos, 0)); + i->hi = max(0, min(sorthi - *abspos, size)); } +/* =========================================================================== + * sort_getmatch() + * + * This function returns a sort_link_t pointer which refers to the + * first sample equal to (value) in the vector. It only searches for + * hits within (overlap) samples of (post), where (post) is an offset + * within the vector. The caller can determine the position of the + * matched sample using ipos(sort_info *, sort_link *). + * + * This function returns NULL if no matches were found. + */ + sort_link_t * sort_getmatch(sort_info_t *i, long post, long overlap, int value) { sort_link_t *ret; - if(i->sortbegin==-1)sort_sort(i,i->lo,i->hi); + /* If the vector hasn't been indexed yet, index it now. + */ + if (i->sortbegin==-1) + sort_sort(i,i->lo,i->hi); /* Now we reuse lo and hi */ - + + /* We'll only return samples within (overlap) samples of (post). 
+ * Clamp the boundaries to search to the boundaries of the array, + * convert the signed sample to an unsigned offset, and store the + * state so that future calls to sort_nextmatch do the right thing. + * + * Reusing lo and hi this way is awful. + */ post=max(0,min(i->size,post)); i->val=value+32768; i->lo=max(0,post-overlap); /* absolute position */ i->hi=min(i->size,post+overlap); /* absolute position */ + /* Walk through the linked list of samples with this value, until + * we find the first one within the bounds specified. If there + * aren't any, return NULL. + */ ret=i->head[i->val]; - while(ret){ - if(ipos(i,ret)<i->lo){ + + while (ret) { + /* ipos() calculates the offset (in terms of the original vector) + * of this hit. + */ + + if (ipos(i,ret)<i->lo) { ret=ret->next; - }else{ - if(ipos(i,ret)>=i->hi) + } else { + if (ipos(i,ret)>=i->hi) ret=NULL; break; } @@ -139,12 +275,28 @@ sort_getmatch(sort_info_t *i, long post, long overlap, int value) return(ret); } + +/* =========================================================================== + * sort_nextmatch() + * + * This function returns a sort_link_t pointer which refers to the next sample + * matching the criteria previously passed to sort_getmatch(). See + * sort_getmatch() for details. + * + * This function returns NULL if no further matches were found.
+ */ + if (!ret || ipos(i,ret)>=i->hi) + return(NULL); + return(ret); } diff --git a/lib/paranoia/isort.h b/lib/paranoia/isort.h index 5f7a8680..b4006493 100644 --- a/lib/paranoia/isort.h +++ b/lib/paranoia/isort.h @@ -1,5 +1,5 @@ /* - $Id: isort.h,v 1.3 2005/10/08 09:08:10 rocky Exp $ + $Id: isort.h,v 1.4 2005/10/15 03:18:42 rocky Exp $ Copyright (C) 2004, 2005 Rocky Bernstein Copyright (C) 1998 Monty xiphmont@mit.edu @@ -47,13 +47,70 @@ typedef struct sort_info { } sort_info_t; +/*! ======================================================================== + * sort_alloc() + * + * Allocates and initializes a new, empty sort_info object, which can + * be used to index up to (size) samples from a vector. + */ extern sort_info_t *sort_alloc(long int size); + +/*! ======================================================================== + * sort_unsortall() (internal) + * + * This function resets the index for further use with a different + * vector or range, without the overhead of an unnecessary free/alloc. + */ extern void sort_unsortall(sort_info_t *i); -extern void sort_setup(sort_info_t *i,int16_t *vector,long *abspos,long size, - long sortlo, long sorthi); + +/*! ======================================================================== + * sort_setup() + * + * This function initializes a previously allocated sort_info_t. The + * sort_info_t is associated with a vector of samples of length + * (size), whose position begins at (*abspos) within the CD's stream + * of samples. Only the range of samples between (sortlo, sorthi) + * will eventually be indexed for fast searching. (sortlo, sorthi) + * are absolute sample positions. + * + * ???: Why is abspos a pointer? Why not just store a copy? + * + * Note: size *must* be <= the size given to the preceding sort_alloc(), + * but no error checking is done here. 
+ */ +extern void sort_setup(sort_info_t *i, int16_t *vector, long int *abspos, + long int size, long int sortlo, long int sorthi); + +/* ========================================================================= + * sort_free() + * + * Releases all memory consumed by a sort_info object. + */ extern void sort_free(sort_info_t *i); + +/*! ======================================================================== + * sort_getmatch() + * + * This function returns a sort_link_t pointer which refers to the + * first sample equal to (value) in the vector. It only searches for + * hits within (overlap) samples of (post), where (post) is an offset + * within the vector. The caller can determine the position of the + * matched sample using ipos(sort_info *, sort_link *). + * + * This function returns NULL if no matches were found. + */ extern sort_link_t *sort_getmatch(sort_info_t *i, long post, long overlap, int value); + +/*! ======================================================================== + * sort_nextmatch() + * + * This function returns a sort_link_t pointer which refers to the + * next sample matching the criteria previously passed to + * sort_getmatch(). See sort_getmatch() for details. + * + * This function returns NULL if no further matches were found. + */ extern sort_link_t *sort_nextmatch(sort_info_t *i, sort_link_t *prev); #define is(i) (i->size) @@ -62,5 +119,5 @@ extern sort_link_t *sort_nextmatch(sort_info_t *i, sort_link_t *prev); #define iv(i) (i->vector) #define ipos(i,l) (l-i->revindex) -#endif +#endif /* _ISORT_H_ */