diff --git a/Makefile b/Makefile index 48d420c..86e054e 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,123 @@ BINARY := findcrcs +DISTNAME := $(shell pwd | awk -F '/' '{print $$(NF)}') +BINPREFIX := +EXEFLAGS := +CLEAN := ifeq ($(OS), Windows_NT) - BINARY := $(BINARY).exe + CLEAN := $(BINARY) + BINARY := $(BINARY).exe + EXEFLAGS := -static + ifeq ($(64), 1) + BINPREFIX := x86_64-w64-mingw32- + endif endif -all: findcrcs +all: +ifeq ($(OS), Windows_NT) + @(objdump -a $(BINARY) 2> /dev/null | grep "pei-i386" > /dev/null || make --no-print-directory clean; exit 0) +endif + @make --no-print-directory $(BINARY) + +64: +ifeq ($(OS), Windows_NT) + @(objdump -a $(BINARY) 2> /dev/null | grep "pei-i386" > /dev/null && make --no-print-directory clean; exit 0) + @make --no-print-directory $(BINARY) 64=1 +else + @make --no-print-directory $(BINARY) +endif clean: - rm $(BINARY) + rm -rf $(BINARY) $(CLEAN) crcutil.a *.o *.exe test.bin d1aa92b05d1f2638f423661ae4735446.bin -findcrcs: findcrcs.cc md5.c md5.h crcutil-1.0 - g++ -O3 -Wall -mcrc32 -o $(BINARY) findcrcs.cc md5.c crcutil-1.0/examples/interface.cc crcutil-1.0/code/*.cc -Icrcutil-1.0/code -Icrcutil-1.0/tests -Icrcutil-1.0/examples - strip $(BINARY) +mrproper: + @make --no-print-directory clean + rm -rf test.bin *.tar.gz *.zip $(DISTNAME)-bin-win32 $(DISTNAME)-bin-win64 -crcutil-1.0: crcutil-1.0.tar.gz - tar xfz crcutil-1.0.tar.gz +dist: + make mrproper + (cd ..; tar -cz --numeric-owner -f $(DISTNAME).tar.gz $(DISTNAME)) + mv ../$(DISTNAME).tar.gz . +ifeq ($(OS), Windows_NT) + mkdir $(DISTNAME)-bin-win32 + make all + cp $(BINARY) $(DISTNAME)-bin-win32 + cp README $(DISTNAME)-bin-win32/README.txt + cp COPYING $(DISTNAME)-bin-win32/COPYING.txt + unix2dos $(DISTNAME)-bin-win32/README.txt + unix2dos $(DISTNAME)-bin-win32/COPYING.txt + zip -r $(DISTNAME)-bin-win32.zip $(DISTNAME)-bin-win32 + rm -rf $(DISTNAME)-bin-win32 + make clean + mkdir $(DISTNAME)-bin-win64 + make all 64=1 + cp $(BINARY) $(DISTNAME)-bin-win64 + cp README $(DISTNAME)-bin-win64/README.txt + cp COPYING $(DISTNAME)-bin-win64/COPYING.txt + unix2dos $(DISTNAME)-bin-win64/README.txt + unix2dos $(DISTNAME)-bin-win64/COPYING.txt + zip -r $(DISTNAME)-bin-win64.zip $(DISTNAME)-bin-win64 + rm -rf $(DISTNAME)-bin-win64 + make clean +endif + +test: $(BINARY) test.bin + @echo "" + @echo "return should be: 100000000 13fbda0d d1aa92b05d1f2638f423661ae4735446" + @echo "time ./$(BINARY) test.bin 1000000 13fbda0d" + @echo "" + @sh -c "time ./$(BINARY) test.bin 1000000 13fbda0d" + +$(BINARY): findcrcs.cc md5.c md5.h crcutil-1.0 crcutil.a + $(BINPREFIX)g++ -O3 -D_FILE_OFFSET_BITS=64 -Wall -o $@ $(EXEFLAGS) findcrcs.cc md5.c crcutil.a -Icrcutil-1.0/code -Icrcutil-1.0/examples + $(BINPREFIX)strip -s $@ + +crcutil.a: crcutil-1.0 + rm -rf *.o + $(BINPREFIX)g++ -O3 -Wall -mcrc32 -c crcutil-1.0/examples/interface.cc crcutil-1.0/code/*.cc -Icrcutil-1.0/code -Icrcutil-1.0/tests -Icrcutil-1.0/examples + $(BINPREFIX)ar r crcutil.a *.o + rm -rf *.o + +crcutil-1.0: + wget -q -O - http://crcutil.googlecode.com/files/crcutil-1.0.tar.gz | tar xfz - chmod -R og-w+rX crcutil-1.0 chown -R 0.0 crcutil-1.0 touch crcutil-1.0 -crcutil-1.0.tar.gz: - wget -q -O - http://crcutil.googlecode.com/files/crcutil-1.0.tar.gz > crcutil-1.0.tar.gz - touch crcutil-1.0.tar.gz +test.bin: + @echo "creating test.bin" + @(echo -en \ + "\x1f\x8b\x08\x00\x6d\xe2\x4a\x51\x02\x03\x93\xef\xe6\x60\xc8\x7d"\ + "\xe4\x15\xc8\xc4\xfc\xf6\xee\x5e\xaf\xd9\x81\x32\xc7\x3f\x4e\x16"\ + "\x3f\xe1\xcc\xb5\x4f\x64\x4e\xd2\x5b\xe6\xbc\xb8\x86\x1d\x1c\x85"\ + "\x85\x2f\xa6\x34\xbd\xd5\x6a\x0c\xd2\x50\xda\x61\x2d\x21\xba\x5b"\ + "\x44\x71\xfa\x03\x8d\xd8\x24\xf6\x74\x4f\xb5\xfc\x05\x3f\x8c\x3c"\ + "\x4f\x86\xdd\x3e\xe8\xa8\xe5\x31\xef\xd3\x9c\xec\x17\x45\x2f\x9e"\ + "\x1b\xcf\xfd\x69\xfd\xff\xc7\xfc\xeb\xf1\x9b\x36\xfc\x7d\x7c\xf6"\ + "\x76\xd6\xdb\xba\x88\x95\x2d\x02\x55\x7f\x4f\x3e\x28\xbf\xc6\xc3"\ + "\x00\x01\x0e\x8f\xaa\xcc\xdb\xb7\x16\xf1\x08\x7c\x7a\x94\xb5\xf7"\ + "\x33\x1f\x54\xd4\x23\xb6\xbc\x6a\x95\x9c\x3b\xe3\xf9\xd9\x6b\x4d"\ + "\x2d\xa1\x62\x0a\x95\xee\xab\xb5\x8d\x3f\x3a\x1d\x38\x7c\xaf\x2c"\ + "\x16\xae\xf0\xde\x9d\xc7\xb1\xd6\x79\x8c\x7a\xdd\x9f\x6f\xf2\xda"\ + "\xc2\x55\x5e\x6f\xdf\xea\xb5\x93\xf5\x44\xf7\xea\xaf\x7d\x32\x50"\ + "\xc1\x09\x7b\xde\x7d\x09\xd9\x7b\x87\xe9\xcb\x3c\xf3\xe8\xec\x22"\ + "\x98\xed\x9f\xfa\xc2\xc2\xd3\x1e\xa9\x33\xf2\x57\xdf\x2e\x45\x58"\ + "\x54\xdc\x5f\xfe\x76\x07\xeb\x0e\x9d\x73\xe9\xe6\x5f\xa0\x82\x0d"\ + "\x4f\x62\xe5\xaf\x69\x3d\x15\x53\xd0\x5f\x77\xcd\x1a\xa1\x7d\xd7"\ + "\xe5\xe0\xaf\x45\xdc\x17\xaa\xef\xe5\xac\x3d\xc9\x08\x15\x3d\x69"\ + "\x1d\x15\x7b\xf2\xf4\x66\x86\xc4\x98\x9a\x5f\x35\x52\x10\xb1\x07"\ + "\xf3\x7f\x5f\xbe\xc8\x70\xa9\xfc\x7b\x5c\xf3\x92\x2f\xfb\x4c\xa5"\ + "\xae\x7e\xba\x38\x8f\xa7\xcf\x7f\x1f\x13\x54\xcf\x4a\x69\xdd\x33"\ + "\xe9\x85\xa2\x1f\x4a\x36\xd6\x28\xb0\x43\xc5\x54\xd6\xef\xdc\x3d"\ + "\xf9\xb8\xee\x81\x1d\x3b\xce\x3e\x81\xbb\x42\xf7\x2e\xff\xf7\xc7"\ + "\x62\x0b\xe2\x73\x74\xe7\xbd\x42\x58\x98\x6d\xb2\xea\xa9\xd8\x02"\ + "\x5b\x49\x99\xf9\x27\xe0\x82\x20\x13\x3f\x1a\x1c\xb8\x7c\xf4\x53"\ + "\x23\x22\x0c\x9e\xbd\xf3\x93\xdb\xce\xf0\xf8\x91\xd9\x64\x66\xa8"\ + "\xd8\xa4\xed\x25\xf6\x77\xfa\xa3\x1b\xea\x36\x6e\xad\x58\x05\xb7"\ + "\xfa\xfd\x9a\x8b\x2b\xa6\x45\x37\x7c\x4b\x29\xa8\x51\x82\x0b\xe6"\ + "\x6f\x31\x98\x7f\xf4\x30\x83\xbf\xf8\x69\x76\x44\x08\xcc\xda\x6d"\ + "\x5d\x79\x87\xa9\xa7\xfb\xb6\x0f\x22\x4e\x2e\xad\x4f\x5c\x2a\xe4"\ + "\xf4\xe2\x6b\xb7\xe3\xeb\x54\xb0\xd0\x0f\xf9\x9a\x35\x75\xf5\x5f"\ + "\x1f\xaa\xf5\xd4\xed\xfb\xc1\xad\xb6\xcd\x69\xb2\xcf\x37\x26\x06"\ + "\x00\x14\x69\xf9\x95\x72\x02\x00\x00"\ + ) | zcat | zcat | zcat > test.bin diff --git a/README b/README index f420cbf..070c0cc 100644 --- a/README +++ b/README @@ -25,6 +25,8 @@ Warning: Compiling: Use "make" on any linux/unix/bsd console nearby, or if you must, an msys or cygwin environment. You need to use a relatively recent gcc (4.5.0+ ish I guess). + On windows, if you have a 64bit MinGW, you can use "make 64=1" to build a (much faster) 64bit version. + This software uses crcutil-1.0 for providing fast crc calculations. crcutil is made by Andrew Kadatch and Bob Jenkins and can be found on http://code.google.com/p/crcutil/ Do not contact them for support on findcrcs. diff --git a/README.md b/README.md new file mode 100644 index 0000000..070c0cc --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +What: + This is a not yet idiotproof version of findcrcs. + It is to be used for finding a block of data which matches a specific crc. + +How: + findcrcs [more crcs...] + + File is a big file which should or may contain the searched for data. + Size of window is the size of the block of data to find. + Crc is the crc to find in the file (may be more then 1, but all will be matched on the window size). + + If a match is found it will print out an md5sum of the matched block for further inspection. + For best results, add some (1MB or so) zero bytes padding around the file first. + In a future version, this might be a selectable option of this program. + +Why: + Useful for finding audio offsets in disk images together with the redump.org database. + +Warning: + This software is not yet idiotproof! + - It does not check arguments for validity yet (especially size of window and crc's.) + - No paddiong option yet. + if matching audiodata, you should pad the combined audiotracks with zero bytes at the start and end. + +Compiling: + Use "make" on any linux/unix/bsd console nearby, or if you must, an msys or cygwin environment. + You need to use a relatively recent gcc (4.5.0+ ish I guess). + On windows, if you have a 64bit MinGW, you can use "make 64=1" to build a (much faster) 64bit version. + + This software uses crcutil-1.0 for providing fast crc calculations. + crcutil is made by Andrew Kadatch and Bob Jenkins and can be found on http://code.google.com/p/crcutil/ + Do not contact them for support on findcrcs. + The Makefile will try to pull in version 1.0 through wget if it is not supplied yet. + + Also, this program makes use of the MD5 implementation of Alexander Peslyak. + This is found at http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + A small casting patch was made to support g++, this small patch is released under the same license as the original md5.c file. + +Contact: + At the moment, see the redump.org forum thread where you got this. + +-V. diff --git a/crcutil-1.0/autogen.sh b/crcutil-1.0/autogen.sh old mode 100755 new mode 100644 diff --git a/crcutil-1.0/configure b/crcutil-1.0/configure old mode 100755 new mode 100644 diff --git a/crcutil-1.0/depcomp b/crcutil-1.0/depcomp old mode 100755 new mode 100644 diff --git a/crcutil-1.0/install-sh b/crcutil-1.0/install-sh old mode 100755 new mode 100644 diff --git a/crcutil-1.0/missing b/crcutil-1.0/missing old mode 100755 new mode 100644 diff --git a/findcrcs.cc b/findcrcs.cc index 5c7c12a..87863ef 100644 --- a/findcrcs.cc +++ b/findcrcs.cc @@ -3,34 +3,33 @@ This file is part of findcrcs. - findcrcs is free software: you can redistribute it and/or modify + This is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - findcrcs is distributed in the hope that it will be useful, + This is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with findcrcs. If not, see . + along with this software. If not, see . */ /* - findcrcs is using crcutil-1.0 for providing fast crc calculations + This software is using crcutil-1.0 for providing fast crc calculations crcutil is made by Andrew Kadatch and Bob Jenkins and can be found on http://code.google.com/p/crcutil/ - Do not contact them for support on findcrcs + Do not contact them for support on this software - Also, findcrcs makes use of the MD5 implementation of Alexander Peslyak. + Also, this software makes use of the MD5 implementation of Alexander Peslyak. This is found at http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 - A small casting patch was made to support g++. - This patch is released under the same license as the original md5.c file. + Changes were made for OpenSSL compatibility and a small casting patch for g++ support. + These changes are released under the same license as the original md5.c file. */ -// Usage: findcrcs [more crcs...] -// not yet idiotproof -// code comments also not included (yet, if ever) +// Usage: findcrcs [-e] [-p PADDING] [-s SEEDFILE] [--] [CRCS...] +// code comments not included (yet, if ever) #include #include @@ -39,6 +38,7 @@ #include #include #include +#include #include "md5.h" #include "interface.h" @@ -47,101 +47,435 @@ #endif #define BUFFERSIZE 65536 -#define MD5_DIGEST_LENGTH 16 +#define POLY 0xedb88320 -void checkcrcs(unsigned int crc, unsigned int crcs[], int totalcrcs, int fd, int offset, int windowsize); +int isuint(char *string); +int iscrc(char *string); +int ismd5(char *string); +unsigned int getcrc(char *file); +void usage(); + +void findcrcs(); +int checkcrcs(unsigned int crc, int offset); +void foundcrc(int index, int offset); +unsigned char *md5(int offset); +char *md5hash2string(unsigned char *md5hash); +void extractwindow(int offset, char *md5string); + +void extract_init(int offset); +int extract_read(void *buffer, unsigned int size); + +char *file, *seedfile, *extractfile; +int fd, extract, single, totalcrcs; +unsigned int padding, filesize, windowsize, seedcrc; + +typedef struct { + unsigned int crc; + int checkmd5; + unsigned char md5hash[MD5_DIGEST_LENGTH]; +} crc_t; + +crc_t *crcs; int main(int argc, char *argv[]) { - int i1, i2, fd1, fd2, fd3, filesize, windowsize, readbytes; - char *filename; - crcutil_interface::CRC *crc; - unsigned long long returnvalue; - unsigned char buffer1[BUFFERSIZE], buffer2[BUFFERSIZE]; + int i, option; struct stat stats; - unsigned int crcs[argc - 3]; + unsigned long int ulargument; + char md5byte[3]; + crc_t *reallocation; - if (argc < 4) { - fprintf(stderr, "usage: findcrcs [more crcs...]\n"); + windowsize = 0; + extract = 0; + single = 0; + padding = 0; + seedfile = NULL; + extractfile = NULL; + + while ((option = getopt(argc, argv, "ef:p:qs:h?")) != -1) { + switch (option) { + case 'e': + extract = 1; + break; + case 'f': + extractfile = optarg; + extract = 1; + single = 1; + break; + case 'p': + if (!isuint(optarg)) { + fprintf(stderr, "%s: padding size must be a positive integer\n", argv[0]); + return 1; + } + errno = 0; + ulargument = strtoul(optarg, 0, 10); + padding = (unsigned int)ulargument; + if (errno || ulargument > UINT_MAX) { + fprintf(stderr, "%s: padding size too big\n", argv[0]); + return 1; + } + break; + case 'q': + single = 1; + break; + case 's': + seedfile = optarg; + if ((fd = open(seedfile, O_RDONLY | O_BINARY)) == -1) { + perror(seedfile); + return 1; + } + close(fd); + break; + case 'h': + case '?': + default: + usage(); + return 1; + } + } + + if (argc < optind + 2) { + usage(); return 1; } - filename = argv[1]; - if (stat(filename, &stats) == -1) { - perror("findcrcs"); + file = argv[optind++]; + if ((fd = open(file, O_RDONLY | O_BINARY)) == -1) { + perror(file); + return 1; + } + + if (stat(file, &stats) == -1) { + perror(file); return 1; } filesize = stats.st_size; - windowsize = atoi(argv[2]); - if (windowsize > filesize) { - fprintf(stderr, "findcrcs: window size too big\n"); + if (!isuint(argv[optind])) { + fprintf(stderr, "%s: Window size must be a positive integer\n", argv[0]); return 1; } - for (i1 = 0; i1 < argc - 3; i1++) { - crcs[i1] = (unsigned int)strtoul(argv[i1 + 3], 0, 16); + errno = 0; + ulargument = strtoul(argv[optind++], 0, 10); + windowsize = (unsigned int)ulargument; + if (errno || ulargument > UINT_MAX || (windowsize + (2 * padding)) > filesize) { + fprintf(stderr, "%s: Window size too big\n", argv[0]); + return 1; } - crc = crcutil_interface::CRC::Create(0xedb88320, 0, 32, true, 0, 0, windowsize, 0, NULL); - - fd1 = open(filename, O_RDONLY | O_BINARY); - fd2 = open(filename, O_RDONLY | O_BINARY); - fd3 = open(filename, O_RDONLY | O_BINARY); - - returnvalue = 0; - for (i1 = 0; i1 < windowsize / BUFFERSIZE; i1++) { - read(fd1, &buffer1, BUFFERSIZE); - crc->Compute(&buffer1, BUFFERSIZE, &returnvalue); + if (windowsize == 0) { + fprintf(stderr, "%s: Window size can not be 0\n", argv[0]); + return 1; } - if ((windowsize % BUFFERSIZE) != 0) { - read(fd1, &buffer1, (windowsize % BUFFERSIZE)); - crc->Compute(&buffer1, (windowsize % BUFFERSIZE), &returnvalue); - } - checkcrcs((unsigned int)returnvalue, crcs, argc - 3, fd3, 0, windowsize); - for (i1 = 0; i1 < ((filesize - windowsize) / BUFFERSIZE) + 1; i1++) { - readbytes = read(fd1, &buffer1, BUFFERSIZE); - read(fd2, &buffer2, BUFFERSIZE); - for (i2 = 0; i2 < readbytes; i2++) { - crc->Roll(buffer2[i2], buffer1[i2], &returnvalue, NULL); - checkcrcs((unsigned int)returnvalue, crcs, argc - 3, fd3, (i1 * BUFFERSIZE) + i2 + 1, windowsize); + totalcrcs = 0; + crcs = NULL; + do { + if((reallocation = (crc_t *)realloc(crcs, sizeof(crc_t) * (totalcrcs + 1))) == NULL) { + fprintf(stderr, "crcs realloc failed. Out of memory?\n"); + return 1; } + crcs = reallocation; + memset(&crcs[totalcrcs], 0, sizeof(crc_t)); + + if (!iscrc(argv[optind])) { + fprintf(stderr, "%s: %s does not look like an crc\n", argv[0], argv[optind]); + return 1; + } + crcs[totalcrcs].crc = (unsigned int)strtoul(argv[optind++], 0, 16); + + if ((optind < argc) && ismd5(argv[optind])) { + crcs[totalcrcs].checkmd5 = 1; + for (i = 0; i < MD5_DIGEST_LENGTH * 2; i += 2) { + md5byte[0] = argv[optind][i]; + md5byte[1] = argv[optind][i + 1]; + md5byte[2] = 0; + crcs[totalcrcs].md5hash[i / 2] = (unsigned char)strtol(md5byte, 0, 16); + } + optind++; + } + + totalcrcs++; + } while (optind < argc); + + if (seedfile) { + seedcrc = getcrc(seedfile); + printf("seedcrc: %08x\n", seedcrc); + fflush(stdout); } - close(fd3); - close(fd2); - close(fd1); - crc->Delete(); + findcrcs(); + + free(crcs); + close(fd); return 0; } -void checkcrcs(unsigned int crc, unsigned int crcs[], int totalcrcs, int fd, int offset, int windowsize) { - int i1, i2; - unsigned int buffer[BUFFERSIZE]; - unsigned char md5[MD5_DIGEST_LENGTH]; - MD5_CTX ctx; +int isuint(char *string) { + if (strlen(string) == 0) return 0; + if (strspn(string, "0123456789") != strlen(string)) return 0; + if (string[0] == '0' && strlen(string) != 1) return 0; + return 1; +} - for (i1 = 0; i1 < totalcrcs; i1++) { - if (crc == crcs[i1]) { - lseek(fd, offset, SEEK_SET); - MD5_Init(&ctx); +int iscrc(char *string) { + if (strlen(string) != 8) return 0; + if (strspn(string, "0123456789abcdefABCDEF") != strlen(string)) return 0; + return 1; +} - for (i2 = 0; i2 < windowsize / BUFFERSIZE; i2++) { - read(fd, &buffer, BUFFERSIZE); - MD5_Update(&ctx, &buffer, BUFFERSIZE); - } - if ((windowsize % BUFFERSIZE) != 0) { - read(fd, &buffer, (windowsize % BUFFERSIZE)); - MD5_Update(&ctx, &buffer, (windowsize % BUFFERSIZE)); - } +int ismd5(char *string) { + if (strlen(string) != MD5_DIGEST_LENGTH * 2) return 0; + if (strspn(string, "0123456789abcdefABCDEF") != strlen(string)) return 0; + return 1; +} - MD5_Final(md5, &ctx); - printf("%d %08x ", offset, crc); - for (i2 = 0; i2 < MD5_DIGEST_LENGTH; i2++) { - printf("%02x", md5[i2]); +unsigned int getcrc(char *file) { + int fd, bytes; + crcutil_interface::CRC *crcutil; + unsigned long long crc; + unsigned char buffer[BUFFERSIZE]; + + crcutil = crcutil_interface::CRC::Create(POLY, 0, 32, true, 0, 0, 0, 0, NULL); + fd = open(file, O_RDONLY | O_BINARY); + + crc = 0; + while ((bytes = read(fd, &buffer, BUFFERSIZE)) != 0) { + crcutil->Compute(&buffer, bytes, &crc); + } + + close(fd); + crcutil->Delete(); + + return (unsigned int)crc; +} + +void usage() { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: findcrcs [OPTION]... [--] [MD5] [CRC [MD5]...]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Find the offset of CRCs in FILE with a window size of WINDOWSIZE.\n"); + fprintf(stderr, "Outputs the crc, offset and md5 of a found segment.\n"); + fprintf(stderr, "If an MD5 is given, it will only output or extract on a matching md5 hash.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " -e extract the found segments with the md5 hash as filename\n"); + fprintf(stderr, " -f EXTRACTFILE use EXTRACTFILE as file to extract to\n"); + fprintf(stderr, " implies -e and -q\n"); + fprintf(stderr, " -p PADDING use PADDING amount of zero bytes around the input file\n"); + fprintf(stderr, " this can result in a negative offset in the results\n"); + fprintf(stderr, " if used with -s only an end padding will be added\n"); + fprintf(stderr, " -q quit processing after finding a match and optionally\n"); + fprintf(stderr, " extracting that match\n"); + fprintf(stderr, " -s SEEDFILE get an initial crc from SEEDFILE\n"); + fprintf(stderr, " if used with -e, the SEEDFILE will be joined with the found\n"); + fprintf(stderr, " segment\n"); + fprintf(stderr, "\n"); +} + +void findcrcs() { + unsigned int i1; + int i2, fd1, fd2, readbytes, done; + crcutil_interface::CRC *crcutil; + unsigned long long crc; + unsigned char buffer1[BUFFERSIZE], buffer2[BUFFERSIZE]; + + crcutil = crcutil_interface::CRC::Create(POLY, 0, 32, true, 0, 0, windowsize, 0, NULL); + + fd1 = open(file, O_RDONLY | O_BINARY); + fd2 = open(file, O_RDONLY | O_BINARY); + + crc = 0; + for (i1 = 0; i1 < windowsize / BUFFERSIZE; i1++) { + read(fd1, &buffer1, BUFFERSIZE); + crcutil->Compute(&buffer1, BUFFERSIZE, &crc); + } + if ((windowsize % BUFFERSIZE) != 0) { + read(fd1, &buffer1, (windowsize % BUFFERSIZE)); + crcutil->Compute(&buffer1, (windowsize % BUFFERSIZE), &crc); + } + + done = 0; + if (!(single && checkcrcs((unsigned int)crc, 0))) { + for (i1 = 0; i1 < ((filesize - windowsize) / BUFFERSIZE) + 1; i1++) { + readbytes = read(fd1, &buffer1, BUFFERSIZE); + read(fd2, &buffer2, BUFFERSIZE); + for (i2 = 0; i2 < readbytes; i2++) { + crcutil->Roll(buffer2[i2], buffer1[i2], &crc, NULL); + if ((done = (single & checkcrcs((unsigned int)crc, (i1 * BUFFERSIZE) + i2 + 1))) == 1) break; } - printf("\n"); - return; + if (done) break; } } + + close(fd2); + close(fd1); + crcutil->Delete(); +} + +int checkcrcs(unsigned int crc, int offset) { + int i; + + for (i = 0; i < totalcrcs; i++) { + if (crc == crcs[i].crc) { + foundcrc(i, offset); + return 1; + } + } + return 0; +} + +void foundcrc(int index, int offset) { + char *md5string; + unsigned char *md5hash; + + md5hash = md5(offset); + md5string = md5hash2string(md5hash); + + if (!crcs[index].checkmd5 || (memcmp(crcs[index].md5hash, md5hash, MD5_DIGEST_LENGTH) == 0)) { + printf("%d %08x %s\n", offset, crcs[index].crc, md5string); + fflush(stdout); + if (extract) { + printf("Extracting..."); + fflush(stdout); + extractwindow(offset, md5string); + printf(" Done\n"); + fflush(stdout); + } + } + + free(md5string); + free(md5hash); +} + +unsigned char *md5(int offset) { + int seedfd, bytes; + MD5_CTX ctx; + unsigned char *md5hash; + unsigned char buffer[BUFFERSIZE]; + + if((md5hash = (unsigned char *)calloc(MD5_DIGEST_LENGTH, sizeof(unsigned char))) == NULL) { + fprintf(stderr, "MD5 calloc failed. Out of memory?\n"); + exit(1); + } + + MD5_Init(&ctx); + + if (seedfile) { + seedfd = open(seedfile, O_RDONLY | O_BINARY); + while ((bytes = read(seedfd, &buffer, BUFFERSIZE)) != 0) { + MD5_Update(&ctx, &buffer, bytes); + } + close(seedfd); + } + + extract_init(offset); + while ((bytes = extract_read(&buffer, BUFFERSIZE)) != 0) { + MD5_Update(&ctx, &buffer, bytes); + } + + MD5_Final(md5hash, &ctx); + return md5hash; +} + +char *md5hash2string(unsigned char *md5hash) { + int i; + char *md5string; + + if((md5string = (char *)calloc(((MD5_DIGEST_LENGTH * 2) + 1), sizeof(char))) == NULL) { + fprintf(stderr, "MD5 string calloc failed. Out of memory?\n"); + exit(1); + } + + for (i = 0; i < MD5_DIGEST_LENGTH; i++) { + sprintf(md5string + (i * 2), "%02x", md5hash[i]); + } + + return md5string; +} + +void extractwindow(int offset, char *md5string) { + int extractfd, seedfd, bytes; + char *file; + unsigned char buffer[BUFFERSIZE]; + + if (extractfile == NULL) { + if((file = (char *)malloc((MD5_DIGEST_LENGTH * 2) + 5)) == NULL) { + fprintf(stderr, "extractfile malloc failed. Out of memory?\n"); + exit(1); + } + sprintf(file, "%s.bin", md5string); + } else { + file = extractfile; + } + + if ((extractfd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) { + perror(file); + exit(1); + } + + if (seedfile) { + seedfd = open(seedfile, O_RDONLY | O_BINARY); + while ((bytes = read(seedfd, &buffer, BUFFERSIZE)) != 0) { + write(extractfd, &buffer, bytes); + } + close(seedfd); + } + + extract_init(offset); + while ((bytes = extract_read(&buffer, BUFFERSIZE)) != 0) { + write(extractfd, &buffer, bytes); + } + + close(extractfd); + if (extractfile == NULL) free(file); +} + +int extract_offset; +unsigned int extract_bytesleft = 0; + +void extract_init(int offset) { + extract_offset = offset; + extract_bytesleft = windowsize; +} + +int extract_read(void *buffer, unsigned int size) { + unsigned int bytesread, returnvalue; + + if (size == 0 || extract_bytesleft == 0) return 0; + if (size > extract_bytesleft) { + size = extract_bytesleft; + } + + returnvalue = 0; + if (extract_offset < 0) { + if ((unsigned int)(extract_offset * -1) >= size) { + memset(buffer, 0, size); + extract_offset += size; + extract_bytesleft -= size; + return size; + } else { + returnvalue = extract_offset * -1; + memset(buffer, 0, returnvalue); + buffer = (char *)buffer + returnvalue; + size -= returnvalue; + extract_bytesleft -= returnvalue; + extract_offset = 0; + } + } + + lseek(fd, extract_offset, SEEK_SET); + bytesread = read(fd, buffer, size); + extract_bytesleft -= bytesread; + extract_offset += bytesread; + returnvalue += bytesread; + if (bytesread < size) { + size -= bytesread; + buffer = (char *)buffer + bytesread; + memset(buffer, 0, size); + extract_bytesleft -= size; + extract_offset += size; + returnvalue += size; + } + + return returnvalue; } diff --git a/md5.c b/md5.c index 2f01c93..98d7c54 100644 --- a/md5.c +++ b/md5.c @@ -89,7 +89,7 @@ * This processes one or more 64-byte data blocks, but does NOT update * the bit counters. There are no alignment requirements. */ -static void *body(MD5_CTX *ctx, void *data, unsigned long size) +static void *body(MD5_CTX *ctx, const void *data, unsigned long size) { unsigned char *ptr; MD5_u32plus a, b, c, d; @@ -207,7 +207,7 @@ void MD5_Init(MD5_CTX *ctx) ctx->hi = 0; } -void MD5_Update(MD5_CTX *ctx, void *data, unsigned long size) +void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size) { MD5_u32plus saved_lo; unsigned long used, free; diff --git a/md5.h b/md5.h index f1a6857..f037517 100644 --- a/md5.h +++ b/md5.h @@ -28,6 +28,8 @@ #elif !defined(_MD5_H) #define _MD5_H +#define MD5_DIGEST_LENGTH 16 + /* Any 32-bit or wider unsigned integer data type will do */ typedef unsigned int MD5_u32plus; @@ -39,7 +41,7 @@ typedef struct { } MD5_CTX; extern void MD5_Init(MD5_CTX *ctx); -extern void MD5_Update(MD5_CTX *ctx, void *data, unsigned long size); +extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); #endif