From ed4330b50eff070e082c6d121434e56bcf6cc463 Mon Sep 17 00:00:00 2001 From: Matt Nadareski Date: Tue, 31 May 2016 23:34:19 -0700 Subject: [PATCH] [DATFromDir, RomManipulation, Stats] Various changes, see below There are at least 2 separate changes that got intermingled on this one so I'm committing them all at the same time. The first is massive overhauls to the Romba portion of DATFromDir. This part is still a work in progress. The second is adding more items to the DatData struct to allow for better and more efficient stat collection. --- DATFromDir/DATFromDir.cs | 74 +++++++++++++++++++++----- DATabase/Stats.cs | 94 +++++++--------------------------- SabreHelper/Data/Structs.cs | 9 ++++ SabreHelper/RomManipulation.cs | 72 +++++++++++++++++++++++--- 4 files changed, 154 insertions(+), 95 deletions(-) diff --git a/DATFromDir/DATFromDir.cs b/DATFromDir/DATFromDir.cs index f8da7f8e..4de94213 100644 --- a/DATFromDir/DATFromDir.cs +++ b/DATFromDir/DATFromDir.cs @@ -420,8 +420,8 @@ namespace SabreTools } } - // If we had roms but not blanks, create an artifical rom for the purposes of outputting - if (lastparent != null && _datdata.Roms.Count == 0) + // If we had roms but not blanks (and not in Romba mode), create an artifical rom for the purposes of outputting + if (lastparent != null && _datdata.Roms.Count == 0 && !_datdata.Romba) { _datdata.Roms.Add("temp", new List()); } @@ -442,26 +442,77 @@ namespace SabreTools /// New parent to be used private string ProcessFile(string item, StreamWriter sw, string lastparent) { - string tempdir = ""; + // Define the temporary directory + string tempdir = (String.IsNullOrEmpty(_tempDir) ? Environment.CurrentDirectory : _tempDir); + tempdir += (tempdir.EndsWith(Path.DirectorySeparatorChar.ToString()) ? "" : Path.DirectorySeparatorChar.ToString()); + tempdir += "temp" + DateTime.Now.ToString("yyyyMMddHHmmss") + Path.DirectorySeparatorChar; // Special case for if we are in Romba mode (all names are supposed to be SHA-1 hashes) if (_datdata.Romba) { - string datum = Path.GetFileNameWithoutExtension(item).ToLowerInvariant(); + int neededHeaderSize = 32; + string datum = Path.GetFileName(item).ToLowerInvariant(); + long filesize = new FileInfo(item).Length; // Check if the name is the right length - if (!Regex.IsMatch(datum, @"^[0-9a-f]{40}")) + if (!Regex.IsMatch(datum, @"^[0-9a-f]{40}\.gz")) { _logger.Warning("Non SHA-1 filename found, skipping: '" + datum + "'"); return ""; } + // Check if the file is at least the minimum length + if (filesize < neededHeaderSize) + { + _logger.Warning("Possibly corrupt file '" + item + "' with size " + Style.GetBytesReadable(filesize)); + return ""; + } + + // Get the Romba-specific header data + byte[] header; + byte[] footer; + using (FileStream itemstream = File.OpenRead(item)) + { + using (BinaryReader br = new BinaryReader(itemstream)) + { + header = br.ReadBytes(neededHeaderSize); + br.BaseStream.Seek(-4, SeekOrigin.End); + footer = br.ReadBytes(4); + } + } + + // Now convert the data and get the right positions + string headerstring = BitConverter.ToString(header).Replace("-", string.Empty); + string gzmd5 = headerstring.Substring(24, 32); + string gzcrc = headerstring.Substring(56, 8); + string gzsize = BitConverter.ToString(footer.Reverse().ToArray()).Replace("-", string.Empty); + long extractedsize = Convert.ToInt64(gzsize, 16); + + // Only try to add if the file size is greater than 2.5 GiB + if (filesize >= (2.5 * 1024 * 1024 * 1024)) + { + // ISIZE is mod 2^32, so we add that if the ISIZE is smaller than the filesize and header + if (extractedsize < (filesize - neededHeaderSize)) + { + _logger.Log("Filename: '" + Path.GetFullPath(item) + "'\nExtracted file size: " + + extractedsize + ", " + Style.GetBytesReadable(extractedsize) + "\nArchive file size: " + filesize + ", " + Style.GetBytesReadable(filesize)); + } + while (extractedsize < (filesize - neededHeaderSize)) + { + extractedsize += (long)Math.Pow(2, 32); + } + _logger.Log("Final file size: " + extractedsize + "\nExtracted CRC: " + gzcrc + + "\nExtracted MD5: " + gzmd5 + "\nSHA-1: " + Path.GetFileNameWithoutExtension(item)); + } + RomData rom = new RomData { Type = "rom", - Game = datum, - Name = datum, - Size = (new FileInfo(item)).Length, + Game = Path.GetFileNameWithoutExtension(item), + Name = Path.GetFileNameWithoutExtension(item), + Size = extractedsize, + CRC = gzcrc, + MD5 = gzmd5, SHA1 = Path.GetFileNameWithoutExtension(item), }; @@ -470,7 +521,7 @@ namespace SabreTools Output.WriteRomData(sw, rom, "", _datdata, 0, _logger); Output.WriteEndGame(sw, rom, new List(), new List(), "", _datdata, 0, out last, _logger); - _logger.Log("File added: " + Path.GetFileNameWithoutExtension(item) + Environment.NewLine); + _logger.User("File added: " + Path.GetFileNameWithoutExtension(item) + Environment.NewLine); return ""; } @@ -485,11 +536,6 @@ namespace SabreTools ArchiveType at = archive.Type; _logger.Log("Found archive of type: " + at); - // Define the temporary directory - tempdir = (String.IsNullOrEmpty(_tempDir) ? Environment.CurrentDirectory : _tempDir); - tempdir += (tempdir.EndsWith(Path.DirectorySeparatorChar.ToString()) ? "" : Path.DirectorySeparatorChar.ToString()); - tempdir += "temp" + DateTime.Now.ToString("yyyyMMddHHmmss") + Path.DirectorySeparatorChar; - if (at == ArchiveType.Zip || at == ArchiveType.SevenZip || at == ArchiveType.Rar) { // Create the temp directory diff --git a/DATabase/Stats.cs b/DATabase/Stats.cs index 8290571e..66bd85e9 100644 --- a/DATabase/Stats.cs +++ b/DATabase/Stats.cs @@ -34,16 +34,6 @@ namespace SabreTools /// True if output succeeded, false otherwise public bool Process() { - // Init all single-dat variables - long singleSize = 0; - long singleGame = 0; - long singleRom = 0; - long singleDisk = 0; - long singleCRC = 0; - long singleMD5 = 0; - long singleSHA1 = 0; - long singleNodump = 0; - // Init all total variables long totalSize = 0; long totalGame = 0; @@ -59,85 +49,39 @@ namespace SabreTools { _logger.User("Beginning stat collection for '" + filename + "'"); List games = new List(); - DatData datdata = new DatData(); datdata = RomManipulation.Parse(filename, 0, 0, datdata, _logger); - foreach (List romlist in datdata.Roms.Values) - { - foreach (RomData rom in romlist) - { - singleSize += rom.Size; - if (!games.Contains(rom.Game)) - { - singleGame++; - games.Add(rom.Game); - } - if (rom.Type == "rom") - { - singleRom++; - } - if (rom.Type == "disk") - { - singleDisk++; - } - if (!String.IsNullOrEmpty(rom.CRC)) - { - singleCRC++; - } - if (!String.IsNullOrEmpty(rom.MD5)) - { - singleMD5++; - } - if (!String.IsNullOrEmpty(rom.SHA1)) - { - singleSHA1++; - } - if (rom.Nodump) - { - singleNodump++; - } - } - } + SortedDictionary> newroms = RomManipulation.BucketByGame(datdata.Roms, false, true, _logger); // Output single DAT stats (if asked) if (_single) { - _logger.User(@"For file '" + filename + @"': + _logger.User(@"\nFor file '" + filename + @"': -------------------------------------------------- - Uncompressed size: " + Style.GetBytesReadable(singleSize) + @" - Games found: " + singleGame + @" - Roms found: " + singleRom + @" - Disks found: " + singleDisk + @" - Roms with CRC: " + singleCRC + @" - Roms with MD5: " + singleMD5 + @" - Roms with SHA-1: " + singleSHA1 + @" - Roms with Nodump status: " + singleNodump + @" + Uncompressed size: " + Style.GetBytesReadable(datdata.TotalSize) + @" + Games found: " + newroms.Count + @" + Roms found: " + datdata.RomCount + @" + Disks found: " + datdata.DiskCount + @" + Roms with CRC: " + datdata.CRCCount + @" + Roms with MD5: " + datdata.MD5Count + @" + Roms with SHA-1: " + datdata.SHA1Count + @" + Roms with Nodump status: " + datdata.NodumpCount + @" "); } else { - _logger.User("Adding stats for file '" + filename + "'"); + _logger.User("\nAdding stats for file '" + filename + "'"); } // Add single DAT stats to totals - totalSize += singleSize; - totalGame += singleGame; - totalRom += singleRom; - totalDisk += singleDisk; - totalCRC += singleCRC; - totalMD5 += singleMD5; - totalSHA1 += singleSHA1; - totalNodump += singleNodump; - - // Reset single DAT stats - singleSize = 0; - singleGame = 0; - singleRom = 0; - singleDisk = 0; - singleCRC = 0; - singleMD5 = 0; - singleSHA1 = 0; - singleNodump = 0; + totalSize += datdata.TotalSize; + totalGame += newroms.Count; + totalRom += datdata.RomCount; + totalDisk += datdata.DiskCount; + totalCRC += datdata.CRCCount; + totalMD5 += datdata.MD5Count; + totalSHA1 += datdata.SHA1Count; + totalNodump += datdata.SHA1Count; } // Output total DAT stats diff --git a/SabreHelper/Data/Structs.cs b/SabreHelper/Data/Structs.cs index 555cf4b2..5b3309df 100644 --- a/SabreHelper/Data/Structs.cs +++ b/SabreHelper/Data/Structs.cs @@ -61,5 +61,14 @@ namespace SabreTools.Helper public bool GameName; public bool Romba; public bool TSV; // tab-deliminated output + + // Statistical data related to the DAT + public long RomCount; + public long DiskCount; + public long TotalSize; + public long CRCCount; + public long MD5Count; + public long SHA1Count; + public long NodumpCount; } } diff --git a/SabreHelper/RomManipulation.cs b/SabreHelper/RomManipulation.cs index dc579f48..36bac943 100644 --- a/SabreHelper/RomManipulation.cs +++ b/SabreHelper/RomManipulation.cs @@ -295,6 +295,15 @@ namespace SabreTools.Helper templist.Add(rom); datdata.Roms.Add(key, templist); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } // If the line is anything but a rom or disk and we're in a block else if (Regex.IsMatch(line, Constants.ItemPatternCMP) && block) @@ -529,6 +538,15 @@ namespace SabreTools.Helper templist.Add(rom); datdata.Roms.Add(key, templist); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } } } @@ -595,6 +613,15 @@ namespace SabreTools.Helper temp.Add(rom); datdata.Roms.Add(key, temp); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } // Regardless, end the current folder @@ -1005,7 +1032,7 @@ namespace SabreTools.Helper // Get the new values to add key = size + "-" + crc; - RomData value = new RomData + RomData rom = new RomData { Game = tempname, Name = subreader.GetAttribute("name"), @@ -1023,14 +1050,23 @@ namespace SabreTools.Helper if (datdata.Roms.ContainsKey(key)) { - datdata.Roms[key].Add(value); + datdata.Roms[key].Add(rom); } else { List newvalue = new List(); - newvalue.Add(value); + newvalue.Add(rom); datdata.Roms.Add(key, newvalue); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } // Otherwise, log that it wasn't added else @@ -1072,6 +1108,15 @@ namespace SabreTools.Helper temp.Add(rom); datdata.Roms.Add(key, temp); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } // Regardless, end the current folder @@ -1224,7 +1269,7 @@ namespace SabreTools.Helper // Get the new values to add key = size + "-" + crc; - RomData value = new RomData + RomData rom = new RomData { Game = tempname, Name = xtr.GetAttribute("name"), @@ -1242,14 +1287,23 @@ namespace SabreTools.Helper if (datdata.Roms.ContainsKey(key)) { - datdata.Roms[key].Add(value); + datdata.Roms[key].Add(rom); } else { List newvalue = new List(); - newvalue.Add(value); + newvalue.Add(rom); datdata.Roms.Add(key, newvalue); } + + // Add statistical data + datdata.RomCount += (rom.Type == "rom" ? 1 : 0); + datdata.DiskCount += (rom.Type == "disk" ? 1 : 0); + datdata.TotalSize += rom.Size; + datdata.CRCCount += (String.IsNullOrEmpty(rom.CRC) ? 0 : 1); + datdata.MD5Count += (String.IsNullOrEmpty(rom.MD5) ? 0 : 1); + datdata.SHA1Count += (String.IsNullOrEmpty(rom.SHA1) ? 0 : 1); + datdata.NodumpCount += (rom.Nodump ? 1 : 0); } xtr.Read(); break; @@ -1333,6 +1387,12 @@ namespace SabreTools.Helper ); } + // Special logging to remove later + if (rom.SHA1 == lastrom.SHA1 && rom.Size != lastrom.Size) + { + logger.User("Rom SHA-1: " + lastrom.SHA1 + "\nRom Size: " + lastrom.Size + "\nNew Rom size: " + rom.Size); + } + // If it's a duplicate, skip adding it to the output but add any missing information if (dupefound) {