using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Net; #if NET40_OR_GREATER || NETCOREAPP using System.Threading.Tasks; #endif using SabreTools.Core; using SabreTools.DatFiles; using SabreTools.DatItems; using SabreTools.DatItems.Formats; using SabreTools.IO; using SabreTools.Logging; using SabreTools.Matching; namespace SabreTools.DatTools { /// /// Helper methods for splitting DatFiles /// /// TODO: Implement Level split public class Splitter { #region Logging /// /// Logging object /// private static readonly Logger logger = new(); #endregion /// /// Split a DAT by input extensions /// /// Current DatFile object to split /// List of extensions to split on (first DAT) /// List of extensions to split on (second DAT) /// Extension Set A and Extension Set B DatFiles public static (DatFile? extADat, DatFile? extBDat) SplitByExtension(DatFile datFile, List extA, List extB) { // If roms is empty, return false if (datFile.Items.TotalCount == 0) return (null, null); InternalStopwatch watch = new($"Splitting DAT by extension"); // Make sure all of the extensions don't have a dot at the beginning var newExtA = extA.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray(); string newExtAString = string.Join(",", newExtA); var newExtB = extB.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray(); string newExtBString = string.Join(",", newExtB); // Set all of the appropriate outputs for each of the subsets DatFile extADat = DatFile.Create(datFile.Header.CloneStandard()); extADat.Header.FileName += $" ({newExtAString})"; extADat.Header.Name += $" ({newExtAString})"; extADat.Header.Description += $" ({newExtAString})"; DatFile extBDat = DatFile.Create(datFile.Header.CloneStandard()); extBDat.Header.FileName += $" ({newExtBString})"; extBDat.Header.Name += $" ({newExtBString})"; extBDat.Header.Description += $" ({newExtBString})"; // Now separate the roms accordingly #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key => #elif NET40_OR_GREATER Parallel.ForEach(datFile.Items.Keys, key => #else foreach (var key in datFile.Items.Keys) #endif { var items = datFile.Items[key]; if (items == null) #if NET40_OR_GREATER || NETCOREAPP return; #else continue; #endif foreach (DatItem item in items) { if (newExtA.Contains((item.GetName() ?? string.Empty).GetNormalizedExtension())) { extADat.Items.Add(key, item); } else if (newExtB.Contains((item.GetName() ?? string.Empty).GetNormalizedExtension())) { extBDat.Items.Add(key, item); } else { extADat.Items.Add(key, item); extBDat.Items.Add(key, item); } } #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif // Then return both DatFiles watch.Stop(); return (extADat, extBDat); } /// /// Split a DAT by best available hashes /// /// Current DatFile object to split /// Dictionary of Field to DatFile mappings public static Dictionary SplitByHash(DatFile datFile) { // Create each of the respective output DATs InternalStopwatch watch = new($"Splitting DAT by best available hashes"); // Create the set of field-to-dat mappings Dictionary fieldDats = []; // TODO: Can this be made into a loop? fieldDats[Models.Metadata.Rom.StatusKey] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.StatusKey].Header.FileName += " (Nodump)"; fieldDats[Models.Metadata.Rom.StatusKey].Header.Name += " (Nodump)"; fieldDats[Models.Metadata.Rom.StatusKey].Header.Description += " (Nodump)"; fieldDats[Models.Metadata.Rom.SHA512Key] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.SHA512Key].Header.FileName += " (SHA-512)"; fieldDats[Models.Metadata.Rom.SHA512Key].Header.Name += " (SHA-512)"; fieldDats[Models.Metadata.Rom.SHA512Key].Header.Description += " (SHA-512)"; fieldDats[Models.Metadata.Rom.SHA384Key] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.SHA384Key].Header.FileName += " (SHA-384)"; fieldDats[Models.Metadata.Rom.SHA384Key].Header.Name += " (SHA-384)"; fieldDats[Models.Metadata.Rom.SHA384Key].Header.Description += " (SHA-384)"; fieldDats[Models.Metadata.Rom.SHA256Key] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.SHA256Key].Header.FileName += " (SHA-256)"; fieldDats[Models.Metadata.Rom.SHA256Key].Header.Name += " (SHA-256)"; fieldDats[Models.Metadata.Rom.SHA256Key].Header.Description += " (SHA-256)"; fieldDats[Models.Metadata.Rom.SHA1Key] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.SHA1Key].Header.FileName += " (SHA-1)"; fieldDats[Models.Metadata.Rom.SHA1Key].Header.Name += " (SHA-1)"; fieldDats[Models.Metadata.Rom.SHA1Key].Header.Description += " (SHA-1)"; fieldDats[Models.Metadata.Rom.MD5Key] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.MD5Key].Header.FileName += " (MD5)"; fieldDats[Models.Metadata.Rom.MD5Key].Header.Name += " (MD5)"; fieldDats[Models.Metadata.Rom.MD5Key].Header.Description += " (MD5)"; fieldDats[Models.Metadata.Rom.CRCKey] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats[Models.Metadata.Rom.CRCKey].Header.FileName += " (CRC)"; fieldDats[Models.Metadata.Rom.CRCKey].Header.Name += " (CRC)"; fieldDats[Models.Metadata.Rom.CRCKey].Header.Description += " (CRC)"; fieldDats["null"] = DatFile.Create(datFile.Header.CloneStandard()); fieldDats["null"].Header.FileName += " (Other)"; fieldDats["null"].Header.Name += " (Other)"; fieldDats["null"].Header.Description += " (Other)"; // Now populate each of the DAT objects in turn #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key => #elif NET40_OR_GREATER Parallel.ForEach(datFile.Items.Keys, key => #else foreach (var key in datFile.Items.Keys) #endif { var items = datFile.Items[key]; if (items == null) #if NET40_OR_GREATER || NETCOREAPP return; #else continue; #endif foreach (DatItem item in items) { // If the file is not a Disk, Media, or Rom, continue if (item.ItemType != ItemType.Disk && item.ItemType != ItemType.Media && item.ItemType != ItemType.Rom) continue; // If the file is a nodump if ((item.ItemType == ItemType.Rom && (item as Rom)!.GetFieldValue(Models.Metadata.Rom.StatusKey) == ItemStatus.Nodump) || (item.ItemType == ItemType.Disk && (item as Disk)!.GetFieldValue(Models.Metadata.Disk.StatusKey) == ItemStatus.Nodump)) { fieldDats[Models.Metadata.Rom.StatusKey].Items.Add(key, item); } // If the file has a SHA-512 else if ((item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.SHA512Key)))) { fieldDats[Models.Metadata.Rom.SHA512Key].Items.Add(key, item); } // If the file has a SHA-384 else if ((item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.SHA384Key)))) { fieldDats[Models.Metadata.Rom.SHA384Key].Items.Add(key, item); } // If the file has a SHA-256 else if ((item.ItemType == ItemType.Media && !string.IsNullOrEmpty((item as Media)!.GetFieldValue(Models.Metadata.Media.SHA256Key))) || (item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.SHA256Key)))) { fieldDats[Models.Metadata.Rom.SHA256Key].Items.Add(key, item); } // If the file has a SHA-1 else if ((item.ItemType == ItemType.Disk && !string.IsNullOrEmpty((item as Disk)!.GetFieldValue(Models.Metadata.Disk.SHA1Key))) || (item.ItemType == ItemType.Media && !string.IsNullOrEmpty((item as Media)!.GetFieldValue(Models.Metadata.Media.SHA1Key))) || (item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.SHA1Key)))) { fieldDats[Models.Metadata.Rom.SHA1Key].Items.Add(key, item); } // If the file has an MD5 else if ((item.ItemType == ItemType.Disk && !string.IsNullOrEmpty((item as Disk)!.GetFieldValue(Models.Metadata.Disk.MD5Key))) || (item.ItemType == ItemType.Media && !string.IsNullOrEmpty((item as Media)!.GetFieldValue(Models.Metadata.Media.MD5Key))) || (item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.MD5Key)))) { fieldDats[Models.Metadata.Rom.MD5Key].Items.Add(key, item); } // If the file has a CRC else if ((item.ItemType == ItemType.Rom && !string.IsNullOrEmpty((item as Rom)!.GetFieldValue(Models.Metadata.Rom.CRCKey)))) { fieldDats[Models.Metadata.Rom.CRCKey].Items.Add(key, item); } else { fieldDats["null"].Items.Add(key, item); } } #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif watch.Stop(); return fieldDats; } /// /// Split a SuperDAT by lowest available directory level /// /// Current DatFile object to split /// Name of the directory to write the DATs out to /// True if short names should be used, false otherwise /// True if original filenames should be used as the base for output filename, false otherwise /// True if split succeeded, false otherwise public static bool SplitByLevel(DatFile datFile, string outDir, bool shortname, bool basedat) { InternalStopwatch watch = new($"Splitting DAT by level"); // First, bucket by games so that we can do the right thing datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true); // Create a temporary DAT to add things to DatFile tempDat = DatFile.Create(datFile.Header); tempDat.Header.Name = null; // Sort the input keys List keys = [.. datFile.Items.Keys]; keys.Sort(SplitByLevelSort); // Then, we loop over the games #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(keys, Globals.ParallelOptions, key => #elif NET40_OR_GREATER Parallel.ForEach(keys, key => #else foreach (var key in keys) #endif { // Here, the key is the name of the game to be used for comparison if (tempDat.Header.Name != null && tempDat.Header.Name != Path.GetDirectoryName(key)) { // Reset the DAT for the next items tempDat = DatFile.Create(datFile.Header); tempDat.Header.Name = null; } // Clean the input list and set all games to be pathless ConcurrentList? items = datFile.Items[key]; if (items == null) #if NET40_OR_GREATER || NETCOREAPP return; #else continue; #endif items.ForEach(item => item.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, Path.GetFileName(item.Machine.GetFieldValue(Models.Metadata.Machine.NameKey)))); items.ForEach(item => item.Machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, Path.GetFileName(item.Machine.GetFieldValue(Models.Metadata.Machine.DescriptionKey)))); // Now add the game to the output DAT tempDat.Items.AddRange(key, items); // Then set the DAT name to be the parent directory name tempDat.Header.Name = Path.GetDirectoryName(key); #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif watch.Stop(); return true; } /// /// Helper function for SplitByLevel to sort the input game names /// /// First string to compare /// Second string to compare /// -1 for a coming before b, 0 for a == b, 1 for a coming after b private static int SplitByLevelSort(string a, string b) { NaturalComparer nc = new(); int adeep = a.Count(c => c == '/' || c == '\\'); int bdeep = b.Count(c => c == '/' || c == '\\'); if (adeep == bdeep) return nc.Compare(a, b); return adeep - bdeep; } /// /// Helper function for SplitByLevel to clean and write out a DAT /// /// Current DatFile object to split /// DAT to clean and write out /// Directory to write out to /// True if short naming scheme should be used, false otherwise /// True if original filenames should be used as the base for output filename, false otherwise private static void SplitByLevelHelper(DatFile datFile, DatFile newDatFile, string outDir, bool shortname, bool restore) { // Get the name from the DAT to use separately string? name = newDatFile.Header.Name; string? expName = name?.Replace("/", " - ")?.Replace("\\", " - "); // Now set the new output values #if NET20 || NET35 newDatFile.Header.FileName = string.IsNullOrEmpty(name) ? datFile.Header.FileName : (shortname ? Path.GetFileName(name) : expName ); #else newDatFile.Header.FileName = WebUtility.HtmlDecode(string.IsNullOrEmpty(name) ? datFile.Header.FileName : (shortname ? Path.GetFileName(name) : expName ) ); #endif newDatFile.Header.FileName = restore ? $"{datFile.Header.FileName} ({newDatFile.Header.FileName})" : newDatFile.Header.FileName; newDatFile.Header.Name = $"{datFile.Header.Name} ({expName})"; newDatFile.Header.Description = string.IsNullOrEmpty(datFile.Header.Description) ? newDatFile.Header.Name : $"{datFile.Header.Description} ({expName})"; newDatFile.Header.Type = null; // Write out the temporary DAT to the proper directory Writer.Write(newDatFile, outDir); } /// /// Split a DAT by size of Rom /// /// Current DatFile object to split /// Long value representing the split point /// Less Than and Greater Than DatFiles public static (DatFile lessThan, DatFile greaterThan) SplitBySize(DatFile datFile, long radix) { // Create each of the respective output DATs InternalStopwatch watch = new($"Splitting DAT by size"); DatFile lessThan = DatFile.Create(datFile.Header.CloneStandard()); lessThan.Header.FileName += $" (less than {radix})"; lessThan.Header.Name += $" (less than {radix})"; lessThan.Header.Description += $" (less than {radix})"; DatFile greaterThan = DatFile.Create(datFile.Header.CloneStandard()); greaterThan.Header.FileName += $" (equal-greater than {radix})"; greaterThan.Header.Name += $" (equal-greater than {radix})"; greaterThan.Header.Description += $" (equal-greater than {radix})"; // Now populate each of the DAT objects in turn #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key => #elif NET40_OR_GREATER Parallel.ForEach(datFile.Items.Keys, key => #else foreach (var key in datFile.Items.Keys) #endif { ConcurrentList? items = datFile.Items[key]; if (items == null) #if NET40_OR_GREATER || NETCOREAPP return; #else continue; #endif foreach (DatItem item in items) { // If the file is not a Rom, it automatically goes in the "lesser" dat if (item.ItemType != ItemType.Rom) lessThan.Items.Add(key, item); // If the file is a Rom and has no size, put it in the "lesser" dat else if (item.ItemType == ItemType.Rom && (item as Rom)!.GetFieldValue(Models.Metadata.Rom.SizeKey) == null) lessThan.Items.Add(key, item); // If the file is a Rom and less than the radix, put it in the "lesser" dat else if (item.ItemType == ItemType.Rom && (item as Rom)!.GetFieldValue(Models.Metadata.Rom.SizeKey) < radix) lessThan.Items.Add(key, item); // If the file is a Rom and greater than or equal to the radix, put it in the "greater" dat else if (item.ItemType == ItemType.Rom && (item as Rom)!.GetFieldValue(Models.Metadata.Rom.SizeKey) >= radix) greaterThan.Items.Add(key, item); } #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif // Then return both DatFiles watch.Stop(); return (lessThan, greaterThan); } /// /// Split a DAT by size of Rom /// /// Current DatFile object to split /// Long value representing the total size to split at /// Less Than and Greater Than DatFiles public static List SplitByTotalSize(DatFile datFile, long chunkSize) { // If the size is invalid, just return if (chunkSize <= 0) return []; // Create each of the respective output DATs InternalStopwatch watch = new($"Splitting DAT by total size"); // Sort the DatFile by machine name datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None); // Get the keys in a known order for easier sorting var keys = datFile.Items.SortedKeys; // Get the output list List datFiles = []; // Initialize everything long currentSize = 0; long currentIndex = 0; DatFile currentDat = DatFile.Create(datFile.Header.CloneStandard()); currentDat.Header.FileName += $"_{currentIndex}"; currentDat.Header.Name += $"_{currentIndex}"; currentDat.Header.Description += $"_{currentIndex}"; // Loop through each machine foreach (string machine in keys) { // Get the current machine var items = datFile.Items[machine]; if (items == null || !items.Any()) { logger.Error($"{machine} contains no items and will be skipped"); continue; } // Get the total size of the current machine long machineSize = 0; foreach (var item in items) { if (item is Rom rom) { // TODO: Should there be more than just a log if a single item is larger than the chunksize? machineSize += rom.GetFieldValue(Models.Metadata.Rom.SizeKey) ?? 0; if ((rom.GetFieldValue(Models.Metadata.Rom.SizeKey) ?? 0) > chunkSize) logger.Error($"{rom.GetName() ?? string.Empty} in {machine} is larger than {chunkSize}"); } } // If the current machine size is greater than the chunk size by itself, we want to log and skip // TODO: Should this eventually try to split the machine here? if (machineSize > chunkSize) { logger.Error($"{machine} is larger than {chunkSize} and will be skipped"); continue; } // If the current machine size makes the current DatFile too big, split else if (currentSize + machineSize > chunkSize) { datFiles.Add(currentDat); currentSize = 0; currentIndex++; currentDat = DatFile.Create(datFile.Header.CloneStandard()); currentDat.Header.FileName += $"_{currentIndex}"; currentDat.Header.Name += $"_{currentIndex}"; currentDat.Header.Description += $"_{currentIndex}"; } // Add the current machine to the current DatFile currentDat.Items[machine] = items; currentSize += machineSize; } // Add the final DatFile to the list datFiles.Add(currentDat); // Then return the list watch.Stop(); return datFiles; } /// /// Split a DAT by type of DatItem /// /// Current DatFile object to split /// Dictionary of ItemType to DatFile mappings public static Dictionary SplitByType(DatFile datFile) { // Create each of the respective output DATs InternalStopwatch watch = new($"Splitting DAT by item type"); // Create the set of type-to-dat mappings Dictionary typeDats = []; // We only care about a subset of types List outputTypes = [ ItemType.Disk, ItemType.Media, ItemType.Rom, ItemType.Sample, ]; // Setup all of the DatFiles foreach (ItemType itemType in outputTypes) { typeDats[itemType] = DatFile.Create(datFile.Header.CloneStandard()); typeDats[itemType].Header.FileName += $" ({itemType})"; typeDats[itemType].Header.Name += $" ({itemType})"; typeDats[itemType].Header.Description += $" ({itemType})"; } // Now populate each of the DAT objects in turn #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(outputTypes, Globals.ParallelOptions, itemType => #elif NET40_OR_GREATER Parallel.ForEach(outputTypes, itemType => #else foreach (var itemType in outputTypes) #endif { FillWithItemType(datFile, typeDats[itemType], itemType); #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif watch.Stop(); return typeDats; } /// /// Fill a DatFile with all items with a particular ItemType /// /// Current DatFile object to split /// DatFile to add found items to /// ItemType to retrieve items for /// DatFile containing all items with the ItemType/returns> private static void FillWithItemType(DatFile datFile, DatFile indexDat, ItemType itemType) { // Loop through and add the items for this index to the output #if NET452_OR_GREATER || NETCOREAPP Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key => #elif NET40_OR_GREATER Parallel.ForEach(datFile.Items.Keys, key => #else foreach (var key in datFile.Items.Keys) #endif { ConcurrentList items = DatItem.Merge(datFile.Items[key]); // If the rom list is empty or null, just skip it if (items == null || items.Count == 0) #if NET40_OR_GREATER || NETCOREAPP return; #else continue; #endif foreach (DatItem item in items) { if (item.ItemType == itemType) indexDat.Items.Add(key, item); } #if NET40_OR_GREATER || NETCOREAPP }); #else } #endif } } }