using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
using SabreTools.IO.Extensions;
using SabreTools.IO.Logging;
using SabreTools.Matching.Compare;

namespace SabreTools.DatTools
{
    /// <summary>
    /// Helper methods for splitting DatFiles
    /// </summary>
    /// TODO: Implement Level split
    public class Splitter
    {
        #region Logging

        /// <summary>
        /// Logging object
        /// </summary>
        private static readonly Logger logger = new();

        #endregion

        /// <summary>
        /// Split a DAT by input extensions
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="extA">List of extensions to split on (first DAT)</param>
        /// <param name="extB">List of extensions to split on (second DAT)</param>
        /// <returns>Extension Set A and Extension Set B DatFiles</returns>
        public static (DatFile? extADat, DatFile? extBDat) SplitByExtension(DatFile datFile, List<string> extA, List<string> extB)
        {
            // If roms is empty, return false
            if (datFile.Items.DatStatistics.TotalCount == 0)
                return (null, null);

            InternalStopwatch watch = new($"Splitting DAT by extension");

            // Make sure all of the extensions don't have a dot at the beginning
            var newExtA = extA.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
            string newExtAString = string.Join(",", newExtA);

            var newExtB = extB.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
            string newExtBString = string.Join(",", newExtB);

            // Set all of the appropriate outputs for each of the subsets
            DatFile extADat = DatFile.Create(datFile.Header.CloneStandard());
            extADat.Header.SetFieldValue(DatHeader.FileNameKey,
                extADat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtAString})");
            extADat.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                extADat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtAString})");
            extADat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                extADat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtAString})");

            DatFile extBDat = DatFile.Create(datFile.Header.CloneStandard());
            extBDat.Header.SetFieldValue(DatHeader.FileNameKey,
                extBDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtBString})");
            extBDat.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                extBDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtBString})");
            extBDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                extBDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtBString})");

            // Now separate the roms accordingly
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datFile.Items.Keys, key =>
#else
            foreach (var key in datFile.Items.Keys)
#endif
            {
                var items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                foreach (DatItem item in items)
                {
                    // Compute the normalized extension once per item instead of
                    // once per membership check
                    string? extension = (item.GetName() ?? string.Empty).GetNormalizedExtension();

                    // Items matching set A go only to A, matching set B only to B;
                    // anything unmatched goes to both
                    if (newExtA.Contains(extension))
                    {
                        extADat.Items.Add(key, item);
                    }
                    else if (newExtB.Contains(extension))
                    {
                        extBDat.Items.Add(key, item);
                    }
                    else
                    {
                        extADat.Items.Add(key, item);
                        extBDat.Items.Add(key, item);
                    }
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            // Then return both DatFiles
            watch.Stop();
            return (extADat, extBDat);
        }

        /// <summary>
        /// Split a DAT by input extensions
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="extA">List of extensions to split on (first DAT)</param>
        /// <param name="extB">List of extensions to split on (second DAT)</param>
        /// <returns>Extension Set A and Extension Set B DatFiles</returns>
        public static (DatFile? extADat, DatFile?
extBDat) SplitByExtensionDB(DatFile datFile, List<string> extA, List<string> extB)
        {
            // If roms is empty, return false
            if (datFile.ItemsDB.DatStatistics.TotalCount == 0)
                return (null, null);

            InternalStopwatch watch = new($"Splitting DAT by extension");

            // Make sure all of the extensions don't have a dot at the beginning
            var newExtA = extA.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
            string newExtAString = string.Join(",", newExtA);

            var newExtB = extB.Select(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
            string newExtBString = string.Join(",", newExtB);

            // Set all of the appropriate outputs for each of the subsets
            DatFile extADat = DatFile.Create(datFile.Header.CloneStandard());
            extADat.Header.SetFieldValue(DatHeader.FileNameKey,
                extADat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtAString})");
            extADat.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                extADat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtAString})");
            extADat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                extADat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtAString})");

            DatFile extBDat = DatFile.Create(datFile.Header.CloneStandard());
            extBDat.Header.SetFieldValue(DatHeader.FileNameKey,
                extBDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtBString})");
            extBDat.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                extBDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtBString})");
            extBDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                extBDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtBString})");

            // Get all current items, machines, and mappings
            var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
            var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
            var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
            var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
            var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

            // Create mappings from old index to new index
            var machineRemapping = new Dictionary<long, long>();
            var sourceRemapping = new Dictionary<long, long>();

            // Loop through and add all sources; both output DATs receive every
            // source so the remapped indices stay aligned between the two
            foreach (var source in sources)
            {
                long newSourceIndex = extADat.ItemsDB.AddSource(source.Value);
                _ = extBDat.ItemsDB.AddSource(source.Value);
                sourceRemapping[source.Key] = newSourceIndex;
            }

            // Loop through and add all machines (same alignment argument as sources)
            foreach (var machine in machines)
            {
                long newMachineIndex = extADat.ItemsDB.AddMachine(machine.Value);
                _ = extBDat.ItemsDB.AddMachine(machine.Value);
                machineRemapping[machine.Key] = newMachineIndex;
            }

            // Loop through and add the items
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datItems, Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datItems, item =>
#else
            foreach (var item in datItems)
#endif
            {
                // Get the machine and source index for this item
                long machineIndex = itemMachineMappings[item.Key];
                long sourceIndex = itemSourceMappings[item.Key];

                // Compute the normalized extension once per item instead of
                // once per membership check
                string? extension = (item.Value.GetName() ?? string.Empty).GetNormalizedExtension();

                if (newExtA.Contains(extension))
                {
                    extADat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                }
                else if (newExtB.Contains(extension))
                {
                    extBDat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                }
                else
                {
                    // Unmatched extensions go to both output DATs
                    extADat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                    extBDat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            // Then return both DatFiles
            watch.Stop();
            return (extADat, extBDat);
        }

        /// <summary>
        /// Split a DAT by best available hashes
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <returns>Dictionary of Field to DatFile mappings</returns>
        public static Dictionary<string, DatFile> SplitByHash(DatFile datFile)
        {
            // Create each of the respective output DATs
            var watch = new InternalStopwatch($"Splitting DAT by best available hashes");

            // Create mapping of keys to suffixes
            var mappings = new Dictionary<string, string>
            {
                [Models.Metadata.Rom.StatusKey] = " (Nodump)",
                [Models.Metadata.Rom.SHA512Key] = " (SHA-512)",
                [Models.Metadata.Rom.SHA384Key] = " (SHA-384)",
                [Models.Metadata.Rom.SHA256Key] = " (SHA-256)",
                [Models.Metadata.Rom.SHA1Key] = " (SHA-1)",
                [Models.Metadata.Rom.MD5Key] = " (MD5)",
                [Models.Metadata.Rom.CRCKey] = " (CRC)",
                ["null"] = " (Other)",
            };

            // Create the set of field-to-dat mappings
            Dictionary<string, DatFile> fieldDats = [];
            foreach (var kvp in mappings)
            {
                fieldDats[kvp.Key] = DatFile.Create(datFile.Header.CloneStandard());
                fieldDats[kvp.Key].Header.SetFieldValue(DatHeader.FileNameKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(DatHeader.FileNameKey) + kvp.Value);
                fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.NameKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + kvp.Value);
                fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + kvp.Value);
            }

            // Now populate each of the DAT objects in turn
#if NET452_OR_GREATER || NETCOREAPP
Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datFile.Items.Keys, key =>
#else
            foreach (var key in datFile.Items.Keys)
#endif
            {
                var items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                foreach (DatItem item in items)
                {
                    // If the file is not a Disk, Media, or Rom, continue
                    switch (item)
                    {
                        // Disks: nodump status wins, then SHA-1, then MD5, then "other".
                        // BUGFIX: the MD5 fallback was duplicated as two identical
                        // else-if branches; the unreachable second copy was removed.
                        case Disk disk:
                            if (disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                                fieldDats[Models.Metadata.Disk.StatusKey].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key)))
                                fieldDats[Models.Metadata.Disk.SHA1Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key)))
                                fieldDats[Models.Metadata.Disk.MD5Key].Items.Add(key, item);
                            else
                                fieldDats["null"].Items.Add(key, item);
                            break;

                        // Media: best hash first (SHA-256, SHA-1, MD5), then "other"
                        case Media media:
                            if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA256Key)))
                                fieldDats[Models.Metadata.Media.SHA256Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA1Key)))
                                fieldDats[Models.Metadata.Media.SHA1Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.MD5Key)))
                                fieldDats[Models.Metadata.Media.MD5Key].Items.Add(key, item);
                            else
                                fieldDats["null"].Items.Add(key, item);
                            break;

                        // Roms: nodump status wins, then best hash first
                        case Rom rom:
                            if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                                fieldDats[Models.Metadata.Rom.StatusKey].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA512Key)))
                                fieldDats[Models.Metadata.Rom.SHA512Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA384Key)))
                                fieldDats[Models.Metadata.Rom.SHA384Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA256Key)))
                                fieldDats[Models.Metadata.Rom.SHA256Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key)))
                                fieldDats[Models.Metadata.Rom.SHA1Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.MD5Key)))
                                fieldDats[Models.Metadata.Rom.MD5Key].Items.Add(key, item);
                            else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)))
                                fieldDats[Models.Metadata.Rom.CRCKey].Items.Add(key, item);
                            else
                                fieldDats["null"].Items.Add(key, item);
                            break;

                        default:
                            continue;
                    }
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            watch.Stop();
            return fieldDats;
        }

        /// <summary>
        /// Split a DAT by best available hashes
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <returns>Dictionary of Field to DatFile mappings</returns>
        public static Dictionary<string, DatFile> SplitByHashDB(DatFile datFile)
        {
            // Create each of the respective output DATs
            var watch = new InternalStopwatch($"Splitting DAT by best available hashes");

            // Create mapping of keys to suffixes
            var mappings = new Dictionary<string, string>
            {
                [Models.Metadata.Rom.StatusKey] = " (Nodump)",
                [Models.Metadata.Rom.SHA512Key] = " (SHA-512)",
                [Models.Metadata.Rom.SHA384Key] = " (SHA-384)",
                [Models.Metadata.Rom.SHA256Key] = " (SHA-256)",
                [Models.Metadata.Rom.SHA1Key] = " (SHA-1)",
                [Models.Metadata.Rom.MD5Key] = " (MD5)",
                [Models.Metadata.Rom.CRCKey] = " (CRC)",
                ["null"] = " (Other)",
            };

            // Create the set of field-to-dat mappings
            Dictionary<string, DatFile> fieldDats = [];
            foreach (var kvp in mappings)
            {
                fieldDats[kvp.Key] = DatFile.Create(datFile.Header.CloneStandard());
                fieldDats[kvp.Key].Header.SetFieldValue(DatHeader.FileNameKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(DatHeader.FileNameKey) + kvp.Value);
fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.NameKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + kvp.Value);
                fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                    fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + kvp.Value);
            }

            // Get all current items, machines, and mappings
            var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
            var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
            var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
            var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
            var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

            // Create mappings from old index to new index
            var machineRemapping = new Dictionary<long, long>();
            var sourceRemapping = new Dictionary<long, long>();

            // Loop through and add all sources; every output DAT receives every
            // source so the remapped indices stay aligned across all of them
            foreach (var source in sources)
            {
                long newSourceIndex = fieldDats[Models.Metadata.Rom.StatusKey].ItemsDB.AddSource(source.Value);
                sourceRemapping[source.Key] = newSourceIndex;
                _ = fieldDats[Models.Metadata.Rom.SHA512Key].ItemsDB.AddSource(source.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA384Key].ItemsDB.AddSource(source.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA256Key].ItemsDB.AddSource(source.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA1Key].ItemsDB.AddSource(source.Value);
                _ = fieldDats[Models.Metadata.Rom.MD5Key].ItemsDB.AddSource(source.Value);
                _ = fieldDats[Models.Metadata.Rom.CRCKey].ItemsDB.AddSource(source.Value);
                _ = fieldDats["null"].ItemsDB.AddSource(source.Value);
            }

            // Loop through and add all machines (same alignment argument as sources)
            foreach (var machine in machines)
            {
                long newMachineIndex = fieldDats[Models.Metadata.Rom.StatusKey].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA512Key].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA384Key].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA256Key].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.SHA1Key].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.MD5Key].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats[Models.Metadata.Rom.CRCKey].ItemsDB.AddMachine(machine.Value);
                _ = fieldDats["null"].ItemsDB.AddMachine(machine.Value);
                machineRemapping[machine.Key] = newMachineIndex;
            }

            // Loop through and add the items
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datItems, Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datItems, item =>
#else
            foreach (var item in datItems)
#endif
            {
                // Get the machine and source index for this item
                long machineIndex = itemMachineMappings[item.Key];
                long sourceIndex = itemSourceMappings[item.Key];

                // Only process Disk, Media, and Rom
                switch (item.Value)
                {
                    // Disks: nodump status wins, then SHA-1, then MD5, then "other".
                    // BUGFIX: the MD5 fallback was duplicated as two identical
                    // else-if branches; the unreachable second copy was removed.
                    case Disk disk:
                        if (disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                            fieldDats[Models.Metadata.Disk.StatusKey].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key)))
                            fieldDats[Models.Metadata.Disk.SHA1Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key)))
                            fieldDats[Models.Metadata.Disk.MD5Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else
                            fieldDats["null"].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        break;

                    // Media: best hash first (SHA-256, SHA-1, MD5), then "other"
                    case Media media:
                        if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA256Key)))
                            fieldDats[Models.Metadata.Media.SHA256Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA1Key)))
                            fieldDats[Models.Metadata.Media.SHA1Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.MD5Key)))
                            fieldDats[Models.Metadata.Media.MD5Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else
                            fieldDats["null"].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        break;

                    // Roms: nodump status wins, then best hash first
                    case Rom rom:
                        if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                            fieldDats[Models.Metadata.Rom.StatusKey].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA512Key)))
                            fieldDats[Models.Metadata.Rom.SHA512Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA384Key)))
                            fieldDats[Models.Metadata.Rom.SHA384Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA256Key)))
                            fieldDats[Models.Metadata.Rom.SHA256Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key)))
                            fieldDats[Models.Metadata.Rom.SHA1Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.MD5Key)))
                            fieldDats[Models.Metadata.Rom.MD5Key].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)))
                            fieldDats[Models.Metadata.Rom.CRCKey].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        else
                            fieldDats["null"].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
                        break;

                    default:
#if NET40_OR_GREATER || NETCOREAPP
                        return;
#else
                        continue;
#endif
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            watch.Stop();
            return fieldDats;
        }

        /// <summary>
        /// Split a SuperDAT by lowest available directory level
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="outDir">Name of the directory to write the DATs out to</param>
        /// <param name="shortname">True if short names should be used, false otherwise</param>
        /// <param name="basedat">True if original filenames should be used as the base for output filename, false otherwise</param>
        /// <returns>True if split succeeded, false otherwise</returns>
        public static bool SplitByLevel(DatFile datFile, string outDir, bool shortname, bool basedat)
        {
            InternalStopwatch watch = new($"Splitting DAT by level");

            // First, bucket by games so that we can do the right thing
            datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true);

            // Create a temporary DAT to add things to
            DatFile tempDat = DatFile.Create(datFile.Header);
            tempDat.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, null);

            // Sort the input keys
            List<string> keys = [..
datFile.Items.Keys];
            keys.Sort(SplitByLevelSort);

            // Then, we loop over the games
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(keys, key =>
#else
            foreach (var key in keys)
#endif
            {
                // Here, the key is the name of the game to be used for comparison
                if (tempDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) != null
                    && tempDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) != Path.GetDirectoryName(key))
                {
                    // Reset the DAT for the next items
                    tempDat = DatFile.Create(datFile.Header);
                    tempDat.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, null);
                }

                // NOTE(review): tempDat is captured and reassigned inside Parallel.ForEach
                // on the parallel builds, which is not thread-safe — confirm intent
                // (the class-level TODO says the level split is unimplemented)

                // Clean the input list and set all games to be pathless
                ConcurrentList<DatItem>? items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                items.ForEach(item =>
                {
                    var machine = item.GetFieldValue<Machine>(DatItem.MachineKey)!;
                    machine.SetFieldValue(Models.Metadata.Machine.NameKey,
                        Path.GetFileName(machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
                    machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey,
                        Path.GetFileName(machine.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
                });

                // Now add the game to the output DAT
                tempDat.Items.AddRange(key, items);

                // Then set the DAT name to be the parent directory name
                tempDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, Path.GetDirectoryName(key));
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            watch.Stop();
            return true;
        }

        /// <summary>
        /// Helper function for SplitByLevel to sort the input game names
        /// </summary>
        /// <param name="a">First string to compare</param>
        /// <param name="b">Second string to compare</param>
        /// <returns>-1 for a coming before b, 0 for a == b, 1 for a coming after b</returns>
        private static int SplitByLevelSort(string a, string b)
        {
            NaturalComparer nc = new();

            // Shallower paths sort before deeper ones; ties fall back to natural order
            int adeep = a.Count(c => c == '/' || c == '\\');
            int bdeep = b.Count(c => c == '/' || c == '\\');
            if
(adeep == bdeep)
                return nc.Compare(a, b);

            return adeep - bdeep;
        }

        /// <summary>
        /// Helper function for SplitByLevel to clean and write out a DAT
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="newDatFile">DAT to clean and write out</param>
        /// <param name="outDir">Directory to write out to</param>
        /// <param name="shortname">True if short naming scheme should be used, false otherwise</param>
        /// <param name="restore">True if original filenames should be used as the base for output filename, false otherwise</param>
        private static void SplitByLevelHelper(DatFile datFile, DatFile newDatFile, string outDir, bool shortname, bool restore)
        {
            // Get the name from the DAT to use separately
            string? name = newDatFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey);
            string? expName = name?.Replace("/", " - ")?.Replace("\\", " - ");

            // Now set the new output values (pre-NET40 frameworks lack WebUtility.HtmlDecode)
#if NET20 || NET35
            newDatFile.Header.SetFieldValue(DatHeader.FileNameKey,
                string.IsNullOrEmpty(name)
                    ? datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)
                    : (shortname ? Path.GetFileName(name) : expName));
#else
            newDatFile.Header.SetFieldValue(DatHeader.FileNameKey,
                WebUtility.HtmlDecode(string.IsNullOrEmpty(name)
                    ? datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)
                    : (shortname ? Path.GetFileName(name) : expName)));
#endif
            newDatFile.Header.SetFieldValue(DatHeader.FileNameKey,
                restore
                    ? $"{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)} ({newDatFile.Header.GetStringFieldValue(DatHeader.FileNameKey)})"
                    : newDatFile.Header.GetStringFieldValue(DatHeader.FileNameKey));
            newDatFile.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                $"{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)} ({expName})");

            // NOTE(review): this second write targets NameKey again (reading NameKey back
            // as its fallback); it looks like it was meant to target DescriptionKey —
            // confirm against upstream before changing
            newDatFile.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                string.IsNullOrEmpty(datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey))
                    ? newDatFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)
                    : $"{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)} ({expName})");
            newDatFile.Header.SetFieldValue<string?>(Models.Metadata.Header.TypeKey, null);

            // Write out the temporary DAT to the proper directory
            Writer.Write(newDatFile, outDir);
        }

        /// <summary>
        /// Split a DAT by size of Rom
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="radix">Long value representing the split point</param>
        /// <returns>Less Than and Greater Than DatFiles</returns>
        public static (DatFile lessThan, DatFile greaterThan) SplitBySize(DatFile datFile, long radix)
        {
            // Create each of the respective output DATs
            InternalStopwatch watch = new($"Splitting DAT by size");

            DatFile lessThan = DatFile.Create(datFile.Header.CloneStandard());
            lessThan.Header.SetFieldValue(DatHeader.FileNameKey,
                lessThan.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" (less than {radix})");
            lessThan.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                lessThan.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" (less than {radix})");
            lessThan.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                lessThan.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" (less than {radix})");

            DatFile greaterThan = DatFile.Create(datFile.Header.CloneStandard());
            greaterThan.Header.SetFieldValue(DatHeader.FileNameKey,
                greaterThan.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" (equal-greater than {radix})");
            greaterThan.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                greaterThan.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" (equal-greater than {radix})");
            greaterThan.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                greaterThan.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" (equal-greater than {radix})");

            // Now populate each of the DAT objects in turn
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
Parallel.ForEach(datFile.Items.Keys, key =>
#else
            foreach (var key in datFile.Items.Keys)
#endif
            {
                ConcurrentList<DatItem>? items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                foreach (DatItem item in items)
                {
                    // If the file is not a Rom, it automatically goes in the "lesser" dat
                    if (item is not Rom rom)
                        lessThan.Items.Add(key, item);

                    // If the file is a Rom and has no size, put it in the "lesser" dat
                    else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) == null)
                        lessThan.Items.Add(key, item);

                    // If the file is a Rom and less than the radix, put it in the "lesser" dat
                    else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) < radix)
                        lessThan.Items.Add(key, item);

                    // If the file is a Rom and greater than or equal to the radix, put it in the "greater" dat
                    else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) >= radix)
                        greaterThan.Items.Add(key, item);
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            // Then return both DatFiles
            watch.Stop();
            return (lessThan, greaterThan);
        }

        /// <summary>
        /// Split a DAT by size of Rom
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="radix">Long value representing the split point</param>
        /// <returns>Less Than and Greater Than DatFiles</returns>
        public static (DatFile lessThan, DatFile greaterThan) SplitBySizeDB(DatFile datFile, long radix)
        {
            // Create each of the respective output DATs
            var watch = new InternalStopwatch($"Splitting DAT by size");

            DatFile lessThan = DatFile.Create(datFile.Header.CloneStandard());
            lessThan.Header.SetFieldValue(DatHeader.FileNameKey,
                lessThan.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" (less than {radix})");
            lessThan.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                lessThan.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" (less than {radix})");
            lessThan.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                lessThan.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" (less than {radix})");

            DatFile greaterThan = DatFile.Create(datFile.Header.CloneStandard());
            greaterThan.Header.SetFieldValue(DatHeader.FileNameKey,
                greaterThan.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" (equal-greater than {radix})");
            greaterThan.Header.SetFieldValue(Models.Metadata.Header.NameKey,
                greaterThan.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" (equal-greater than {radix})");
            greaterThan.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey,
                greaterThan.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" (equal-greater than {radix})");

            // Get all current items, machines, and mappings
            var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
            var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
            var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
            var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
            var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

            // Create mappings from old index to new index
            var machineRemapping = new Dictionary<long, long>();
            var sourceRemapping = new Dictionary<long, long>();

            // Loop through and add all sources; both output DATs receive every
            // source so the remapped indices stay aligned between the two
            foreach (var source in sources)
            {
                long newSourceIndex = lessThan.ItemsDB.AddSource(source.Value);
                _ = greaterThan.ItemsDB.AddSource(source.Value);
                sourceRemapping[source.Key] = newSourceIndex;
            }

            // Loop through and add all machines (same alignment argument as sources)
            foreach (var machine in machines)
            {
                long newMachineIndex = lessThan.ItemsDB.AddMachine(machine.Value);
                _ = greaterThan.ItemsDB.AddMachine(machine.Value);
                machineRemapping[machine.Key] = newMachineIndex;
            }

            // Loop through and add the items
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datItems, Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datItems, item =>
#else
            foreach (var item in datItems)
#endif
            {
                // Get the machine and source index for this item
                long machineIndex = itemMachineMappings[item.Key];
                long sourceIndex = itemSourceMappings[item.Key];

                // If the file is not a Rom, it automatically goes in the "lesser" dat
                if (item.Value is not Rom rom)
                    lessThan.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);

                // If the file is a Rom and has no size, put it in the "lesser" dat
                else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) == null)
                    lessThan.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);

                // If the file is a Rom and less than the radix, put it in the "lesser" dat
                else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) < radix)
                    lessThan.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);

                // If the file is a Rom and greater than or equal to the radix, put it in the "greater" dat
                else if (rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) >= radix)
                    greaterThan.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            // Then return both DatFiles
            watch.Stop();
            return (lessThan, greaterThan);
        }

        /// <summary>
        /// Split a DAT into chunks based on the total size of each machine
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="chunkSize">Long value representing the total size to split at</param>
        /// <returns>List of DatFiles, each at most chunkSize in total Rom size</returns>
        public static List<DatFile> SplitByTotalSize(DatFile datFile, long chunkSize)
        {
            // If the size is invalid, just return
            if (chunkSize <= 0)
                return [];

            // Create each of the respective output DATs
            InternalStopwatch watch = new($"Splitting DAT by total size");

            // Sort the DatFile by machine name
            datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None);

            // Get the keys in a known order for easier sorting
            var keys = datFile.Items.SortedKeys;

            // Get the output list
            List<DatFile> datFiles = [];

            // Initialize everything
            long currentSize = 0;
            long currentIndex = 0;
            DatFile currentDat =
DatFile.Create(datFile.Header.CloneStandard());

            // Tag the chunk's filename, name, and description with its index suffix
            currentDat.Header.SetFieldValue(DatHeader.FileNameKey, currentDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $"_{currentIndex}");
            currentDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, currentDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $"_{currentIndex}");
            currentDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, currentDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $"_{currentIndex}");

            // Loop through each machine, keeping each machine whole within a single chunk
            foreach (string machine in keys)
            {
                // Get the current machine's items; skip machines with nothing in them
                var items = datFile.Items[machine];
                if (items == null || items.Count == 0)
                {
                    logger.Error($"{machine} contains no items and will be skipped");
                    continue;
                }

                // Get the total size of the current machine (only Rom items contribute;
                // a Rom with no size counts as 0)
                long machineSize = 0;
                foreach (var item in items)
                {
                    if (item is Rom rom)
                    {
                        // TODO: Should there be more than just a log if a single item is larger than the chunksize?
                        machineSize += rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) ?? 0;
                        if ((rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) ?? 0) > chunkSize)
                            logger.Error($"{rom.GetName() ?? string.Empty} in {machine} is larger than {chunkSize}");
                    }
                }

                // If the current machine size is greater than the chunk size by itself, we want to log and skip
                // TODO: Should this eventually try to split the machine here?
if (machineSize > chunkSize)
                {
                    logger.Error($"{machine} is larger than {chunkSize} and will be skipped");
                    continue;
                }
                // If the current machine size makes the current DatFile too big, close it out and start a new chunk
                else if (currentSize + machineSize > chunkSize)
                {
                    datFiles.Add(currentDat);
                    currentSize = 0;
                    currentIndex++;
                    currentDat = DatFile.Create(datFile.Header.CloneStandard());
                    currentDat.Header.SetFieldValue(DatHeader.FileNameKey, currentDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $"_{currentIndex}");
                    currentDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, currentDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $"_{currentIndex}");
                    currentDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, currentDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $"_{currentIndex}");
                }

                // Add the current machine to the current DatFile
                currentDat.Items[machine] = items;
                currentSize += machineSize;
            }

            // Add the final DatFile to the list
            // NOTE(review): this adds the trailing DAT unconditionally, so an input with no
            // usable machines yields a single empty chunk — confirm that is intended.
            datFiles.Add(currentDat);

            // Then return the list
            watch.Stop();
            return datFiles;
        }

        /// <summary>
        /// Split a DAT by type of DatItem
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <returns>Dictionary of ItemType to DatFile mappings</returns>
        public static Dictionary SplitByType(DatFile datFile)
        {
            // Create each of the respective output DATs
            InternalStopwatch watch = new($"Splitting DAT by item type");

            // Create the set of type-to-dat mappings
            Dictionary typeDats = [];

            // We only care about a subset of types
            List outputTypes =
            [
                ItemType.Disk,
                ItemType.Media,
                ItemType.Rom,
                ItemType.Sample,
            ];

            // Setup all of the DatFiles, tagging each header with its item type
            foreach (ItemType itemType in outputTypes)
            {
                typeDats[itemType] = DatFile.Create(datFile.Header.CloneStandard());
                typeDats[itemType].Header.SetFieldValue(DatHeader.FileNameKey, typeDats[itemType].Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({itemType})");
                typeDats[itemType].Header.SetFieldValue(Models.Metadata.Header.NameKey, typeDats[itemType].Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({itemType})");
                typeDats[itemType].Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, typeDats[itemType].Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({itemType})");
            }

            // Now populate each of the DAT objects in turn (one type per parallel task)
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(outputTypes, Globals.ParallelOptions, itemType =>
#elif NET40_OR_GREATER
            Parallel.ForEach(outputTypes, itemType =>
#else
            foreach (var itemType in outputTypes)
#endif
            {
                FillWithItemType(datFile, typeDats[itemType], itemType);
                FillWithItemTypeDB(datFile, typeDats[itemType], itemType);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif

            watch.Stop();
            return typeDats;
        }

        /// <summary>
        /// Fill a DatFile with all items with a particular ItemType
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="indexDat">DatFile to add found items to</param>
        /// <param name="itemType">ItemType to retrieve items for</param>
        private static void FillWithItemType(DatFile datFile, DatFile indexDat, ItemType itemType)
        {
            // Loop through and add the items for this index to the output
            // NOTE(review): indexDat.Items.Add is invoked from concurrent lambdas here;
            // assumed thread-safe per its use elsewhere in this class — confirm.
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datFile.Items.Keys, key =>
#else
            foreach (var key in datFile.Items.Keys)
#endif
            {
                ConcurrentList items = DatItem.Merge(datFile.Items[key]);

                // If the rom list is empty or null, just skip it
                if (items == null || items.Count == 0)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                // Keep only the items whose type matches the requested one
                foreach (DatItem item in items)
                {
                    if (item.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue() == itemType)
                        indexDat.Items.Add(key, item);
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Fill a DatFile with all items with a particular ItemType, using the item database
        /// </summary>
        /// <param name="datFile">Current DatFile object to split</param>
        /// <param name="indexDat">DatFile to add found items to</param>
        /// <param name="itemType">ItemType to retrieve items for</param>
        private static void FillWithItemTypeDB(DatFile
datFile, DatFile indexDat, ItemType itemType)
        {
            // Get all current items, machines, and mappings from the item database
            var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
            var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
            var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
            var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
            var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

            // Create mappings from old index to new index
            var machineRemapping = new Dictionary();
            var sourceRemapping = new Dictionary();

            // Loop through and add all sources, recording each new index
            foreach (var source in sources)
            {
                long newSourceIndex = indexDat.ItemsDB.AddSource(source.Value);
                sourceRemapping[source.Key] = newSourceIndex;
            }

            // Loop through and add all machines, recording each new index
            foreach (var machine in machines)
            {
                long newMachineIndex = indexDat.ItemsDB.AddMachine(machine.Value);
                machineRemapping[machine.Key] = newMachineIndex;
            }

            // Loop through and add the items whose type matches (in parallel where available)
            // NOTE(review): indexDat.ItemsDB.AddItem is invoked from concurrent lambdas;
            // assumed thread-safe per its use elsewhere in this class — confirm.
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datItems, Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datItems, item =>
#else
            foreach (var item in datItems)
#endif
            {
                // Get the machine and source index for this item
                long machineIndex = itemMachineMappings[item.Key];
                long sourceIndex = itemSourceMappings[item.Key];

                if (item.Value.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue() == itemType)
                    indexDat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }
    }
}