using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
using SabreTools.IO.Extensions;
using SabreTools.IO.Logging;
using SabreTools.Matching.Compare;
namespace SabreTools.DatTools
{
/// <summary>
/// Helper methods for splitting DatFiles
/// </summary>
/// <remarks>TODO: Implement Level split</remarks>
public class Splitter
{
#region Logging
/// <summary>
/// Logging object
/// </summary>
private static readonly Logger logger = new();
#endregion
/// <summary>
/// Split a DAT by input extensions
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="extA">List of extensions to split on (first DAT)</param>
/// <param name="extB">List of extensions to split on (second DAT)</param>
/// <returns>Extension Set A and Extension Set B DatFiles; (null, null) if the input has no items</returns>
/// <remarks>
/// An item whose extension is in set A goes only to the first DAT, an item whose
/// extension is in set B (and not in A) goes only to the second DAT, and an item
/// matching neither set is added to both.
/// </remarks>
public static (DatFile? extADat, DatFile? extBDat) SplitByExtension(DatFile datFile, List<string> extA, List<string> extB)
{
    // If roms is empty, there is nothing to split
    if (datFile.Items.DatStatistics.TotalCount == 0)
        return (null, null);

    InternalStopwatch watch = new($"Splitting DAT by extension");

    // Make sure all of the extensions don't have a dot at the beginning and are lowercase
    var newExtA = extA.ConvertAll(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
    string newExtAString = string.Join(",", newExtA);

    var newExtB = extB.ConvertAll(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
    string newExtBString = string.Join(",", newExtB);

    // Set all of the appropriate outputs for each of the subsets
    DatFile extADat = DatFile.Create(datFile.Header.CloneStandard());
    extADat.Header.SetFieldValue(DatHeader.FileNameKey, extADat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtAString})");
    extADat.Header.SetFieldValue(Models.Metadata.Header.NameKey, extADat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtAString})");
    extADat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, extADat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtAString})");

    DatFile extBDat = DatFile.Create(datFile.Header.CloneStandard());
    extBDat.Header.SetFieldValue(DatHeader.FileNameKey, extBDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + $" ({newExtBString})");
    extBDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, extBDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $" ({newExtBString})");
    extBDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, extBDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $" ({newExtBString})");

    // Now separate the roms accordingly
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datFile.Items.Keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datFile.Items.Keys, key =>
#else
    foreach (var key in datFile.Items.Keys)
#endif
    {
        var items = datFile.Items[key];
        if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
            return;
#else
            continue;
#endif

        foreach (DatItem item in items)
        {
            // Normalize the item's extension once and reuse it for both checks
            var extension = (item.GetName() ?? string.Empty).GetNormalizedExtension();

            if (Array.IndexOf(newExtA, extension) > -1)
            {
                extADat.Items.Add(key, item);
            }
            // Must be "else if": otherwise an item matching set A would also fall
            // through to the final branch and be added to both outputs again
            else if (Array.IndexOf(newExtB, extension) > -1)
            {
                extBDat.Items.Add(key, item);
            }
            else
            {
                // Items matching neither set are kept in both outputs
                extADat.Items.Add(key, item);
                extBDat.Items.Add(key, item);
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // Then return both DatFiles
    watch.Stop();
    return (extADat, extBDat);
}
/// <summary>
/// Split a DAT by input extensions, reading from and writing to the DB-backed item store
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="extA">List of extensions to split on (first DAT)</param>
/// <param name="extB">List of extensions to split on (second DAT)</param>
/// <returns>Extension Set A and Extension Set B DatFiles; (null, null) if the input has no items</returns>
public static (DatFile? extADat, DatFile? extBDat) SplitByExtensionDB(DatFile datFile, List<string> extA, List<string> extB)
{
    // Nothing to split when the source holds no items
    if (datFile.ItemsDB.DatStatistics.TotalCount == 0)
        return (null, null);

    InternalStopwatch watch = new($"Splitting DAT by extension");

    // Strip leading dots and lowercase every requested extension
    var newExtA = extA.ConvertAll(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
    string newExtAString = string.Join(",", newExtA);
    var newExtB = extB.ConvertAll(s => s.TrimStart('.').ToLowerInvariant()).ToArray();
    string newExtBString = string.Join(",", newExtB);

    // Appends the given suffix to the file name, name, and description of a DAT header
    void AppendSuffix(DatFile dat, string suffix)
    {
        dat.Header.SetFieldValue(DatHeader.FileNameKey, dat.Header.GetStringFieldValue(DatHeader.FileNameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.NameKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + suffix);
    }

    // Build one output per extension set from a standard clone of the header
    DatFile extADat = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(extADat, $" ({newExtAString})");
    DatFile extBDat = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(extBDat, $" ({newExtBString})");

    // Snapshot all current items, machines, sources, and mappings
    var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
    var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
    var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
    var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
    var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

    // Old index -> new index. Only the index returned by the first output is recorded;
    // NOTE(review): this presumes both outputs hand out identical indices when fed
    // the same sequence of sources/machines - confirm against ItemsDB.
    Dictionary<long, long> machineRemapping = [];
    Dictionary<long, long> sourceRemapping = [];

    // Register every source with both outputs
    foreach (var sourceEntry in sources)
    {
        long indexInA = extADat.ItemsDB.AddSource(sourceEntry.Value);
        _ = extBDat.ItemsDB.AddSource(sourceEntry.Value);
        sourceRemapping[sourceEntry.Key] = indexInA;
    }

    // Register every machine with both outputs
    foreach (var machineEntry in machines)
    {
        long indexInA = extADat.ItemsDB.AddMachine(machineEntry.Value);
        _ = extBDat.ItemsDB.AddMachine(machineEntry.Value);
        machineRemapping[machineEntry.Key] = indexInA;
    }

    // Distribute the items
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datItems, Core.Globals.ParallelOptions, entry =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datItems, entry =>
#else
    foreach (var entry in datItems)
#endif
    {
        // Original machine and source index for this item
        long machineIndex = itemMachineMappings[entry.Key];
        long sourceIndex = itemSourceMappings[entry.Key];

        // Set A takes precedence over set B; an item in neither set goes to both outputs
        var extension = (entry.Value.GetName() ?? string.Empty).GetNormalizedExtension();
        bool onlyA = Array.IndexOf(newExtA, extension) > -1;
        bool onlyB = !onlyA && Array.IndexOf(newExtB, extension) > -1;

        if (!onlyB)
            extADat.ItemsDB.AddItem(entry.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);

        if (!onlyA)
            extBDat.ItemsDB.AddItem(entry.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // Hand back both outputs
    watch.Stop();
    return (extADat, extBDat);
}
/// <summary>
/// Split a DAT by best available hashes
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <returns>Dictionary of Field to DatFile mappings</returns>
/// <remarks>
/// Only Disk, Media, and Rom items are distributed; every other item type is skipped.
/// Each item lands in exactly one output: Nodump status first (Disk and Rom only),
/// then the first non-empty hash in the order checked below, otherwise the "null" output.
/// </remarks>
public static Dictionary<string, DatFile> SplitByHash(DatFile datFile)
{
    // Create each of the respective output DATs
    var watch = new InternalStopwatch($"Splitting DAT by best available hashes");

    // Create mapping of keys to suffixes
    var mappings = new Dictionary<string, string>
    {
        [Models.Metadata.Rom.StatusKey] = " (Nodump)",
        [Models.Metadata.Rom.SHA512Key] = " (SHA-512)",
        [Models.Metadata.Rom.SHA384Key] = " (SHA-384)",
        [Models.Metadata.Rom.SHA256Key] = " (SHA-256)",
        [Models.Metadata.Rom.SHA1Key] = " (SHA-1)",
        [Models.Metadata.Rom.MD5Key] = " (MD5)",
        [Models.Metadata.Rom.CRCKey] = " (CRC)",
        ["null"] = " (Other)",
    };

    // Create the set of field-to-dat mappings
    Dictionary<string, DatFile> fieldDats = [];
    foreach (var kvp in mappings)
    {
        fieldDats[kvp.Key] = DatFile.Create(datFile.Header.CloneStandard());
        fieldDats[kvp.Key].Header.SetFieldValue(DatHeader.FileNameKey, fieldDats[kvp.Key].Header.GetStringFieldValue(DatHeader.FileNameKey) + kvp.Value);
        fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.NameKey, fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + kvp.Value);
        fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + kvp.Value);
    }

    // Now populate each of the DAT objects in turn
    // NOTE(review): fieldDats is keyed by the Rom constants, but the Disk and Media
    // branches look it up with Disk/Media constants; presumably these are the same
    // strings - confirm.
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datFile.Items.Keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datFile.Items.Keys, key =>
#else
    foreach (var key in datFile.Items.Keys)
#endif
    {
        var items = datFile.Items[key];
        if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
            return;
#else
            continue;
#endif

        foreach (DatItem item in items)
        {
            // If the file is not a Disk, Media, or Rom, continue
            switch (item)
            {
                case Disk disk:
                    if (disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                        fieldDats[Models.Metadata.Disk.StatusKey].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key)))
                        fieldDats[Models.Metadata.Disk.SHA1Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key)))
                        fieldDats[Models.Metadata.Disk.MD5Key].Items.Add(key, item);
                    else
                        fieldDats["null"].Items.Add(key, item);
                    break;

                case Media media:
                    if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA256Key)))
                        fieldDats[Models.Metadata.Media.SHA256Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA1Key)))
                        fieldDats[Models.Metadata.Media.SHA1Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.MD5Key)))
                        fieldDats[Models.Metadata.Media.MD5Key].Items.Add(key, item);
                    else
                        fieldDats["null"].Items.Add(key, item);
                    break;

                case Rom rom:
                    if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                        fieldDats[Models.Metadata.Rom.StatusKey].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA512Key)))
                        fieldDats[Models.Metadata.Rom.SHA512Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA384Key)))
                        fieldDats[Models.Metadata.Rom.SHA384Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA256Key)))
                        fieldDats[Models.Metadata.Rom.SHA256Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key)))
                        fieldDats[Models.Metadata.Rom.SHA1Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.MD5Key)))
                        fieldDats[Models.Metadata.Rom.MD5Key].Items.Add(key, item);
                    else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)))
                        fieldDats[Models.Metadata.Rom.CRCKey].Items.Add(key, item);
                    else
                        fieldDats["null"].Items.Add(key, item);
                    break;

                default:
                    continue;
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    watch.Stop();
    return fieldDats;
}
/// <summary>
/// Split a DAT by best available hashes, reading from and writing to the DB-backed item store
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <returns>Dictionary of Field to DatFile mappings</returns>
/// <remarks>
/// Only Disk, Media, and Rom items are distributed; every other item type is skipped.
/// Each item lands in exactly one output: Nodump status first (Disk and Rom only),
/// then the first non-empty hash in the order checked below, otherwise the "null" output.
/// </remarks>
public static Dictionary<string, DatFile> SplitByHashDB(DatFile datFile)
{
    // Create each of the respective output DATs
    var watch = new InternalStopwatch($"Splitting DAT by best available hashes");

    // Create mapping of keys to suffixes
    var mappings = new Dictionary<string, string>
    {
        [Models.Metadata.Rom.StatusKey] = " (Nodump)",
        [Models.Metadata.Rom.SHA512Key] = " (SHA-512)",
        [Models.Metadata.Rom.SHA384Key] = " (SHA-384)",
        [Models.Metadata.Rom.SHA256Key] = " (SHA-256)",
        [Models.Metadata.Rom.SHA1Key] = " (SHA-1)",
        [Models.Metadata.Rom.MD5Key] = " (MD5)",
        [Models.Metadata.Rom.CRCKey] = " (CRC)",
        ["null"] = " (Other)",
    };

    // Create the set of field-to-dat mappings
    Dictionary<string, DatFile> fieldDats = [];
    foreach (var kvp in mappings)
    {
        fieldDats[kvp.Key] = DatFile.Create(datFile.Header.CloneStandard());
        fieldDats[kvp.Key].Header.SetFieldValue(DatHeader.FileNameKey, fieldDats[kvp.Key].Header.GetStringFieldValue(DatHeader.FileNameKey) + kvp.Value);
        fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.NameKey, fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + kvp.Value);
        fieldDats[kvp.Key].Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, fieldDats[kvp.Key].Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + kvp.Value);
    }

    // Get all current items, machines, and mappings
    var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
    var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
    var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
    var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
    var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

    // Create mappings from old index to new index. Only the index returned by the
    // Nodump output is recorded and then reused for every output;
    // NOTE(review): this presumes all outputs hand out identical indices when fed
    // the same sequence of sources/machines - confirm against ItemsDB.
    var machineRemapping = new Dictionary<long, long>();
    var sourceRemapping = new Dictionary<long, long>();

    // Loop through and add all sources
    foreach (var source in sources)
    {
        long newSourceIndex = fieldDats[Models.Metadata.Rom.StatusKey].ItemsDB.AddSource(source.Value);
        sourceRemapping[source.Key] = newSourceIndex;
        _ = fieldDats[Models.Metadata.Rom.SHA512Key].ItemsDB.AddSource(source.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA384Key].ItemsDB.AddSource(source.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA256Key].ItemsDB.AddSource(source.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA1Key].ItemsDB.AddSource(source.Value);
        _ = fieldDats[Models.Metadata.Rom.MD5Key].ItemsDB.AddSource(source.Value);
        _ = fieldDats[Models.Metadata.Rom.CRCKey].ItemsDB.AddSource(source.Value);
        _ = fieldDats["null"].ItemsDB.AddSource(source.Value);
    }

    // Loop through and add all machines
    foreach (var machine in machines)
    {
        long newMachineIndex = fieldDats[Models.Metadata.Rom.StatusKey].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA512Key].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA384Key].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA256Key].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.SHA1Key].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.MD5Key].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats[Models.Metadata.Rom.CRCKey].ItemsDB.AddMachine(machine.Value);
        _ = fieldDats["null"].ItemsDB.AddMachine(machine.Value);
        machineRemapping[machine.Key] = newMachineIndex;
    }

    // Loop through and add the items
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datItems, Core.Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datItems, item =>
#else
    foreach (var item in datItems)
#endif
    {
        // Get the machine and source index for this item
        long machineIndex = itemMachineMappings[item.Key];
        long sourceIndex = itemSourceMappings[item.Key];

        // Adds the current item to the output registered under the given field key;
        // the remapped indices are looked up only when an item is actually added
        void AddTo(string fieldKey)
            => fieldDats[fieldKey].ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);

        // Only process Disk, Media, and Rom
        switch (item.Value)
        {
            case Disk disk:
                if (disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                    AddTo(Models.Metadata.Disk.StatusKey);
                else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key)))
                    AddTo(Models.Metadata.Disk.SHA1Key);
                else if (!string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key)))
                    AddTo(Models.Metadata.Disk.MD5Key);
                else
                    AddTo("null");
                break;

            case Media media:
                if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA256Key)))
                    AddTo(Models.Metadata.Media.SHA256Key);
                else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA1Key)))
                    AddTo(Models.Metadata.Media.SHA1Key);
                else if (!string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.MD5Key)))
                    AddTo(Models.Metadata.Media.MD5Key);
                else
                    AddTo("null");
                break;

            case Rom rom:
                if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                    AddTo(Models.Metadata.Rom.StatusKey);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA512Key)))
                    AddTo(Models.Metadata.Rom.SHA512Key);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA384Key)))
                    AddTo(Models.Metadata.Rom.SHA384Key);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA256Key)))
                    AddTo(Models.Metadata.Rom.SHA256Key);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key)))
                    AddTo(Models.Metadata.Rom.SHA1Key);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.MD5Key)))
                    AddTo(Models.Metadata.Rom.MD5Key);
                else if (!string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)))
                    AddTo(Models.Metadata.Rom.CRCKey);
                else
                    AddTo("null");
                break;

            default:
#if NET40_OR_GREATER || NETCOREAPP
                return;
#else
                continue;
#endif
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    watch.Stop();
    return fieldDats;
}
/// <summary>
/// Split a SuperDAT by lowest available directory level
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="outDir">Name of the directory to write the DATs out to</param>
/// <param name="shortname">True if short names should be used, false otherwise</param>
/// <param name="basedat">True if original filenames should be used as the base for output filename, false otherwise</param>
/// <returns>True if split succeeded, false otherwise</returns>
/// <remarks>
/// NOTE(review): the grouped DAT built here is never written out or returned, and
/// <paramref name="outDir"/>, <paramref name="shortname"/>, and <paramref name="basedat"/>
/// are not used. This method currently only re-buckets the input and strips the directory
/// part from each item's machine name and description, then returns true.
/// </remarks>
public static bool SplitByLevel(DatFile datFile, string outDir, bool shortname, bool basedat)
{
    InternalStopwatch watch = new($"Splitting DAT by level");

    // First, bucket by games so that we can do the right thing
    datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true);

    // Create a temporary DAT to add things to
    DatFile tempDat = DatFile.Create(datFile.Header);
    tempDat.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, null);

    // Sort the input keys
    List<string> keys = [.. datFile.Items.Keys];
    keys.Sort(SplitByLevelSort);

    // Then, we loop over the games. This must run sequentially: the grouping relies on
    // the sorted key order and on a single tempDat that is replaced whenever the parent
    // directory changes, so running iterations in parallel would race on tempDat.
    foreach (string key in keys)
    {
        // Here, the key is the name of the game to be used for comparison
        string? currentName = tempDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey);
        if (currentName != null && currentName != Path.GetDirectoryName(key))
        {
            // Reset the DAT for the next items
            tempDat = DatFile.Create(datFile.Header);
            tempDat.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, null);
        }

        // Clean the input list and set all games to be pathless
        var items = datFile.Items[key];
        if (items == null)
            continue;

        items.ForEach(item => item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue(Models.Metadata.Machine.NameKey, Path.GetFileName(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey))));
        items.ForEach(item => item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue(Models.Metadata.Machine.DescriptionKey, Path.GetFileName(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey))));

        // Now add the game to the output DAT
        tempDat.Items.AddRange(key, items);

        // Then set the DAT name to be the parent directory name
        tempDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, Path.GetDirectoryName(key));
    }

    watch.Stop();
    return true;
}
/// <summary>
/// Comparison used by SplitByLevel to order game names: shallower paths first,
/// then natural ordering among names of equal depth
/// </summary>
/// <param name="a">First string to compare</param>
/// <param name="b">Second string to compare</param>
/// <returns>
/// The difference in path-separator counts when the depths differ (negative when
/// <paramref name="a"/> is shallower), otherwise the NaturalComparer result for the two names
/// </returns>
private static int SplitByLevelSort(string a, string b)
{
    // Counts forward and backward slashes in the given name
    static int SeparatorCount(string path)
    {
        int total = 0;
        foreach (char ch in path)
        {
            if (ch == '/' || ch == '\\')
                total++;
        }

        return total;
    }

    NaturalComparer naturalOrder = new();
    int depthOfA = SeparatorCount(a);
    int depthOfB = SeparatorCount(b);

    // Different depths decide the order outright
    if (depthOfA != depthOfB)
        return depthOfA - depthOfB;

    return naturalOrder.Compare(a, b);
}
/// <summary>
/// Helper function for SplitByLevel to clean and write out a DAT
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="newDatFile">DAT to clean and write out</param>
/// <param name="outDir">Directory to write out to</param>
/// <param name="shortname">True if short naming scheme should be used, false otherwise</param>
/// <param name="restore">True if original filenames should be used as the base for output filename, false otherwise</param>
private static void SplitByLevelHelper(DatFile datFile, DatFile newDatFile, string outDir, bool shortname, bool restore)
{
    // Get the name from the DAT to use separately
    string? name = newDatFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey);

    // Expanded name: path separators become " - "
    string? expName = name?.Replace("/", " - ")?.Replace("\\", " - ");

    // Now set the new output values. The file name falls back to the source DAT's
    // file name when the new DAT has no name; otherwise it is either the last path
    // segment (short naming) or the expanded name.
#if NET20 || NET35
    newDatFile.Header.SetFieldValue(DatHeader.FileNameKey, string.IsNullOrEmpty(name)
        ? datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)
        : (shortname
            ? Path.GetFileName(name)
            : expName
            ));
#else
    newDatFile.Header.SetFieldValue(DatHeader.FileNameKey, WebUtility.HtmlDecode(string.IsNullOrEmpty(name)
        ? datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)
        : (shortname
            ? Path.GetFileName(name)
            : expName
            )
        ));
#endif

    // When restoring, prefix with the source DAT's file name
    newDatFile.Header.SetFieldValue(DatHeader.FileNameKey, restore
        ? $"{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)} ({newDatFile.Header.GetStringFieldValue(DatHeader.FileNameKey)})"
        : newDatFile.Header.GetStringFieldValue(DatHeader.FileNameKey));
    newDatFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)} ({expName})");

    // The description falls back to the freshly-set name when the source DAT has none.
    // This must target DescriptionKey: writing it to NameKey would overwrite the name
    // set on the previous line and leave the description untouched.
    newDatFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, string.IsNullOrEmpty(datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey))
        ? newDatFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)
        : $"{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)} ({expName})");
    newDatFile.Header.SetFieldValue<string?>(Models.Metadata.Header.TypeKey, null);

    // Write out the temporary DAT to the proper directory
    Writer.Write(newDatFile, outDir);
}
/// <summary>
/// Split a DAT into two outputs around a Rom size threshold
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="radix">Long value representing the split point</param>
/// <returns>Less Than and Greater Than DatFiles</returns>
/// <remarks>
/// Roms whose size is at or above <paramref name="radix"/> go to the "greater" output.
/// Everything else (non-Rom items, Roms without a size, and smaller Roms) goes to the
/// "lesser" output.
/// </remarks>
public static (DatFile lessThan, DatFile greaterThan) SplitBySize(DatFile datFile, long radix)
{
    InternalStopwatch watch = new($"Splitting DAT by size");

    // Appends the given suffix to the file name, name, and description of a DAT header
    void AppendSuffix(DatFile dat, string suffix)
    {
        dat.Header.SetFieldValue(DatHeader.FileNameKey, dat.Header.GetStringFieldValue(DatHeader.FileNameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.NameKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + suffix);
    }

    // Build both outputs from a standard clone of the source header
    DatFile lessThan = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(lessThan, $" (less than {radix})");
    DatFile greaterThan = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(greaterThan, $" (equal-greater than {radix})");

    // Distribute every item of every bucket
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datFile.Items.Keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datFile.Items.Keys, key =>
#else
    foreach (var key in datFile.Items.Keys)
#endif
    {
        var bucket = datFile.Items[key];
        if (bucket != null)
        {
            foreach (DatItem item in bucket)
            {
                // Default to the "lesser" output; only a sized Rom at or above
                // the radix is redirected to the "greater" output
                DatFile destination = lessThan;
                if (item is Rom rom)
                {
                    var size = rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey);
                    if (size != null && size >= radix)
                        destination = greaterThan;
                }

                destination.Items.Add(key, item);
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // Hand back both outputs
    watch.Stop();
    return (lessThan, greaterThan);
}
/// <summary>
/// Split a DAT into two outputs around a Rom size threshold, reading from and
/// writing to the DB-backed item store
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="radix">Long value representing the split point</param>
/// <returns>Less Than and Greater Than DatFiles</returns>
/// <remarks>
/// Roms whose size is at or above <paramref name="radix"/> go to the "greater" output.
/// Everything else (non-Rom items, Roms without a size, and smaller Roms) goes to the
/// "lesser" output.
/// </remarks>
public static (DatFile lessThan, DatFile greaterThan) SplitBySizeDB(DatFile datFile, long radix)
{
    var watch = new InternalStopwatch($"Splitting DAT by size");

    // Appends the given suffix to the file name, name, and description of a DAT header
    void AppendSuffix(DatFile dat, string suffix)
    {
        dat.Header.SetFieldValue(DatHeader.FileNameKey, dat.Header.GetStringFieldValue(DatHeader.FileNameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.NameKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + suffix);
        dat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, dat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + suffix);
    }

    // Build both outputs from a standard clone of the source header
    DatFile lessThan = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(lessThan, $" (less than {radix})");
    DatFile greaterThan = DatFile.Create(datFile.Header.CloneStandard());
    AppendSuffix(greaterThan, $" (equal-greater than {radix})");

    // Snapshot all current items, machines, sources, and mappings
    var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
    var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
    var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
    var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
    var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

    // Old index -> new index. Only the index returned by the "lesser" output is recorded;
    // NOTE(review): this presumes both outputs hand out identical indices when fed
    // the same sequence of sources/machines - confirm against ItemsDB.
    Dictionary<long, long> machineRemapping = [];
    Dictionary<long, long> sourceRemapping = [];

    // Register every source with both outputs
    foreach (var sourceEntry in sources)
    {
        long indexInLesser = lessThan.ItemsDB.AddSource(sourceEntry.Value);
        _ = greaterThan.ItemsDB.AddSource(sourceEntry.Value);
        sourceRemapping[sourceEntry.Key] = indexInLesser;
    }

    // Register every machine with both outputs
    foreach (var machineEntry in machines)
    {
        long indexInLesser = lessThan.ItemsDB.AddMachine(machineEntry.Value);
        _ = greaterThan.ItemsDB.AddMachine(machineEntry.Value);
        machineRemapping[machineEntry.Key] = indexInLesser;
    }

    // Distribute the items
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datItems, Core.Globals.ParallelOptions, entry =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datItems, entry =>
#else
    foreach (var entry in datItems)
#endif
    {
        // Original machine and source index for this item
        long machineIndex = itemMachineMappings[entry.Key];
        long sourceIndex = itemSourceMappings[entry.Key];

        // Default to the "lesser" output; only a sized Rom at or above
        // the radix is redirected to the "greater" output
        DatFile destination = lessThan;
        if (entry.Value is Rom rom)
        {
            var size = rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey);
            if (size != null && size >= radix)
                destination = greaterThan;
        }

        destination.ItemsDB.AddItem(entry.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // Hand back both outputs
    watch.Stop();
    return (lessThan, greaterThan);
}
/// <summary>
/// Split a DAT into sequential chunks whose combined Rom size does not exceed a given total
/// </summary>
/// <param name="datFile">Current DatFile object to split; BucketBy(ItemKey.Machine) is invoked on its items</param>
/// <param name="chunkSize">Long value representing the total size to split at</param>
/// <returns>
/// Output DatFiles in creation order, each with "_{index}" appended to its file name, name, and description.
/// Empty if <paramref name="chunkSize"/> is zero or negative.
/// </returns>
/// <remarks>
/// A machine is never divided between outputs. A machine whose own Rom total exceeds the chunk size
/// is logged and left out of every output. Roms without a size count as 0 and non-Rom items add
/// nothing to the total. The DatFile still being filled when the loop ends is always appended, so a
/// valid chunk size yields at least one (possibly empty) DatFile.
/// </remarks>
public static List<DatFile> SplitByTotalSize(DatFile datFile, long chunkSize)
{
    // A non-positive chunk size cannot hold anything, so there is nothing to produce
    if (chunkSize <= 0)
        return [];

    InternalStopwatch watch = new($"Splitting DAT by total size");

    // Builds an empty output DAT from the source header and tags its identifying fields with the chunk index
    DatFile CreateChunk(long index)
    {
        DatFile chunk = DatFile.Create(datFile.Header.CloneStandard());
        chunk.Header.SetFieldValue(DatHeader.FileNameKey, chunk.Header.GetStringFieldValue(DatHeader.FileNameKey) + $"_{index}");
        chunk.Header.SetFieldValue(Models.Metadata.Header.NameKey, chunk.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + $"_{index}");
        chunk.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, chunk.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + $"_{index}");
        return chunk;
    }

    // Bucket the DatFile by machine and walk the keys in their sorted order
    datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None);
    var keys = datFile.Items.SortedKeys;

    // Running state: finished chunks, plus the chunk currently being filled
    List<DatFile> datFiles = [];
    long currentSize = 0;
    long currentIndex = 0;
    DatFile currentDat = CreateChunk(currentIndex);

    foreach (string machine in keys)
    {
        // Get the current machine
        var items = datFile.Items[machine];
        if (items == null || items.Count == 0)
        {
            logger.Error($"{machine} contains no items and will be skipped");
            continue;
        }

        // Total up the Rom sizes for this machine; anything that is not a Rom is ignored
        long machineSize = 0;
        foreach (var item in items)
        {
            if (item is not Rom rom)
                continue;

            // A missing size is treated as zero
            long romSize = rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey) ?? 0;
            machineSize += romSize;

            // TODO: Should there be more than just a log if a single item is larger than the chunksize?
            if (romSize > chunkSize)
                logger.Error($"{rom.GetName() ?? string.Empty} in {machine} is larger than {chunkSize}");
        }

        // A machine that cannot fit in any chunk on its own is logged and dropped
        // TODO: Should this eventually try to split the machine here?
        if (machineSize > chunkSize)
        {
            logger.Error($"{machine} is larger than {chunkSize} and will be skipped");
            continue;
        }

        // If this machine would overflow the current chunk, close it out and start the next one
        if (currentSize + machineSize > chunkSize)
        {
            datFiles.Add(currentDat);
            currentSize = 0;
            currentIndex++;
            currentDat = CreateChunk(currentIndex);
        }

        // Add the current machine to the current DatFile
        currentDat.Items[machine] = items;
        currentSize += machineSize;
    }

    // The chunk in progress is always emitted, even if nothing was added to it
    datFiles.Add(currentDat);

    watch.Stop();
    return datFiles;
}
/// <summary>
/// Split a DAT by type of DatItem
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <returns>
/// Dictionary of ItemType to DatFile mappings with one entry each for Disk, Media, Rom, and Sample;
/// every output has " ({itemType})" appended to its file name, name, and description
/// </returns>
public static Dictionary<ItemType, DatFile> SplitByType(DatFile datFile)
{
    InternalStopwatch watch = new($"Splitting DAT by item type");

    // We only care about a subset of types
    List<ItemType> outputTypes =
    [
        ItemType.Disk,
        ItemType.Media,
        ItemType.Rom,
        ItemType.Sample,
    ];

    // Create one output DAT per type, each cloned from the source header and labelled with its type
    Dictionary<ItemType, DatFile> typeDats = [];
    foreach (ItemType itemType in outputTypes)
    {
        string suffix = $" ({itemType})";

        DatFile typeDat = DatFile.Create(datFile.Header.CloneStandard());
        typeDat.Header.SetFieldValue(DatHeader.FileNameKey, typeDat.Header.GetStringFieldValue(DatHeader.FileNameKey) + suffix);
        typeDat.Header.SetFieldValue(Models.Metadata.Header.NameKey, typeDat.Header.GetStringFieldValue(Models.Metadata.Header.NameKey) + suffix);
        typeDat.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, typeDat.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey) + suffix);

        typeDats[itemType] = typeDat;
    }

    // Populate every output from both item stores; the dictionary is only read from this point on
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(outputTypes, Core.Globals.ParallelOptions, itemType =>
#elif NET40_OR_GREATER
    Parallel.ForEach(outputTypes, itemType =>
#else
    foreach (var itemType in outputTypes)
#endif
    {
        DatFile target = typeDats[itemType];
        FillWithItemType(datFile, target, itemType);
        FillWithItemTypeDB(datFile, target, itemType);
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    watch.Stop();
    return typeDats;
}
/// <summary>
/// Fill a DatFile with all items with a particular ItemType
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="indexDat">DatFile to add found items to</param>
/// <param name="itemType">ItemType to retrieve items for</param>
/// <remarks>
/// Nothing is returned; matching items are added to <paramref name="indexDat"/> under the same key
/// they had in <paramref name="datFile"/>.
/// </remarks>
private static void FillWithItemType(DatFile datFile, DatFile indexDat, ItemType itemType)
{
    // Visit every key of the source and copy over the items whose type matches
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datFile.Items.Keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datFile.Items.Keys, key =>
#else
    foreach (var key in datFile.Items.Keys)
#endif
    {
        // The key's items are passed through DatItem.Merge before being filtered
        List<DatItem> items = DatItem.Merge(datFile.Items[key]);

        // A null or empty list contributes nothing. A positive guard is used instead of an early exit
        // so the same body works both as a lambda and as a plain loop body.
        if (items != null && items.Count > 0)
        {
            foreach (DatItem item in items)
            {
                if (item.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>() == itemType)
                    indexDat.Items.Add(key, item);
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif
}
/// <summary>
/// Fill a DatFile with all items with a particular ItemType, using the ItemsDB store
/// </summary>
/// <param name="datFile">Current DatFile object to split</param>
/// <param name="indexDat">DatFile to add found items to</param>
/// <param name="itemType">ItemType to retrieve items for</param>
/// <remarks>
/// Nothing is returned. Every source and every machine of <paramref name="datFile"/> is added to
/// <paramref name="indexDat"/>, whether or not an item of the requested type refers to it; only the
/// items themselves are filtered by type.
/// </remarks>
private static void FillWithItemTypeDB(DatFile datFile, DatFile indexDat, ItemType itemType)
{
    // Snapshot all current items, machines, sources, and the item-to-machine/source mappings
    var datItems = datFile.ItemsDB.GetItems().ToDictionary(m => m.Item1, m => m.Item2);
    var machines = datFile.ItemsDB.GetMachines().ToDictionary(m => m.Item1, m => m.Item2);
    var sources = datFile.ItemsDB.GetSources().ToDictionary(m => m.Item1, m => m.Item2);
    var itemMachineMappings = datFile.ItemsDB.GetItemMachineMappings().ToDictionary(m => m.Item1, m => m.Item2);
    var itemSourceMappings = datFile.ItemsDB.GetItemSourceMappings().ToDictionary(m => m.Item1, m => m.Item2);

    // Indices handed out by the output DAT differ from the input's, so track old index -> new index
    var sourceRemapping = new Dictionary<long, long>();
    var machineRemapping = new Dictionary<long, long>();

    // Register all sources with the output and remember where each one landed
    foreach (var source in sources)
    {
        sourceRemapping[source.Key] = indexDat.ItemsDB.AddSource(source.Value);
    }

    // Register all machines with the output and remember where each one landed
    foreach (var machine in machines)
    {
        machineRemapping[machine.Key] = indexDat.ItemsDB.AddMachine(machine.Value);
    }

    // Loop through and add the items
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datItems, Core.Globals.ParallelOptions, item =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datItems, item =>
#else
    foreach (var item in datItems)
#endif
    {
        // Look up the original machine and source index for this item (done for every item, matching or not)
        long machineIndex = itemMachineMappings[item.Key];
        long sourceIndex = itemSourceMappings[item.Key];

        // Only items of the requested type are copied, re-pointed at the output's machine and source indices
        if (item.Value.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>() == itemType)
            indexDat.ItemsDB.AddItem(item.Value, machineRemapping[machineIndex], sourceRemapping[sourceIndex], statsOnly: false);
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif
}
}
}