// SabreTools/SabreTools.Library/Dats/Partials/DatFile.ConvertUpdate.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using SabreTools.Library.Data;
using SabreTools.Library.Tools;
#if MONO
using System.IO;
#else
using Alphaleonis.Win32.Filesystem;
using SearchOption = System.IO.SearchOption;
#endif
using NaturalSort;
namespace SabreTools.Library.Dats
{
public partial class DatFile
{
#region Converting and Updating
/// <summary>
/// Determine if input files should be merged, diffed, or processed individually
/// </summary>
/// <param name="inputPaths">Names of the input files and/or folders</param>
/// <param name="basePaths">Names of base files and/or folders</param>
/// <param name="outDir">Optional param for output directory</param>
/// <param name="merge">True if input files should be merged into a single file, false otherwise</param>
/// <param name="diff">Non-zero flag for diffing mode, zero otherwise</param>
/// <param name="inplace">True if the output files should overwrite their inputs, false otherwise</param>
/// <param name="skip">True if the first cascaded diff file should be skipped on output, false otherwise</param>
/// <param name="bare">True if the date should not be appended to the default name, false otherwise [OBSOLETE]</param>
/// <param name="clean">True to clean the game names to WoD standard, false otherwise (default)</param>
/// <param name="remUnicode">True if we should remove non-ASCII characters from output, false otherwise (default)</param>
/// <param name="descAsName">True to allow SL DATs to have game names used instead of descriptions, false otherwise (default)</param>
/// <param name="filter">Filter object to be passed to the DatItem level</param>
/// <param name="splitType">Type of the split that should be performed (split, merged, fully merged)</param>
/// <param name="trim">True if we are supposed to trim names to NTFS length, false otherwise</param>
/// <param name="single">True if all games should be replaced by '!', false otherwise</param>
/// <param name="root">String representing root directory to compare against for length calculation</param>
public void DetermineUpdateType(List<string> inputPaths, List<string> basePaths, string outDir, bool merge, DiffMode diff, bool inplace, bool skip,
bool bare, bool clean, bool remUnicode, bool descAsName, Filter filter, SplitType splitType, bool trim, bool single, string root)
{
// If we're in merging or diffing mode, use the full list of inputs
if (merge || (diff != 0 && (diff & DiffMode.Against) == 0))
{
// Make sure there are no folders in inputs
List<string> newInputFileNames = FileTools.GetOnlyFilesFromInputs(inputPaths, appendparent: true);
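// With appendparent set, each entry comes back as a '¬'-delimited pair: the file path followed by the input path it was found under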
// Reverse if we have to
if ((diff & DiffMode.ReverseCascade) != 0)
{
newInputFileNames.Reverse();
}
// Create a dictionary of all ROMs from the input DATs
List<DatFile> datHeaders = PopulateUserData(newInputFileNames, inplace, clean,
remUnicode, descAsName, outDir, filter, splitType, trim, single, root);
// If we're in a non-cascading diff mode, output the standard diffs
if (diff != 0 && diff < DiffMode.Cascade)
{
DiffNoCascade(diff, outDir, newInputFileNames);
}
// If we're in cascade and diff, output only cascaded diffs
else if (diff != 0 && diff >= DiffMode.Cascade)
{
DiffCascade(outDir, inplace, newInputFileNames, datHeaders, skip);
}
// Output all entries with user-defined merge
else
{
MergeNoDiff(outDir, newInputFileNames, datHeaders);
}
}
// If we're in "diff against" mode, we treat the inputs differently
else if ((diff & DiffMode.Against) != 0)
{
DiffAgainst(inputPaths, basePaths, outDir, inplace, clean, remUnicode, descAsName, filter, splitType, trim, single, root);
}
// Otherwise, loop through all of the inputs individually
else
{
Update(inputPaths, outDir, inplace, clean, remUnicode, descAsName, filter, splitType, trim, single, root);
}
return;
}
/// <summary>
/// Populate the user DatData object from the input files
/// </summary>
/// <param name="inputs">Paths to DATs to parse</param>
/// <param name="inplace">True if the output files should overwrite their inputs, false otherwise</param>
/// <param name="clean">True to clean the game names to WoD standard, false otherwise (default)</param>
/// <param name="remUnicode">True if we should remove non-ASCII characters from output, false otherwise (default)</param>
/// <param name="descAsName">True to allow SL DATs to have game names used instead of descriptions, false otherwise (default)</param>
/// <param name="splitType">Type of the split that should be performed (split, merged, fully merged)</param>
/// <param name="outDir">Optional param for output directory</param>
/// <param name="filter">Filter object to be passed to the DatItem level</param>
/// <param name="splitType">Type of the split that should be performed (split, merged, fully merged)</param>
/// <param name="trim">True if we are supposed to trim names to NTFS length, false otherwise</param>
/// <param name="single">True if all games should be replaced by '!', false otherwise</param>
/// <param name="root">String representing root directory to compare against for length calculation</param>
/// <returns>List of DatData objects representing headers</returns>
private List<DatFile> PopulateUserData(List<string> inputs, bool inplace, bool clean, bool remUnicode, bool descAsName,
string outDir, Filter filter, SplitType splitType, bool trim, bool single, string root)
{
DatFile[] datHeaders = new DatFile[inputs.Count];
InternalStopwatch watch = new InternalStopwatch("Processing individual DATs");
// Parse all of the DATs into their own DatFiles in the array
Parallel.For(0, inputs.Count, Globals.ParallelOptions, i =>
{
string input = inputs[i];
Globals.Logger.User("Adding DAT: {0}", input.Split('¬')[0]);
datHeaders[i] = new DatFile
{
DatFormat = (DatFormat != 0 ? DatFormat : 0),
DedupeRoms = DedupeRoms,
};
datHeaders[i].Parse(input.Split('¬')[0], i, 0, splitType, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName);
});
watch.Stop();
watch.Start("Populating internal DAT");
Parallel.For(0, inputs.Count, Globals.ParallelOptions, i =>
{
// Get the list of keys from the DAT
List<string> keys = datHeaders[i].Keys.ToList();
foreach (string key in keys)
{
// Add everything from the key to the internal DAT
AddRange(key, datHeaders[i][key]);
// Now remove the key from the source DAT
datHeaders[i].Remove(key);
}
// Now remove the file dictionary from the source DAT to save memory
datHeaders[i].DeleteDictionary();
});
// Now that we have a merged DAT, filter it
Filter(filter, single, trim, root);
watch.Stop();
return datHeaders.ToList();
}
/// <summary>
/// Output diffs against a base set
/// </summary>
/// <param name="inputPaths">Names of the input files and/or folders</param>
/// <param name="basePaths">Names of base files and/or folders</param>
/// <param name="outDir">Optional param for output directory</param>
/// <param name="inplace">True if the output files should overwrite their inputs, false otherwise</param>
/// <param name="clean">True to clean the game names to WoD standard, false otherwise (default)</param>
/// <param name="remUnicode">True if we should remove non-ASCII characters from output, false otherwise (default)</param>
/// <param name="descAsName">True to allow SL DATs to have game names used instead of descriptions, false otherwise (default)</param>
/// <param name="filter">Filter object to be passed to the DatItem level</param>
/// <param name="splitType">Type of the split that should be performed (split, merged, fully merged)</param>
/// <param name="trim">True if we are supposed to trim names to NTFS length, false otherwise</param>
/// <param name="single">True if all games should be replaced by '!', false otherwise</param>
/// <param name="root">String representing root directory to compare against for length calculation</param>
public void DiffAgainst(List<string> inputPaths, List<string> basePaths, string outDir, bool inplace, bool clean, bool remUnicode,
bool descAsName, Filter filter, SplitType splitType, bool trim, bool single, string root)
{
// First we want to parse all of the base DATs into the input
InternalStopwatch watch = new InternalStopwatch("Populating base DAT for comparison...");
List<string> baseFileNames = FileTools.GetOnlyFilesFromInputs(basePaths);
Parallel.ForEach(baseFileNames, Globals.ParallelOptions, path =>
{
Parse(path, 0, 0, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName);
});
watch.Stop();
// For comparison's sake, we want to use CRC as the base ordering
BucketBy(SortedBy.CRC, DedupeType.Full);
// Now we want to compare each input DAT against the base
List<string> inputFileNames = FileTools.GetOnlyFilesFromInputs(inputPaths, appendparent: true);
foreach (string path in inputFileNames)
{
// Get the two halves of the path
string[] splitpath = path.Split('¬');
Globals.Logger.User("Comparing '{0}'' to base DAT", splitpath[0]);
// First we parse in the DAT internally
DatFile intDat = new DatFile();
intDat.Parse(splitpath[0], 1, 1, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName);
// For comparison's sake, we want to use CRC as the base ordering
intDat.BucketBy(SortedBy.CRC, DedupeType.Full);
// Then we do a hashwise comparison against the base DAT
List<string> keys = intDat.Keys.ToList();
Parallel.ForEach(keys, Globals.ParallelOptions, key =>
{
List<DatItem> datItems = intDat[key];
List<DatItem> keepDatItems = new List<DatItem>();
foreach (DatItem datItem in datItems)
{
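// Keep only the items that have no match in the base DAT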
if (!datItem.HasDuplicates(this, true))
{
keepDatItems.Add(datItem);
}
}
// Now add the new list to the key
intDat.Remove(key);
intDat.AddRange(key, keepDatItems);
});
// Determine the output path for the DAT
string interOutDir = outDir;
if (inplace)
{
interOutDir = Path.GetDirectoryName(splitpath[0]);
}
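// Otherwise, re-create the input's path relative to its parent under the output (or current) directory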
else if (!String.IsNullOrEmpty(interOutDir))
{
interOutDir = Path.GetDirectoryName(Path.Combine(interOutDir, splitpath[0].Remove(0, splitpath[1].Length + 1)));
}
else
{
interOutDir = Path.GetDirectoryName(Path.Combine(Environment.CurrentDirectory, splitpath[0].Remove(0, splitpath[1].Length + 1)));
}
// Once we're done, try writing out
intDat.WriteToFile(interOutDir);
// Due to possible memory requirements, we force a garbage collection
GC.Collect();
}
}
/// <summary>
/// Output cascading diffs
/// </summary>
/// <param name="outDir">Output directory to write the DATs to</param>
/// <param name="inplace">True if cascaded diffs are outputted in-place, false otherwise</param>
/// <param name="inputs">List of inputs to write out from</param>
/// <param name="datHeaders">Dat headers used optionally</param>
/// <param name="skip">True if the first cascaded diff file should be skipped on output, false otherwise</param>
public void DiffCascade(string outDir, bool inplace, List<string> inputs, List<DatFile> datHeaders, bool skip)
{
string post = "";
// Create a list of DatData objects representing output files
List<DatFile> outDats = new List<DatFile>();
// Loop through each of the inputs and get or create a new DatData object
InternalStopwatch watch = new InternalStopwatch("Initializing all output DATs");
DatFile[] outDatsArray = new DatFile[inputs.Count];
Parallel.For(0, inputs.Count, Globals.ParallelOptions, j =>
{
string innerpost = " (" + Path.GetFileNameWithoutExtension(inputs[j].Split('¬')[0]) + " Only)";
DatFile diffData;
// If we're in inplace mode or writing to a non-default directory, reuse the DatData object already stored
if (inplace || outDir != Environment.CurrentDirectory)
{
diffData = datHeaders[j];
}
else
{
diffData = new DatFile(this);
diffData.FileName += post;
diffData.Name += post;
diffData.Description += post;
}
diffData.ResetDictionary();
outDatsArray[j] = diffData;
});
outDats = outDatsArray.ToList();
watch.Stop();
// Now, loop through the dictionary and populate the correct DATs
watch.Start("Populating all output DATs");
List<string> keys = Keys.ToList();
Parallel.ForEach(keys, Globals.ParallelOptions, key =>
{
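// Merge the items under this key, resolving duplicates, before routing each item back to its source DAT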
List<DatItem> items = DatItem.Merge(this[key]);
// If the rom list is empty or null, just skip it
if (items == null || items.Count == 0)
{
return;
}
foreach (DatItem item in items)
{
// There are odd cases where items have a System ID < 0. Skip them for now
if (item.SystemID < 0)
{
Globals.Logger.Warning("Item found with a <0 SystemID: {0}", item.Name);
continue;
}
outDats[item.SystemID].Add(key, item);
}
});
watch.Stop();
// Finally, loop through and output each of the DATs
watch.Start("Outputting all created DATs");
Parallel.For((skip ? 1 : 0), inputs.Count, Globals.ParallelOptions, j =>
{
// If we have an output directory set, replace the path
string path = "";
if (inplace)
{
path = Path.GetDirectoryName(inputs[j].Split('¬')[0]);
}
else if (outDir != Environment.CurrentDirectory)
{
string[] split = inputs[j].Split('¬');
path = outDir + (split[0] == split[1]
? Path.GetFileName(split[0])
: (Path.GetDirectoryName(split[0]).Remove(0, split[1].Length)));
}
// Try to output the file
outDats[j].WriteToFile(path);
});
watch.Stop();
}
/// <summary>
/// Output non-cascading diffs
/// </summary>
/// <param name="diff">Non-zero flag for diffing mode, zero otherwise</param>
/// <param name="outDir">Output directory to write the DATs to</param>
/// <param name="inputs">List of inputs to write out from</param>
public void DiffNoCascade(DiffMode diff, string outDir, List<string> inputs)
{
InternalStopwatch watch = new InternalStopwatch("Initializing all output DATs");
// Default vars for use
string post = "";
DatFile outerDiffData = new DatFile();
DatFile dupeData = new DatFile();
// Fill in any information not in the base DAT
if (String.IsNullOrEmpty(_fileName))
{
_fileName = "All DATs";
}
if (String.IsNullOrEmpty(_name))
{
_name = "All DATs";
}
if (String.IsNullOrEmpty(_description))
{
_description = "All DATs";
}
// No external dupes: items unique to a single input DAT
if ((diff & DiffMode.NoDupes) != 0)
{
post = " (No Duplicates)";
outerDiffData = new DatFile(this);
outerDiffData.FileName += post;
outerDiffData.Name += post;
outerDiffData.Description += post;
outerDiffData.ResetDictionary();
}
// External dupes: items shared across input DATs
if ((diff & DiffMode.Dupes) != 0)
{
post = " (Duplicates)";
dupeData = new DatFile(this);
dupeData.FileName += post;
dupeData.Name += post;
dupeData.Description += post;
dupeData.ResetDictionary();
}
// Create a list of DatData objects representing individual output files
List<DatFile> outDats = new List<DatFile>();
// Loop through each of the inputs and get or create a new DatData object
if ((diff & DiffMode.Individuals) != 0)
{
DatFile[] outDatsArray = new DatFile[inputs.Count];
Parallel.For(0, inputs.Count, Globals.ParallelOptions, j =>
{
string innerpost = " (" + Path.GetFileNameWithoutExtension(inputs[j].Split('¬')[0]) + " Only)";
DatFile diffData = new DatFile(this);
diffData.FileName += innerpost;
diffData.Name += innerpost;
diffData.Description += innerpost;
diffData.ResetDictionary();
outDatsArray[j] = diffData;
});
outDats = outDatsArray.ToList();
}
watch.Stop();
// Now, loop through the dictionary and populate the correct DATs
watch.Start("Populating all output DATs");
List<string> keys = Keys.ToList();
Parallel.ForEach(keys, Globals.ParallelOptions, key =>
{
List<DatItem> items = DatItem.Merge(this[key]);
// If the rom list is empty or null, just skip it
if (items == null || items.Count == 0)
{
return;
}
// Loop through and add the items correctly
foreach(DatItem item in items)
{
// No duplicates
if ((diff & DiffMode.NoDupes) != 0 || (diff & DiffMode.Individuals) != 0)
{
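// Items flagged only as internal dupes did not match anything from another input DAT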
if ((item.Dupe & DupeType.Internal) != 0)
{
// Individual DATs that are output
if ((diff & DiffMode.Individuals) != 0)
{
outDats[item.SystemID].Add(key, item);
}
// Merged no-duplicates DAT
if ((diff & DiffMode.NoDupes) != 0)
{
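// Clone the item and tag its machine name with the source DAT so the merged output stays traceable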
DatItem newrom = item.Clone() as DatItem;
newrom.Machine.AppendName(" (" + Path.GetFileNameWithoutExtension(inputs[newrom.SystemID].Split('¬')[0]) + ")");
outerDiffData.Add(key, newrom);
}
}
}
// Duplicates only
if ((diff & DiffMode.Dupes) != 0)
{
if ((item.Dupe & DupeType.External) != 0)
{
DatItem newrom = item.Clone() as DatItem;
newrom.Machine.AppendName(" (" + Path.GetFileNameWithoutExtension(inputs[newrom.SystemID].Split('¬')[0]) + ")");
dupeData.Add(key, newrom);
}
}
}
});
watch.Stop();
// Finally, loop through and output each of the DATs
watch.Start("Outputting all created DATs");
// Output the difflist (a-b)+(b-a) diff
if ((diff & DiffMode.NoDupes) != 0)
{
outerDiffData.WriteToFile(outDir);
}
// Output the (ab) diff
if ((diff & DiffMode.Dupes) != 0)
{
dupeData.WriteToFile(outDir);
}
// Output the individual (a-b) DATs
if ((diff & DiffMode.Individuals) != 0)
{
Parallel.For(0, inputs.Count, Globals.ParallelOptions, j =>
{
// If we have an output directory set, replace the path
string[] split = inputs[j].Split('¬');
string path = Path.Combine(outDir,
(split[0] == split[1]
? Path.GetFileName(split[0])
: (Path.GetDirectoryName(split[0]).Remove(0, split[1].Length))));
// Try to output the file
outDats[j].WriteToFile(path);
});
}
watch.Stop();
}
/// <summary>
/// Output user defined merge
/// </summary>
/// <param name="outDir">Output directory to write the DATs to</param>
/// <param name="inputs">List of inputs to write out from</param>
/// <param name="datHeaders">Dat headers used optionally</param>
public void MergeNoDiff(string outDir, List<string> inputs, List<DatFile> datHeaders)
{
// If we're in SuperDAT mode, prefix all games with their respective DATs
if (Type == "SuperDAT")
{
List<string> keys = Keys.ToList();
Parallel.ForEach(keys, Globals.ParallelOptions, key =>
{
List<DatItem> items = this[key].ToList();
List<DatItem> newItems = new List<DatItem>();
foreach (DatItem item in items)
{
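// Strip the input's root path from the file name, then prefix the machine name with the DAT's relative directory and file name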
DatItem newItem = item;
string filename = inputs[newItem.SystemID].Split('¬')[0];
string rootpath = inputs[newItem.SystemID].Split('¬')[1];
rootpath += (rootpath == "" ? "" : Path.DirectorySeparatorChar.ToString());
filename = filename.Remove(0, rootpath.Length);
newItem.Machine.UpdateName(Path.GetDirectoryName(filename) + Path.DirectorySeparatorChar
+ Path.GetFileNameWithoutExtension(filename) + Path.DirectorySeparatorChar
+ newItem.Machine.Name);
newItems.Add(newItem);
}
Remove(key);
AddRange(key, newItems);
});
}
// Try to output the file
WriteToFile(outDir);
}
/// <summary>
/// Convert, update, and filter a DAT file or set of files
/// </summary>
/// <param name="inputFileNames">Names of the input files and/or folders</param>
/// <param name="outDir">Optional param for output directory</param>
/// <param name="merge">True if input files should be merged into a single file, false otherwise</param>
/// <param name="diff">Non-zero flag for diffing mode, zero otherwise</param>
/// <param name="inplace">True if the output files should overwrite their inputs, false otherwise</param>
/// <param name="skip">True if the first cascaded diff file should be skipped on output, false otherwise</param>
/// <param name="bare">True if the date should not be appended to the default name, false otherwise [OBSOLETE]</param>
/// <param name="clean">True to clean the game names to WoD standard, false otherwise (default)</param>
/// <param name="remUnicode">True if we should remove non-ASCII characters from output, false otherwise (default)</param>
/// <param name="descAsName">True to allow SL DATs to have game names used instead of descriptions, false otherwise (default)</param>
/// <param name="filter">Filter object to be passed to the DatItem level</param>
/// <param name="splitType">Type of the split that should be performed (split, merged, fully merged)</param>
/// <param name="trim">True if we are supposed to trim names to NTFS length, false otherwise</param>
/// <param name="single">True if all games should be replaced by '!', false otherwise</param>
/// <param name="root">String representing root directory to compare against for length calculation</param>
public void Update(List<string> inputFileNames, string outDir, bool inplace, bool clean, bool remUnicode, bool descAsName,
Filter filter, SplitType splitType, bool trim, bool single, string root)
{
for (int i = 0; i < inputFileNames.Count; i++)
{
// Get the input file name
string inputFileName = inputFileNames[i];
// Clean the input string
if (inputFileName != "")
{
inputFileName = Path.GetFullPath(inputFileName);
}
if (File.Exists(inputFileName))
{
// If inplace is set, override the output dir
string realOutDir = outDir;
if (inplace)
{
realOutDir = Path.GetDirectoryName(inputFileName);
}
DatFile innerDatdata = new DatFile(this);
Globals.Logger.User("Processing '{0}'", Path.GetFileName(inputFileName));
innerDatdata.Parse(inputFileName, 0, 0, splitType, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName,
keepext: ((innerDatdata.DatFormat & DatFormat.TSV) != 0 || (innerDatdata.DatFormat & DatFormat.CSV) != 0));
innerDatdata.Filter(filter, trim, single, root);
// Try to output the file
innerDatdata.WriteToFile((realOutDir == Environment.CurrentDirectory ? Path.GetDirectoryName(inputFileName) : realOutDir), overwrite: (realOutDir != Environment.CurrentDirectory));
}
else if (Directory.Exists(inputFileName))
{
inputFileName = Path.GetFullPath(inputFileName) + Path.DirectorySeparatorChar;
// If inplace is set, override the output dir
string realOutDir = outDir;
if (inplace)
{
realOutDir = Path.GetDirectoryName(inputFileName);
}
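// Walk every file under the directory, preserving its structure relative to the input when writing output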
List<string> subFiles = Directory.EnumerateFiles(inputFileName, "*", SearchOption.AllDirectories).ToList();
Parallel.ForEach(subFiles, Globals.ParallelOptions, file =>
{
Globals.Logger.User("Processing '{0}'", Path.GetFullPath(file).Remove(0, inputFileName.Length));
DatFile innerDatdata = new DatFile(this);
innerDatdata.Parse(file, 0, 0, splitType, keep: true, clean: clean, remUnicode: remUnicode, descAsName: descAsName,
keepext: ((innerDatdata.DatFormat & DatFormat.TSV) != 0 || (innerDatdata.DatFormat & DatFormat.CSV) != 0));
innerDatdata.Filter(filter, trim, single, root);
// Try to output the file
innerDatdata.WriteToFile((realOutDir == Environment.CurrentDirectory ? Path.GetDirectoryName(file) : realOutDir + Path.GetDirectoryName(file).Remove(0, inputFileName.Length - 1)),
overwrite: (realOutDir != Environment.CurrentDirectory));
});
}
else
{
Globals.Logger.Error("I'm sorry but '{0}' doesn't exist!", inputFileName);
return;
}
}
}
#endregion
}
}