Move parsing to new class

This commit is contained in:
Matt Nadareski
2020-12-10 10:58:00 -08:00
parent 34b9005e58
commit 2ee6d13a77
16 changed files with 206 additions and 165 deletions

View File

@@ -705,7 +705,8 @@ CREATE TABLE IF NOT EXISTS dat (
// Parse the Dat if possible
logger.User($"Adding from '{dat.Name}'");
DatFile tempdat = DatFile.CreateAndParse(fullpath);
DatTool dt = new DatTool();
DatFile tempdat = dt.CreateAndParse(fullpath);
// If the Dat wasn't empty, add the information
SqliteCommand slc = null;

View File

@@ -46,11 +46,14 @@ structure according to the original DAT master directory tree structure.";
if (string.IsNullOrWhiteSpace(outdat))
outdat = "out";
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Now that we have the dictionary, we can loop through and output to a new folder for each
foreach (string key in foundDats.Keys)
{
// Get the DAT file associated with the key
DatFile datFile = DatFile.CreateAndParse(Path.Combine(_dats, foundDats[key]));
DatFile datFile = dt.CreateAndParse(Path.Combine(_dats, foundDats[key]));
// Set the depot values
datFile.Header.InputDepot = new DepotInformation(true, 4);

View File

@@ -55,14 +55,17 @@ in -old DAT file. Ignores those entries in -old that are not in -new.";
return;
}
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Create the encapsulating datfile
DatFile datfile = DatFile.Create();
datfile.Header.Name = name;
datfile.Header.Description = description;
datfile.Parse(olddat);
dt.ParseInto(datfile, olddat);
// Diff against the new datfile
DatFile intDat = DatFile.CreateAndParse(newdat);
DatFile intDat = dt.CreateAndParse(newdat);
datfile.DiffAgainst(intDat, false);
intDat.Write(outdat);
}

View File

@@ -50,11 +50,14 @@ namespace RombaSharp.Features
return;
}
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Create the encapsulating datfile
DatFile datfile = DatFile.CreateAndParse(olddat);
DatFile datfile = dt.CreateAndParse(olddat);
// Diff against the new datfile
DatFile intDat = DatFile.CreateAndParse(newdat);
DatFile intDat = dt.CreateAndParse(newdat);
datfile.DiffAgainst(intDat, false);
intDat.Write(outdat);
}

View File

@@ -33,11 +33,14 @@ namespace RombaSharp.Features
// Create the new output directory if it doesn't exist
DirectoryExtensions.Ensure(Path.Combine(Globals.ExeDir, "out"), create: true);
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Now that we have the dictionary, we can loop through and output to a new folder for each
foreach (string key in foundDats.Keys)
{
// Get the DAT file associated with the key
DatFile datFile = DatFile.CreateAndParse(Path.Combine(_dats, foundDats[key]));
DatFile datFile = dt.CreateAndParse(Path.Combine(_dats, foundDats[key]));
// Now loop through and see if all of the hash combinations exist in the database
/* ended here */

View File

@@ -454,6 +454,7 @@ namespace SabreTools.DatFiles
/// <returns>List of DatHeader objects representing headers</returns>
public List<DatHeader> PopulateUserData(List<ParentablePath> inputs)
{
DatTool dt = new DatTool();
DatFile[] datFiles = new DatFile[inputs.Count];
InternalStopwatch watch = new InternalStopwatch("Processing individual DATs");
@@ -463,7 +464,7 @@ namespace SabreTools.DatFiles
var input = inputs[i];
logger.User($"Adding DAT: {input.CurrentPath}");
datFiles[i] = Create(Header.CloneFiltering());
datFiles[i].Parse(input, i, keep: true);
dt.ParseInto(datFiles[i], input, i, keep: true);
});
watch.Stop();

View File

@@ -1,9 +1,11 @@
using System.IO;
using System;
using System.IO;
using System.Linq;
using System.Xml.Serialization;
using SabreTools.Core;
using SabreTools.DatFiles.Formats;
using SabreTools.DatItems;
using SabreTools.Logging;
using Newtonsoft.Json;
@@ -149,7 +151,7 @@ namespace SabreTools.DatFiles
return new SabreXML(baseDat);
case DatFormat.SoftwareList:
return new SoftwareList(baseDat);
return new Formats.SoftwareList(baseDat);
case DatFormat.SSV:
return new SeparatedValue(baseDat, ';');
@@ -237,5 +239,141 @@ namespace SabreTools.DatFiles
}
#endregion
#region Parsing
/// <summary>
/// Parse the named file; declared abstract, so each derived DatFile type supplies the actual parsing
/// </summary>
/// <remarks>
/// The method is void, so nothing is handed back to the caller.
/// NOTE(review): parsed items are presumably added to this DatFile's item collection - confirm against the implementations.
/// </remarks>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
public abstract void ParseFile(string filename, int indexId, bool keep, bool throwOnError = false);
/// <summary>
/// Normalize an incomplete Disk or Rom entry, then store the item in this Dat's item collection
/// </summary>
/// <param name="item">Item to inspect, adjust in place if needed, and add</param>
/// <returns>The key the item was added under, as produced by item.GetKey(Field.Machine_Name)</returns>
protected string ParseAddHelper(DatItem item)
{
    // Only Disk and Rom entries are adjusted; any other item type goes straight to the add step
    switch (item.ItemType)
    {
        case ItemType.Disk:
        {
            Disk diskItem = item as Disk;

            // A disk that is not already nodump and carries neither MD5 nor SHA-1 is flagged as nodump
            if (diskItem.ItemStatus != ItemStatus.Nodump
                && string.IsNullOrWhiteSpace(diskItem.MD5)
                && string.IsNullOrWhiteSpace(diskItem.SHA1))
            {
                logger.Verbose($"Incomplete entry for '{diskItem.Name}' will be output as nodump");
                diskItem.ItemStatus = ItemStatus.Nodump;
            }

            break;
        }

        case ItemType.Rom:
        {
            Rom romItem = item as Rom;

            // No size and HasHashes() is false: only log it, leave the entry untouched
            if (romItem.Size == null && !romItem.HasHashes())
            {
                logger.Verbose($"{Header.FileName}: Entry with only SHA-1 found - '{romItem.Name}'");
                break;
            }

            bool sizeMissingOrZero = romItem.Size == 0 || romItem.Size == null;

            // Missing/zero size with a blank CRC or a zero-hash: fill in the constants for a 0-byte file
            if (sizeMissingOrZero
                && (string.IsNullOrWhiteSpace(romItem.CRC) || romItem.HasZeroHash()))
            {
                // TODO: All instances of Hash.DeepHashes should be made into 0x0 eventually
                romItem.Size = Constants.SizeZero;
                romItem.CRC = Constants.CRCZero;
                romItem.MD5 = Constants.MD5Zero;
#if NET_FRAMEWORK
                romItem.RIPEMD160 = null; // Constants.RIPEMD160Zero;
#endif
                romItem.SHA1 = Constants.SHA1Zero;
                romItem.SHA256 = null; // Constants.SHA256Zero;
                romItem.SHA384 = null; // Constants.SHA384Zero;
                romItem.SHA512 = null; // Constants.SHA512Zero;
                romItem.SpamSum = null; // Constants.SpamSumZero;
                break;
            }

            // Entries already marked nodump need no further adjustment
            if (romItem.ItemStatus == ItemStatus.Nodump)
                break;

            // Either the size is missing/zero, or there is a positive size but no hashes at all:
            // both cases are logged the same way and flagged as nodump
            if (sizeMissingOrZero
                || (romItem.Size != null && romItem.Size > 0 && !romItem.HasHashes()))
            {
                logger.Verbose($"{Header.FileName}: Incomplete entry for '{romItem.Name}' will be output as nodump");
                romItem.ItemStatus = ItemStatus.Nodump;
            }

            break;
        }
    }

    // Key the item by machine name and add it
    string itemKey = item.GetKey(Field.Machine_Name);
    Items.Add(itemKey, item);
    return itemKey;
}
#region Input Sanitization
/// <summary>
/// Normalize a date string by round-tripping it through DateTime when it can be parsed
/// </summary>
/// <param name="input">Raw date text; may be null</param>
/// <returns>
/// Null for null input; the parsed DateTime's default ToString() output when
/// DateTime.TryParse succeeds; otherwise the input unchanged
/// </returns>
protected string CleanDate(string input)
{
    // Null in, null out
    if (input == null)
        return null;

    // Anything that does not parse as a date is handed back untouched
    return DateTime.TryParse(input, out DateTime parsed)
        ? parsed.ToString()
        : input;
}
/// <summary>
/// Clean a hash string from a Listrom DAT
/// </summary>
/// <remarks>
/// A value starting with "CRC" yields the 8 characters at index 4; a value starting with
/// "SHA1" yields the 40 characters at index 5 (one separator character after the prefix,
/// presumably '(' - confirm against the Listrom format). Both are lower-cased.
/// Null, empty, or too-short values are returned unchanged instead of throwing.
/// </remarks>
/// <param name="hash">Hash string to sanitize; may be null</param>
/// <returns>Cleaned string, or the input unchanged when no hash can be extracted</returns>
protected string CleanListromHashData(string hash)
{
    // Width of the separator plus hash that must follow each prefix
    const int crcStart = 4, crcLength = 8;
    const int sha1Start = 5, sha1Length = 40;

    // Nothing to clean
    if (string.IsNullOrEmpty(hash))
        return hash;

    // Ordinal comparison: these prefixes are format markers, not linguistic text
    if (hash.StartsWith("CRC", StringComparison.Ordinal))
    {
        // Guard the fixed-width slice so a truncated value cannot throw
        return hash.Length >= crcStart + crcLength
            ? hash.Substring(crcStart, crcLength).ToLowerInvariant()
            : hash;
    }

    if (hash.StartsWith("SHA1", StringComparison.Ordinal))
    {
        return hash.Length >= sha1Start + sha1Length
            ? hash.Substring(sha1Start, sha1Length).ToLowerInvariant()
            : hash;
    }

    return hash;
}
#endregion
#endregion
}
}

View File

@@ -15,6 +15,7 @@ namespace SabreTools.DatFiles
{
// TODO: See if any of the methods can be broken up a bit more neatly
// TODO: See if any of this can be more stateful given the inputted DatFile
// TODO: Re-evaluate if these should be made static instead of instanced
public partial class DatTool
{
/// <summary>

View File

@@ -3,36 +3,38 @@ using System.IO;
using System.Text.RegularExpressions;
using SabreTools.Core;
using SabreTools.DatItems;
using SabreTools.IO;
// This file represents all methods related to parsing from a file
namespace SabreTools.DatFiles
{
public abstract partial class DatFile
// TODO: Re-evaluate if these should be made static instead of instanced
public partial class DatTool
{
/// <summary>
/// Create a DatFile and parse a file into it
/// </summary>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
public static DatFile CreateAndParse(string filename, bool throwOnError = false)
public DatFile CreateAndParse(string filename, bool throwOnError = false)
{
DatFile datFile = Create();
datFile.Parse(new ParentablePath(filename), throwOnError: throwOnError);
DatFile datFile = DatFile.Create();
ParseInto(datFile, new ParentablePath(filename), throwOnError: throwOnError);
return datFile;
}
/// <summary>
/// Parse a DAT and add all found games and roms to the given DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to add to</param>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="keepext">True if original extension should be kept, false otherwise (default)</param>
/// <param name="quotes">True if quotes are assumed in supported types (default), false otherwise</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
public void Parse(
public void ParseInto(
DatFile datFile,
string filename,
int indexId = 0,
bool keep = false,
@@ -41,19 +43,21 @@ namespace SabreTools.DatFiles
bool throwOnError = false)
{
ParentablePath path = new ParentablePath(filename.Trim('"'));
Parse(path, indexId, keep, keepext, quotes, throwOnError);
ParseInto(datFile, path, indexId, keep, keepext, quotes, throwOnError);
}
/// <summary>
/// Parse a DAT and add all found games and roms to the given DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to add to</param>
/// <param name="input">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="keepext">True if original extension should be kept, false otherwise (default)</param>
/// <param name="quotes">True if quotes are assumed in supported types (default), false otherwise</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
public void Parse(
public void ParseInto(
DatFile datFile,
ParentablePath input,
int indexId = 0,
bool keep = false,
@@ -69,17 +73,21 @@ namespace SabreTools.DatFiles
return;
// If the output filename isn't set already, get the internal filename
Header.FileName = (string.IsNullOrWhiteSpace(Header.FileName) ? (keepext ? Path.GetFileName(currentPath) : Path.GetFileNameWithoutExtension(currentPath)) : Header.FileName);
datFile.Header.FileName = string.IsNullOrWhiteSpace(datFile.Header.FileName)
? (keepext
? Path.GetFileName(currentPath)
: Path.GetFileNameWithoutExtension(currentPath))
: datFile.Header.FileName;
// If the output type isn't set already, get the internal output type
DatFormat currentPathFormat = GetDatFormat(currentPath);
Header.DatFormat = (Header.DatFormat == 0 ? currentPathFormat : Header.DatFormat);
Items.SetBucketedBy(Field.DatItem_CRC); // Setting this because it can reduce issues later
datFile.Header.DatFormat = datFile.Header.DatFormat == 0 ? currentPathFormat : datFile.Header.DatFormat;
datFile.Items.SetBucketedBy(Field.DatItem_CRC); // Setting this because it can reduce issues later
// Now parse the correct type of DAT
try
{
Create(currentPathFormat, this, quotes)?.ParseFile(currentPath, indexId, keep, throwOnError);
DatFile.Create(currentPathFormat, datFile, quotes)?.ParseFile(currentPath, indexId, keep, throwOnError);
}
catch (Exception ex)
{
@@ -93,7 +101,7 @@ namespace SabreTools.DatFiles
/// </summary>
/// <param name="filename">Name of the file to be parsed</param>
/// <returns>The DatFormat corresponding to the DAT</returns>
protected DatFormat GetDatFormat(string filename)
private DatFormat GetDatFormat(string filename)
{
// Limit the output formats based on extension
if (!PathExtensions.HasValidDatExtension(filename))
@@ -223,137 +231,5 @@ namespace SabreTools.DatFiles
else
return DatFormat.ClrMamePro;
}
/// <summary>
/// Add an item to the Dat after normalizing incomplete Disk and Rom entries
/// </summary>
/// <param name="item">Item data to check against</param>
/// <returns>The key for the item</returns>
protected string ParseAddHelper(DatItem item)
{
string key = string.Empty;
// Only Disk and Rom items are adjusted below; any other item type is added unchanged
if (item.ItemType == ItemType.Disk)
{
Disk disk = item as Disk;
// If the disk has absolutely no hashes (no MD5 and no SHA-1), log it and mark it as nodump
if (disk.ItemStatus != ItemStatus.Nodump
&& string.IsNullOrWhiteSpace(disk.MD5)
&& string.IsNullOrWhiteSpace(disk.SHA1))
{
logger.Verbose($"Incomplete entry for '{disk.Name}' will be output as nodump");
disk.ItemStatus = ItemStatus.Nodump;
}
item = disk;
}
else if (item.ItemType == ItemType.Rom)
{
Rom rom = item as Rom;
// If we have the case where there is SHA-1 and nothing else, we don't fill in any other part of the data
// NOTE(review): the condition tests for a null size and HasHashes() being false - confirm this matches the "only SHA-1" wording
if (rom.Size == null && !rom.HasHashes())
{
// No-op, just catch it so it doesn't go further
logger.Verbose($"{Header.FileName}: Entry with only SHA-1 found - '{rom.Name}'");
}
// If we have a rom and it's missing size AND the hashes match a 0-byte file, fill in the rest of the info
else if ((rom.Size == 0 || rom.Size == null)
&& (string.IsNullOrWhiteSpace(rom.CRC) || rom.HasZeroHash()))
{
// TODO: All instances of Hash.DeepHashes should be made into 0x0 eventually
rom.Size = Constants.SizeZero;
rom.CRC = Constants.CRCZero;
rom.MD5 = Constants.MD5Zero;
#if NET_FRAMEWORK
rom.RIPEMD160 = null; // Constants.RIPEMD160Zero;
#endif
rom.SHA1 = Constants.SHA1Zero;
rom.SHA256 = null; // Constants.SHA256Zero;
rom.SHA384 = null; // Constants.SHA384Zero;
rom.SHA512 = null; // Constants.SHA512Zero;
rom.SpamSum = null; // Constants.SpamSumZero;
}
// If the file has no size and it's not the above case, log it and mark it as nodump
else if (rom.ItemStatus != ItemStatus.Nodump && (rom.Size == 0 || rom.Size == null))
{
logger.Verbose($"{Header.FileName}: Incomplete entry for '{rom.Name}' will be output as nodump");
rom.ItemStatus = ItemStatus.Nodump;
}
// If the file has a size but absolutely no hashes, log it and mark it as nodump
else if (rom.ItemStatus != ItemStatus.Nodump
&& rom.Size != null && rom.Size > 0
&& !rom.HasHashes())
{
logger.Verbose($"{Header.FileName}: Incomplete entry for '{rom.Name}' will be output as nodump");
rom.ItemStatus = ItemStatus.Nodump;
}
item = rom;
}
// Get the key (by machine name) and add the item under it
key = item.GetKey(Field.Machine_Name);
Items.Add(key, item);
return key;
}
/// <summary>
/// Parse the named file; declared abstract, so each derived DatFile type supplies the actual parsing
/// </summary>
/// <remarks>
/// The method is void, so nothing is handed back to the caller.
/// NOTE(review): parsed items are presumably added to this DatFile's item collection - confirm against the implementations.
/// </remarks>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
protected abstract void ParseFile(string filename, int indexId, bool keep, bool throwOnError = false);
#region Input Sanitization
/// <summary>
/// Get a sanitized Date from an input string
/// </summary>
/// <param name="input">String to get value from</param>
/// <returns>
/// Null for null input; the parsed DateTime's default ToString() output when the
/// input parses as a date; otherwise the input unchanged
/// </returns>
protected string CleanDate(string input)
{
// Null in, null out
if (input == null)
return null;
string date = string.Empty;
// Always true at this point because of the null guard above
if (input != null)
{
// Parsing and formatting both use the overloads without an explicit culture argument
if (DateTime.TryParse(input, out DateTime dateTime))
date = dateTime.ToString();
else
date = input;
}
return date;
}
/// <summary>
/// Clean a hash string from a Listrom DAT
/// </summary>
/// <remarks>
/// There is no null or length check here: a null input fails on the StartsWith call, and a
/// prefixed value shorter than the fixed Substring window makes Substring throw.
/// </remarks>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
protected string CleanListromHashData(string hash)
{
// "CRC" prefix: take the 8 characters starting at index 4, lower-cased
if (hash.StartsWith("CRC"))
return hash.Substring(4, 8).ToLowerInvariant();
// "SHA1" prefix: take the 40 characters starting at index 5, lower-cased
else if (hash.StartsWith("SHA1"))
return hash.Substring(5, 40).ToLowerInvariant();
// Anything else is returned unchanged
return hash;
}
#endregion
}
}

View File

@@ -1395,6 +1395,9 @@ namespace SabreTools.DatFiles
string basepath = null;
ItemDictionary dirStats = new ItemDictionary();
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Now process each of the input files
foreach (ParentablePath file in files)
{
@@ -1425,7 +1428,7 @@ namespace SabreTools.DatFiles
staticLogger.Verbose($"Beginning stat collection for '{file.CurrentPath}'");
List<string> games = new List<string>();
DatFile datdata = DatFile.CreateAndParse(file.CurrentPath);
DatFile datdata = dt.CreateAndParse(file.CurrentPath);
datdata.Items.BucketBy(Field.Machine_Name, DedupeType.None, norename: true);
// Output single DAT stats (if asked)

View File

@@ -134,7 +134,7 @@ Reset the internal state: reset();";
// Assume there could be multiple
foreach (ParentablePath datFilePath in datFilePaths)
{
datFile.Parse(datFilePath, index++);
dt.ParseInto(datFile, datFilePath, index++);
}
break;

View File

@@ -84,13 +84,16 @@ namespace SabreTools.Features
var datfiles = GetList(features, DatListValue);
var datfilePaths = DirectoryExtensions.GetFilesOnly(datfiles);
// Get the DatTool for parsing
DatTool dt = new DatTool();
// If we are in individual mode, process each DAT on their own, appending the DAT name to the output dir
if (GetBoolean(features, IndividualValue))
{
foreach (ParentablePath datfile in datfilePaths)
{
DatFile datdata = DatFile.Create();
datdata.Parse(datfile, int.MaxValue, keep: true);
dt.ParseInto(datdata, datfile, int.MaxValue, keep: true);
// Set depot information
datdata.Header.InputDepot = Header.InputDepot.Clone() as DepotInformation;
@@ -128,7 +131,7 @@ namespace SabreTools.Features
DatFile datdata = DatFile.Create();
foreach (ParentablePath datfile in datfilePaths)
{
datdata.Parse(datfile, int.MaxValue, keep: true);
dt.ParseInto(datdata, datfile, int.MaxValue, keep: true);
}
// Set depot information

View File

@@ -50,12 +50,15 @@ namespace SabreTools.Features
// Get only files from the inputs
List<ParentablePath> files = DirectoryExtensions.GetFilesOnly(Inputs, appendparent: true);
// Get the DatTool for parsing
DatTool dt = new DatTool();
// Loop over the input files
foreach (ParentablePath file in files)
{
// Create and fill the new DAT
DatFile internalDat = DatFile.Create(Header);
internalDat.Parse(file);
dt.ParseInto(internalDat, file);
// Get the output directory
OutputDir = file.GetOutputPath(OutputDir, GetBoolean(features, InplaceValue));

View File

@@ -152,6 +152,9 @@ namespace SabreTools.Features
List<ParentablePath> inputPaths = DirectoryExtensions.GetFilesOnly(Inputs, appendparent: true);
List<ParentablePath> basePaths = DirectoryExtensions.GetFilesOnly(GetList(features, BaseDatListValue));
// Get the DatTool for parsing
DatTool dt = new DatTool();
// If we're in standard update mode, run through all of the inputs
if (updateMode == UpdateMode.None)
{
@@ -161,7 +164,7 @@ namespace SabreTools.Features
// Create a new base DatFile
DatFile datFile = DatFile.Create(Header);
logger.User($"Processing '{Path.GetFileName(inputPath.CurrentPath)}'");
datFile.Parse(inputPath, keep: true,
dt.ParseInto(datFile, inputPath, keep: true,
keepext: datFile.Header.DatFormat.HasFlag(DatFormat.TSV)
|| datFile.Header.DatFormat.HasFlag(DatFormat.CSV)
|| datFile.Header.DatFormat.HasFlag(DatFormat.SSV));
@@ -294,7 +297,7 @@ namespace SabreTools.Features
{
// Parse the path to a new DatFile
DatFile repDat = DatFile.Create(userInputDat.Header.CloneFiltering());
repDat.Parse(inputPath, indexId: 1, keep: true);
dt.ParseInto(repDat, inputPath, indexId: 1, keep: true);
// Perform additional processing steps
repDat.ApplyExtras(Extras);
@@ -319,7 +322,7 @@ namespace SabreTools.Features
{
// Parse the path to a new DatFile
DatFile repDat = DatFile.Create(userInputDat.Header.CloneFiltering());
repDat.Parse(inputPath, indexId: 1, keep: true);
dt.ParseInto(repDat, inputPath, indexId: 1, keep: true);
// Perform additional processing steps
repDat.ApplyExtras(Extras);

View File

@@ -61,7 +61,7 @@ namespace SabreTools.Features
{
// Parse in from the file
DatFile datdata = DatFile.Create();
datdata.Parse(datfile, int.MaxValue, keep: true);
dt.ParseInto(datdata, datfile, int.MaxValue, keep: true);
// Perform additional processing steps
datdata.ApplyExtras(Extras);
@@ -107,7 +107,7 @@ namespace SabreTools.Features
DatFile datdata = DatFile.Create();
foreach (ParentablePath datfile in datfilePaths)
{
datdata.Parse(datfile, int.MaxValue, keep: true);
dt.ParseInto(datdata, datfile, int.MaxValue, keep: true);
}
// Perform additional processing steps