2020-12-09 21:52:38 -08:00
|
|
|
using System;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Text.RegularExpressions;
|
2020-12-10 23:24:09 -08:00
|
|
|
using SabreTools.Core.Tools;
|
|
|
|
|
using SabreTools.DatFiles;
|
2020-12-14 15:43:01 -08:00
|
|
|
using SabreTools.DatItems;
|
2020-12-09 21:52:38 -08:00
|
|
|
using SabreTools.IO;
|
2020-12-10 13:53:34 -08:00
|
|
|
using SabreTools.Logging;
|
2020-12-09 21:52:38 -08:00
|
|
|
|
2020-12-10 23:24:09 -08:00
|
|
|
namespace SabreTools.DatTools
|
2020-12-09 21:52:38 -08:00
|
|
|
{
|
2020-12-21 11:38:56 -08:00
|
|
|
/// <summary>
|
|
|
|
|
/// Helper methods for parsing into DatFiles
|
|
|
|
|
/// </summary>
|
2020-12-10 13:53:34 -08:00
|
|
|
public class Parser
|
2020-12-09 21:52:38 -08:00
|
|
|
{
|
2020-12-10 13:53:34 -08:00
|
|
|
#region Logging
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
/// Logging object
|
|
|
|
|
/// </summary>
|
2023-04-19 16:39:58 -04:00
|
|
|
private static readonly Logger logger = new();
|
2020-12-10 13:53:34 -08:00
|
|
|
|
|
|
|
|
#endregion
|
|
|
|
|
|
2020-12-09 21:52:38 -08:00
|
|
|
/// <summary>
/// Build a fresh DatFile and, when a filename is supplied, parse that file into it
/// </summary>
/// <param name="filename">Name of the file to be parsed; null yields an unparsed DatFile</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
/// <returns>The newly created DatFile, populated from the file when a filename was given</returns>
public static DatFile CreateAndParse(string? filename, bool statsOnly = false, bool throwOnError = false)
{
    DatFile created = DatFile.Create();

    // A null filename cannot be parsed, so the freshly created DatFile is handed back untouched
    if (filename is not null)
    {
        var source = new ParentablePath(filename);
        ParseInto(created, source, statsOnly: statsOnly, throwOnError: throwOnError);
    }

    return created;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Parse the DAT at the given filename into an existing DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to add to</param>
/// <param name="filename">Name of the file to be parsed; surrounding double quotes are removed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="keepext">True if original extension should be kept, false otherwise (default)</param>
/// <param name="quotes">True if quotes are assumed in supported types (default), false otherwise</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
public static void ParseInto(
    DatFile datFile,
    string filename,
    int indexId = 0,
    bool keep = false,
    bool keepext = false,
    bool quotes = true,
    bool statsOnly = false,
    bool throwOnError = false)
{
    // Strip any leading/trailing double quotes before wrapping the name in a path object
    string unquoted = filename.Trim('"');
    var parentable = new ParentablePath(unquoted);

    // Delegate to the ParentablePath overload, forwarding every option unchanged
    ParseInto(
        datFile,
        parentable,
        indexId: indexId,
        keep: keep,
        keepext: keepext,
        quotes: quotes,
        statsOnly: statsOnly,
        throwOnError: throwOnError);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Parse the DAT referenced by a ParentablePath into an existing DatFile
/// </summary>
/// <remarks>
/// Returns without touching <paramref name="datFile"/> when the path fails the
/// <c>Utilities.HasValidDatExtension</c> check. Otherwise the header filename and format are
/// filled in if they are not already set, and the file is parsed with the detected format.
/// </remarks>
/// <param name="datFile">Current DatFile object to add to</param>
/// <param name="input">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="keepext">True if original extension should be kept, false otherwise (default)</param>
/// <param name="quotes">True if quotes are assumed in supported types (default), false otherwise</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
/// <param name="throwOnError">
/// True if the error that is thrown should be thrown back to the caller, false to log it instead.
/// Note that this overload defaults to true, whereas the string-based overload defaults to false.
/// </param>
public static void ParseInto(
    DatFile datFile,
    ParentablePath input,
    int indexId = 0,
    bool keep = false,
    bool keepext = false,
    bool quotes = true,
    bool statsOnly = false,
    bool throwOnError = true)
{
    // Get the current path from the filename
    string currentPath = input.CurrentPath;

    // Check the file extension first as a safeguard
    if (!Utilities.HasValidDatExtension(currentPath))
        return;

    // If the output filename isn't set already, derive it from the input path
    var existingName = datFile.Header.GetFieldValue<string?>(DatHeader.FileNameKey);
    string? outputName = string.IsNullOrEmpty(existingName)
        ? (keepext
            ? Path.GetFileName(currentPath)
            : Path.GetFileNameWithoutExtension(currentPath))
        : existingName;
    datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, outputName);

    // If the output type isn't set already (value 0), use the format detected from the file
    DatFormat currentPathFormat = GetDatFormat(currentPath);
    var existingFormat = datFile.Header.GetFieldValue<DatFormat>(DatHeader.DatFormatKey);
    datFile.Header.SetFieldValue<DatFormat>(DatHeader.DatFormatKey, existingFormat == 0
        ? currentPathFormat
        : existingFormat);

    datFile.Items.SetBucketedBy(ItemKey.CRC); // Setting this because it can reduce issues later

    InternalStopwatch watch = new($"Parsing '{currentPath}' into internal DAT");

    // Now parse the correct type of DAT
    try
    {
        var parsingDatFile = DatFile.Create(currentPathFormat, datFile, quotes);
        parsingDatFile?.ParseFile(currentPath, indexId, keep, statsOnly: statsOnly, throwOnError: throwOnError);
    }
    catch (Exception ex) when (!throwOnError)
    {
        // Only reached when the caller asked for errors to be logged rather than rethrown
        logger.Error(ex, $"Error with file '{currentPath}'");
    }
    finally
    {
        // Stop the stopwatch on every exit path, including when the exception
        // propagates to the caller because throwOnError is true
        watch.Stop();
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Get what type of DAT the input file is
/// </summary>
/// <remarks>
/// Detection happens in three stages: the extension alone decides for a fixed set of
/// formats; otherwise the first two lines returned by <c>FindNextLine</c> are inspected,
/// first as XML and then as the text-based formats. All fixed-prefix comparisons are
/// ordinal so the result does not depend on the current culture.
/// </remarks>
/// <param name="filename">Name of the file to be parsed</param>
/// <returns>
/// The DatFormat corresponding to the DAT, or 0 when the extension is not accepted
/// by <c>Utilities.HasValidDatExtension</c> or the file does not exist
/// </returns>
private static DatFormat GetDatFormat(string filename)
{
    // Limit the output formats based on extension
    if (!Utilities.HasValidDatExtension(filename))
        return 0;

    // Get the extension from the filename
    string? ext = filename.GetNormalizedExtension();

    // Check if file exists
    if (!File.Exists(filename))
        return 0;

    // Some formats should only require the extension to know
    switch (ext)
    {
        case "csv": return DatFormat.CSV;
        case "json": return DatFormat.SabreJSON;
        case "md5": return DatFormat.RedumpMD5;
        case "sfv": return DatFormat.RedumpSFV;
        case "sha1": return DatFormat.RedumpSHA1;
        case "sha256": return DatFormat.RedumpSHA256;
        case "sha384": return DatFormat.RedumpSHA384;
        case "sha512": return DatFormat.RedumpSHA512;
        case "spamsum": return DatFormat.RedumpSpamSum;
        case "ssv": return DatFormat.SSV;
        case "tsv": return DatFormat.TSV;
    }

    // For everything else, we need to read it
    // Get the first two non-whitespace, non-comment lines to check, if possible
    string first = string.Empty, second = string.Empty;
    try
    {
        using StreamReader sr = File.OpenText(filename);
        first = FindNextLine(sr);
        second = FindNextLine(sr);
    }
    catch
    {
        // Deliberately best-effort: if the file cannot be opened or read, detection
        // continues with whatever lines were captured (possibly none), which ends at
        // the final fallback below
    }

    // Ordinal prefix test against the second line; the markers are fixed ASCII markup
    bool SecondStartsWith(string prefix) => second.StartsWith(prefix, StringComparison.Ordinal);

    // If we have an XML-based DAT
    if (first.Contains("<?xml") && first.Contains("?>"))
    {
        if (SecondStartsWith("<!doctype datafile") || SecondStartsWith("<datafile xmlns:xsi"))
            return DatFormat.Logiqx;

        if (SecondStartsWith("<!doctype mame")
            || SecondStartsWith("<!doctype m1")
            || SecondStartsWith("<mame")
            || SecondStartsWith("<m1"))
        {
            return DatFormat.Listxml;
        }

        if (SecondStartsWith("<!doctype softwaredb"))
            return DatFormat.OpenMSX;

        if (SecondStartsWith("<!doctype softwarelist"))
            return DatFormat.SoftwareList;

        if (SecondStartsWith("<!doctype sabredat"))
            return DatFormat.SabreXML;

        if ((SecondStartsWith("<dat") && !SecondStartsWith("<datafile"))
            || SecondStartsWith("<?xml-stylesheet"))
        {
            return DatFormat.OfflineList;
        }

        if (SecondStartsWith("<files"))
            return DatFormat.ArchiveDotOrg;

        // Older and non-compliant DATs
        return DatFormat.Logiqx;
    }

    // If we have an SMDB (SHA-256, Filename, SHA-1, MD5, CRC32)
    if (Regex.IsMatch(first, @"[0-9a-f]{64}\t.*?\t[0-9a-f]{40}\t[0-9a-f]{32}\t[0-9a-f]{8}"))
        return DatFormat.EverdriveSMDB;

    // If we have an INI-based DAT
    // IndexOf(char) exists on every target framework, so no conditional compilation is needed
    if (first.IndexOf('[') >= 0 && first.IndexOf(']') >= 0)
        return DatFormat.RomCenter;

    // If we have a listroms DAT
    if (first.StartsWith("roms required for driver", StringComparison.Ordinal))
        return DatFormat.Listrom;

    // If we have a CMP-based DAT
    if (first.Contains("clrmamepro") || first.Contains("romvault"))
        return DatFormat.ClrMamePro;

    if (first.Contains("doscenter"))
        return DatFormat.DOSCenter;

    // If we have an AttractMode DAT; IndexOf with an explicit comparison compiles on every
    // target framework and performs a case-insensitive, culture-independent header match
    if (first.IndexOf("#Name;Title;Emulator;CloneOf;Year;Manufacturer;Category;Players;Rotation;Control;Status;DisplayCount;DisplayType;AltRomname;AltTitle;Extra", StringComparison.OrdinalIgnoreCase) >= 0)
        return DatFormat.AttractMode;

    // Anything unrecognized is treated as ClrMamePro
    return DatFormat.ClrMamePro;
}
|
2021-07-18 13:10:48 -07:00
|
|
|
|
|
|
|
|
/// <summary>
/// Find the next non-whitespace, non-comment line from an input
/// </summary>
/// <remarks>
/// Each line is lowercased (invariant) and trimmed before it is examined. A line that
/// starts with <c>&lt;!--</c> opens a comment; the comment ends on the first line that ends
/// with <c>--&gt;</c>, which may be the same line. Every line is checked against the
/// comment state, so any number of consecutive comment lines are skipped.
/// </remarks>
/// <param name="sr">StreamReader representing the input</param>
/// <returns>
/// The next content line, lowercased and trimmed, or an empty string if the stream
/// ends before one is found (including when it ends inside a comment)
/// </returns>
private static string FindNextLine(StreamReader sr)
{
    bool inComment = false;

    string? raw;
    while ((raw = sr.ReadLine()) != null)
    {
        string line = raw.ToLowerInvariant().Trim();

        // Inside a block comment: skip everything up to and including the closing line
        if (inComment)
        {
            if (line.EndsWith("-->", StringComparison.Ordinal))
                inComment = false;

            continue;
        }

        // Empty lines are just skipped
        if (line.Length == 0)
            continue;

        // A comment opener either closes on the same line or starts a block comment
        if (line.StartsWith("<!--", StringComparison.Ordinal))
        {
            inComment = !line.EndsWith("-->", StringComparison.Ordinal);
            continue;
        }

        return line;
    }

    // The stream ended without another content line
    return string.Empty;
}
|
2020-12-09 21:52:38 -08:00
|
|
|
}
|
|
|
|
|
}
|