mirror of
https://github.com/claunia/SabreTools.git
synced 2025-12-16 19:14:27 +00:00
Replace Parse with mature second iteration
This code for parsing needed to be changed because of large file handling. XmlTextReader ended up being the best solution that doesn't crash at 500 MB in. A side effect of this change is that the code for processing a given DAT is very minimal now. The entire sub-block for SL-specific DAT info doesn't need to exist anymore. Small-scale tests have given 1:1 results with previous versions but further testing is needed for large files to make sure no data is lost.
This commit is contained in:
@@ -215,360 +215,134 @@ namespace SabreTools.Helper
|
||||
/// <param name="logger">Logger object for console and/or file output</param>
|
||||
/// <returns>List of RomData objects representing the found data</returns>
|
||||
public static List<RomData> Parse(string filename, int sysid, int srcid, Logger logger)
|
||||
{
|
||||
List<RomData> roms = new List<RomData>();
|
||||
|
||||
bool superdat = false;
|
||||
XmlDocument doc = GetXmlDocument(filename, logger);
|
||||
|
||||
// If the returned document is null, return the empty list
|
||||
if (doc == null)
|
||||
{
|
||||
return roms;
|
||||
}
|
||||
|
||||
// Experimental looping using only XML parsing
|
||||
XmlNode node = doc.FirstChild;
|
||||
if (node != null && node.Name == "xml")
|
||||
{
|
||||
// Skip over everything that's not an element
|
||||
while (node.NodeType != XmlNodeType.Element)
|
||||
{
|
||||
node = node.NextSibling;
|
||||
}
|
||||
}
|
||||
|
||||
// Once we find the main body, enter it
|
||||
if (node != null && (node.Name == "datafile" || node.Name == "softwarelist"))
|
||||
{
|
||||
node = node.FirstChild;
|
||||
}
|
||||
|
||||
// Skip the header if it exists
|
||||
if (node != null && node.Name == "header")
|
||||
{
|
||||
// Check for SuperDAT mode
|
||||
if (node.SelectSingleNode("name").InnerText.Contains(" - SuperDAT"))
|
||||
{
|
||||
superdat = true;
|
||||
}
|
||||
|
||||
// Skip over anything that's not an element
|
||||
while (node.NodeType != XmlNodeType.Element)
|
||||
{
|
||||
node = node.NextSibling;
|
||||
}
|
||||
}
|
||||
|
||||
// Loop over the document until the end
|
||||
while (node != null)
|
||||
{
|
||||
if (node.NodeType == XmlNodeType.Element && (node.Name == "machine" || node.Name == "game" || node.Name == "software"))
|
||||
{
|
||||
string tempname = "";
|
||||
if (node.Name == "software")
|
||||
{
|
||||
tempname = node.SelectSingleNode("description").InnerText;
|
||||
}
|
||||
else
|
||||
{
|
||||
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
|
||||
if (node.Attributes.Count == 0)
|
||||
{
|
||||
logger.Error("No attributes were found");
|
||||
node = node.NextSibling;
|
||||
continue;
|
||||
}
|
||||
tempname = node.Attributes["name"].Value;
|
||||
}
|
||||
|
||||
if (superdat)
|
||||
{
|
||||
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
|
||||
}
|
||||
|
||||
// Get the roms from the machine
|
||||
if (node.HasChildNodes)
|
||||
{
|
||||
// If this node has children, traverse the children
|
||||
foreach (XmlNode child in node.ChildNodes)
|
||||
{
|
||||
// If we find a rom or disk, add it
|
||||
if (child.NodeType == XmlNodeType.Element && (child.Name == "rom" || child.Name == "disk"))
|
||||
{
|
||||
// Take care of hex-sized files
|
||||
long size = -1;
|
||||
if (child.Attributes["size"] != null && child.Attributes["size"].Value.Contains("0x"))
|
||||
{
|
||||
size = Convert.ToInt64(child.Attributes["size"].Value, 16);
|
||||
}
|
||||
else if (child.Attributes["size"] != null)
|
||||
{
|
||||
Int64.TryParse(child.Attributes["size"].Value, out size);
|
||||
}
|
||||
|
||||
roms.Add(new RomData
|
||||
{
|
||||
Game = tempname,
|
||||
Name = child.Attributes["name"].Value,
|
||||
Type = child.Name,
|
||||
SystemID = sysid,
|
||||
SourceID = srcid,
|
||||
Size = size,
|
||||
CRC = (child.Attributes["crc"] != null ? child.Attributes["crc"].Value.ToLowerInvariant().Trim() : ""),
|
||||
MD5 = (child.Attributes["md5"] != null ? child.Attributes["md5"].Value.ToLowerInvariant().Trim() : ""),
|
||||
SHA1 = (child.Attributes["sha1"] != null ? child.Attributes["sha1"].Value.ToLowerInvariant().Trim() : ""),
|
||||
});
|
||||
}
|
||||
// If we find the signs of a software list, traverse the children
|
||||
else if (child.NodeType == XmlNodeType.Element && child.Name == "part" && child.HasChildNodes)
|
||||
{
|
||||
foreach (XmlNode part in child.ChildNodes)
|
||||
{
|
||||
// If we find a dataarea, traverse the children
|
||||
if (part.NodeType == XmlNodeType.Element && part.Name == "dataarea")
|
||||
{
|
||||
foreach (XmlNode data in part.ChildNodes)
|
||||
{
|
||||
// If we find a rom or disk, add it
|
||||
if (data.NodeType == XmlNodeType.Element && (data.Name == "rom" || data.Name == "disk") && data.Attributes["name"] != null)
|
||||
{
|
||||
// Take care of hex-sized files
|
||||
long size = -1;
|
||||
if (data.Attributes["size"] != null && data.Attributes["size"].Value.Contains("0x"))
|
||||
{
|
||||
size = Convert.ToInt64(data.Attributes["size"].Value, 16);
|
||||
}
|
||||
else if (data.Attributes["size"] != null)
|
||||
{
|
||||
Int64.TryParse(data.Attributes["size"].Value, out size);
|
||||
}
|
||||
|
||||
roms.Add(new RomData
|
||||
{
|
||||
Game = tempname,
|
||||
Name = data.Attributes["name"].Value,
|
||||
Type = data.Name,
|
||||
SystemID = sysid,
|
||||
SourceID = srcid,
|
||||
Size = size,
|
||||
CRC = (data.Attributes["crc"] != null ? data.Attributes["crc"].Value.ToLowerInvariant().Trim() : ""),
|
||||
MD5 = (data.Attributes["md5"] != null ? data.Attributes["md5"].Value.ToLowerInvariant().Trim() : ""),
|
||||
SHA1 = (data.Attributes["sha1"] != null ? data.Attributes["sha1"].Value.ToLowerInvariant().Trim() : ""),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
node = node.NextSibling;
|
||||
}
|
||||
|
||||
return roms;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Parse a DAT and return all found games and roms within
|
||||
/// </summary>
|
||||
/// <param name="filename">Name of the file to be parsed</param>
|
||||
/// <param name="sysid">System ID for the DAT</param>
|
||||
/// <param name="srcid">Source ID for the DAT</param>
|
||||
/// <param name="logger">Logger object for console and/or file output</param>
|
||||
/// <returns>List of RomData objects representing the found data</returns>
|
||||
public static List<RomData> Parse2(string filename, int sysid, int srcid, Logger logger)
|
||||
{
|
||||
List<RomData> roms = new List<RomData>();
|
||||
XmlTextReader xtr = GetXmlTextReader(filename, logger);
|
||||
xtr.WhitespaceHandling = WhitespaceHandling.None;
|
||||
bool superdat = false;
|
||||
bool superdat = false, shouldbreak = false;
|
||||
string parent = "";
|
||||
if (xtr != null)
|
||||
{
|
||||
xtr.MoveToContent();
|
||||
while (true)
|
||||
while (xtr.NodeType != XmlNodeType.None)
|
||||
{
|
||||
// If we're at the end element, break
|
||||
if (xtr.NodeType == XmlNodeType.EndElement && xtr.Name == parent)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// We only want elements
|
||||
if (xtr.NodeType != XmlNodeType.Element && xtr.NodeType != XmlNodeType.Text)
|
||||
if (xtr.NodeType != XmlNodeType.Element)
|
||||
{
|
||||
xtr.Read();
|
||||
continue;
|
||||
}
|
||||
|
||||
Console.WriteLine(xtr.Name + " " + xtr.NodeType);
|
||||
|
||||
if (xtr.Name == "datafile" || xtr.Name == "softwarelist")
|
||||
switch (xtr.Name)
|
||||
{
|
||||
parent = xtr.Name;
|
||||
xtr.Read();
|
||||
}
|
||||
else if (xtr.Name == "header")
|
||||
{
|
||||
xtr.ReadToDescendant("name");
|
||||
superdat = (xtr.ReadElementContentAsString() != null ? xtr.ReadElementContentAsString().Contains(" - SuperDAT") : false);
|
||||
}
|
||||
else if (xtr.Name == "machine" || xtr.Name == "game" || xtr.Name == "software")
|
||||
{
|
||||
string temptype = xtr.Name;
|
||||
string tempname = "";
|
||||
if (temptype == "software")
|
||||
{
|
||||
xtr.ReadToDescendant("description");
|
||||
tempname = xtr.ReadElementContentAsString();
|
||||
}
|
||||
else
|
||||
{
|
||||
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
|
||||
if (xtr.AttributeCount == 0)
|
||||
case "datafile":
|
||||
case "softwarelist":
|
||||
parent = xtr.Name;
|
||||
xtr.Read();
|
||||
break;
|
||||
case "header":
|
||||
xtr.ReadToDescendant("name");
|
||||
superdat = (xtr.ReadElementContentAsString() != null ? xtr.ReadElementContentAsString().Contains(" - SuperDAT") : false);
|
||||
while (xtr.Name != "header")
|
||||
{
|
||||
logger.Error("No attributes were found");
|
||||
xtr.ReadToNextSibling(xtr.Name);
|
||||
continue;
|
||||
xtr.Read();
|
||||
}
|
||||
tempname = xtr.GetAttribute("name");
|
||||
}
|
||||
xtr.Read();
|
||||
break;
|
||||
case "machine":
|
||||
case "game":
|
||||
case "software":
|
||||
string temptype = xtr.Name;
|
||||
string tempname = "";
|
||||
|
||||
if (superdat)
|
||||
{
|
||||
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
|
||||
}
|
||||
// We want to process the entire subtree of the game
|
||||
XmlReader subreader = xtr.ReadSubtree();
|
||||
|
||||
// Get the roms from the machine
|
||||
if (xtr.ReadInnerXml() != null && xtr.ReadInnerXml() != "")
|
||||
{
|
||||
while (!(xtr.Name == temptype && xtr.NodeType == XmlNodeType.EndElement))
|
||||
if (subreader != null)
|
||||
{
|
||||
// If we find a rom or disk, add it
|
||||
if (xtr.Name == "rom" || xtr.Name == "disk")
|
||||
if (temptype == "software" && subreader.ReadToFollowing("description"))
|
||||
{
|
||||
// Take care of hex-sized files
|
||||
long size = -1;
|
||||
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
|
||||
{
|
||||
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
|
||||
}
|
||||
else if (xtr.GetAttribute("size") != null)
|
||||
{
|
||||
Int64.TryParse(xtr.GetAttribute("size"), out size);
|
||||
}
|
||||
|
||||
roms.Add(new RomData
|
||||
{
|
||||
Game = tempname,
|
||||
Name = xtr.GetAttribute("name"),
|
||||
Type = xtr.Name,
|
||||
SystemID = sysid,
|
||||
SourceID = srcid,
|
||||
Size = size,
|
||||
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
|
||||
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
|
||||
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
|
||||
});
|
||||
|
||||
xtr.Read();
|
||||
|
||||
// If we hit end of file, break
|
||||
if (xtr.EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we find the signs of a software list, traverse the children
|
||||
else if (xtr.NodeType == XmlNodeType.Element && xtr.Name == "part" && xtr.ReadInnerXml() != null && xtr.ReadInnerXml() != "")
|
||||
{
|
||||
while (!(xtr.Name == "part" && xtr.NodeType == XmlNodeType.EndElement))
|
||||
{
|
||||
// If we find a dataarea, traverse the children
|
||||
if (xtr.NodeType == XmlNodeType.Element && xtr.Name == "dataarea")
|
||||
{
|
||||
while (!(xtr.Name == "dataarea" && xtr.NodeType == XmlNodeType.EndElement))
|
||||
{
|
||||
// If we find a rom or disk, add it
|
||||
if ((xtr.Name == "rom" || xtr.Name == "disk") && xtr.GetAttribute("name") != null)
|
||||
{
|
||||
// Take care of hex-sized files
|
||||
long size = -1;
|
||||
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
|
||||
{
|
||||
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
|
||||
}
|
||||
else if (xtr.GetAttribute("size") != null)
|
||||
{
|
||||
Int64.TryParse(xtr.GetAttribute("size"), out size);
|
||||
}
|
||||
|
||||
roms.Add(new RomData
|
||||
{
|
||||
Game = tempname,
|
||||
Name = xtr.GetAttribute("name"),
|
||||
Type = xtr.Name,
|
||||
SystemID = sysid,
|
||||
SourceID = srcid,
|
||||
Size = size,
|
||||
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
|
||||
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
|
||||
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
|
||||
});
|
||||
}
|
||||
|
||||
xtr.Read();
|
||||
|
||||
// If we hit end of file, break
|
||||
if (xtr.EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xtr.Read();
|
||||
|
||||
// If we hit end of file, break
|
||||
if (xtr.EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
tempname = subreader.ReadElementContentAsString();
|
||||
}
|
||||
else
|
||||
{
|
||||
xtr.Read();
|
||||
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
|
||||
if (xtr.AttributeCount == 0)
|
||||
{
|
||||
logger.Error("No attributes were found");
|
||||
xtr.ReadToNextSibling(xtr.Name);
|
||||
continue;
|
||||
}
|
||||
tempname = xtr.GetAttribute("name");
|
||||
}
|
||||
|
||||
if (superdat)
|
||||
{
|
||||
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
|
||||
}
|
||||
|
||||
while (subreader.Read())
|
||||
{
|
||||
// We only want elements
|
||||
if (subreader.NodeType != XmlNodeType.Element)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get the roms from the machine
|
||||
switch (subreader.Name)
|
||||
{
|
||||
case "rom":
|
||||
case "disk":
|
||||
// Take care of hex-sized files
|
||||
long size = -1;
|
||||
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
|
||||
{
|
||||
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
|
||||
}
|
||||
else if (xtr.GetAttribute("size") != null)
|
||||
{
|
||||
Int64.TryParse(xtr.GetAttribute("size"), out size);
|
||||
}
|
||||
|
||||
roms.Add(new RomData
|
||||
{
|
||||
Game = tempname,
|
||||
Name = xtr.GetAttribute("name"),
|
||||
Type = xtr.Name,
|
||||
SystemID = sysid,
|
||||
SourceID = srcid,
|
||||
Size = size,
|
||||
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
|
||||
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
|
||||
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
|
||||
});
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
xtr.ReadToNextSibling(temptype);
|
||||
}
|
||||
else
|
||||
{
|
||||
xtr.Read();
|
||||
|
||||
// Read to next game
|
||||
if (!xtr.ReadToNextSibling(temptype))
|
||||
{
|
||||
shouldbreak = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
xtr.Read();
|
||||
break;
|
||||
}
|
||||
|
||||
// If we hit end of file, break
|
||||
if (xtr.EOF)
|
||||
{
|
||||
break;
|
||||
}
|
||||
// If we're at the end element, break
|
||||
if (xtr.NodeType == XmlNodeType.EndElement && xtr.Name == parent)
|
||||
// If we hit an endpoint, break out of the loop early
|
||||
if (shouldbreak)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.Log("Outputting roms");
|
||||
|
||||
return roms;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Merge an arbitrary set of ROMs based on the supplied information
|
||||
|
||||
Reference in New Issue
Block a user