Replace Parse with mature second iteration

This code for parsing needed to be changed because of large file handling. XmlTextReader ended up being the best solution that doesn't crash at 500 MB in. A side effect of this change is that the code for processing a given DAT is very minimal now. The entire sub-block for SL-specific DAT info doesn't need to exist anymore. Small-scale tests have given 1:1 results with previous versions but further testing is needed for large files to make sure no data is lost.
This commit is contained in:
Matt Nadareski
2016-04-25 01:59:09 -07:00
parent 994a03826f
commit f8ed865eaa

View File

@@ -215,360 +215,134 @@ namespace SabreTools.Helper
/// <param name="logger">Logger object for console and/or file output</param>
/// <returns>List of RomData objects representing the found data</returns>
public static List<RomData> Parse(string filename, int sysid, int srcid, Logger logger)
{
List<RomData> roms = new List<RomData>();
bool superdat = false;
XmlDocument doc = GetXmlDocument(filename, logger);
// If the returned document is null, return the empty list
if (doc == null)
{
return roms;
}
// Experimental looping using only XML parsing
XmlNode node = doc.FirstChild;
if (node != null && node.Name == "xml")
{
// Skip over everything that's not an element
while (node.NodeType != XmlNodeType.Element)
{
node = node.NextSibling;
}
}
// Once we find the main body, enter it
if (node != null && (node.Name == "datafile" || node.Name == "softwarelist"))
{
node = node.FirstChild;
}
// Skip the header if it exists
if (node != null && node.Name == "header")
{
// Check for SuperDAT mode
if (node.SelectSingleNode("name").InnerText.Contains(" - SuperDAT"))
{
superdat = true;
}
// Skip over anything that's not an element
while (node.NodeType != XmlNodeType.Element)
{
node = node.NextSibling;
}
}
// Loop over the document until the end
while (node != null)
{
if (node.NodeType == XmlNodeType.Element && (node.Name == "machine" || node.Name == "game" || node.Name == "software"))
{
string tempname = "";
if (node.Name == "software")
{
tempname = node.SelectSingleNode("description").InnerText;
}
else
{
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
if (node.Attributes.Count == 0)
{
logger.Error("No attributes were found");
node = node.NextSibling;
continue;
}
tempname = node.Attributes["name"].Value;
}
if (superdat)
{
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
}
// Get the roms from the machine
if (node.HasChildNodes)
{
// If this node has children, traverse the children
foreach (XmlNode child in node.ChildNodes)
{
// If we find a rom or disk, add it
if (child.NodeType == XmlNodeType.Element && (child.Name == "rom" || child.Name == "disk"))
{
// Take care of hex-sized files
long size = -1;
if (child.Attributes["size"] != null && child.Attributes["size"].Value.Contains("0x"))
{
size = Convert.ToInt64(child.Attributes["size"].Value, 16);
}
else if (child.Attributes["size"] != null)
{
Int64.TryParse(child.Attributes["size"].Value, out size);
}
roms.Add(new RomData
{
Game = tempname,
Name = child.Attributes["name"].Value,
Type = child.Name,
SystemID = sysid,
SourceID = srcid,
Size = size,
CRC = (child.Attributes["crc"] != null ? child.Attributes["crc"].Value.ToLowerInvariant().Trim() : ""),
MD5 = (child.Attributes["md5"] != null ? child.Attributes["md5"].Value.ToLowerInvariant().Trim() : ""),
SHA1 = (child.Attributes["sha1"] != null ? child.Attributes["sha1"].Value.ToLowerInvariant().Trim() : ""),
});
}
// If we find the signs of a software list, traverse the children
else if (child.NodeType == XmlNodeType.Element && child.Name == "part" && child.HasChildNodes)
{
foreach (XmlNode part in child.ChildNodes)
{
// If we find a dataarea, traverse the children
if (part.NodeType == XmlNodeType.Element && part.Name == "dataarea")
{
foreach (XmlNode data in part.ChildNodes)
{
// If we find a rom or disk, add it
if (data.NodeType == XmlNodeType.Element && (data.Name == "rom" || data.Name == "disk") && data.Attributes["name"] != null)
{
// Take care of hex-sized files
long size = -1;
if (data.Attributes["size"] != null && data.Attributes["size"].Value.Contains("0x"))
{
size = Convert.ToInt64(data.Attributes["size"].Value, 16);
}
else if (data.Attributes["size"] != null)
{
Int64.TryParse(data.Attributes["size"].Value, out size);
}
roms.Add(new RomData
{
Game = tempname,
Name = data.Attributes["name"].Value,
Type = data.Name,
SystemID = sysid,
SourceID = srcid,
Size = size,
CRC = (data.Attributes["crc"] != null ? data.Attributes["crc"].Value.ToLowerInvariant().Trim() : ""),
MD5 = (data.Attributes["md5"] != null ? data.Attributes["md5"].Value.ToLowerInvariant().Trim() : ""),
SHA1 = (data.Attributes["sha1"] != null ? data.Attributes["sha1"].Value.ToLowerInvariant().Trim() : ""),
});
}
}
}
}
}
}
}
}
node = node.NextSibling;
}
return roms;
}
/// <summary>
/// Parse a DAT and return all found games and roms within
/// </summary>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="sysid">System ID for the DAT</param>
/// <param name="srcid">Source ID for the DAT</param>
/// <param name="logger">Logger object for console and/or file output</param>
/// <returns>List of RomData objects representing the found data</returns>
public static List<RomData> Parse2(string filename, int sysid, int srcid, Logger logger)
{
List<RomData> roms = new List<RomData>();
XmlTextReader xtr = GetXmlTextReader(filename, logger);
xtr.WhitespaceHandling = WhitespaceHandling.None;
bool superdat = false;
bool superdat = false, shouldbreak = false;
string parent = "";
if (xtr != null)
{
xtr.MoveToContent();
while (true)
while (xtr.NodeType != XmlNodeType.None)
{
// If we're at the end element, break
if (xtr.NodeType == XmlNodeType.EndElement && xtr.Name == parent)
{
break;
}
// We only want elements
if (xtr.NodeType != XmlNodeType.Element && xtr.NodeType != XmlNodeType.Text)
if (xtr.NodeType != XmlNodeType.Element)
{
xtr.Read();
continue;
}
Console.WriteLine(xtr.Name + " " + xtr.NodeType);
if (xtr.Name == "datafile" || xtr.Name == "softwarelist")
switch (xtr.Name)
{
parent = xtr.Name;
xtr.Read();
}
else if (xtr.Name == "header")
{
xtr.ReadToDescendant("name");
superdat = (xtr.ReadElementContentAsString() != null ? xtr.ReadElementContentAsString().Contains(" - SuperDAT") : false);
}
else if (xtr.Name == "machine" || xtr.Name == "game" || xtr.Name == "software")
{
string temptype = xtr.Name;
string tempname = "";
if (temptype == "software")
{
xtr.ReadToDescendant("description");
tempname = xtr.ReadElementContentAsString();
}
else
{
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
if (xtr.AttributeCount == 0)
case "datafile":
case "softwarelist":
parent = xtr.Name;
xtr.Read();
break;
case "header":
xtr.ReadToDescendant("name");
superdat = (xtr.ReadElementContentAsString() != null ? xtr.ReadElementContentAsString().Contains(" - SuperDAT") : false);
while (xtr.Name != "header")
{
logger.Error("No attributes were found");
xtr.ReadToNextSibling(xtr.Name);
continue;
xtr.Read();
}
tempname = xtr.GetAttribute("name");
}
xtr.Read();
break;
case "machine":
case "game":
case "software":
string temptype = xtr.Name;
string tempname = "";
if (superdat)
{
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
}
// We want to process the entire subtree of the game
XmlReader subreader = xtr.ReadSubtree();
// Get the roms from the machine
if (xtr.ReadInnerXml() != null && xtr.ReadInnerXml() != "")
{
while (!(xtr.Name == temptype && xtr.NodeType == XmlNodeType.EndElement))
if (subreader != null)
{
// If we find a rom or disk, add it
if (xtr.Name == "rom" || xtr.Name == "disk")
if (temptype == "software" && subreader.ReadToFollowing("description"))
{
// Take care of hex-sized files
long size = -1;
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
{
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
}
else if (xtr.GetAttribute("size") != null)
{
Int64.TryParse(xtr.GetAttribute("size"), out size);
}
roms.Add(new RomData
{
Game = tempname,
Name = xtr.GetAttribute("name"),
Type = xtr.Name,
SystemID = sysid,
SourceID = srcid,
Size = size,
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
});
xtr.Read();
// If we hit end of file, break
if (xtr.EOF)
{
break;
}
}
// If we find the signs of a software list, traverse the children
else if (xtr.NodeType == XmlNodeType.Element && xtr.Name == "part" && xtr.ReadInnerXml() != null && xtr.ReadInnerXml() != "")
{
while (!(xtr.Name == "part" && xtr.NodeType == XmlNodeType.EndElement))
{
// If we find a dataarea, traverse the children
if (xtr.NodeType == XmlNodeType.Element && xtr.Name == "dataarea")
{
while (!(xtr.Name == "dataarea" && xtr.NodeType == XmlNodeType.EndElement))
{
// If we find a rom or disk, add it
if ((xtr.Name == "rom" || xtr.Name == "disk") && xtr.GetAttribute("name") != null)
{
// Take care of hex-sized files
long size = -1;
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
{
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
}
else if (xtr.GetAttribute("size") != null)
{
Int64.TryParse(xtr.GetAttribute("size"), out size);
}
roms.Add(new RomData
{
Game = tempname,
Name = xtr.GetAttribute("name"),
Type = xtr.Name,
SystemID = sysid,
SourceID = srcid,
Size = size,
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
});
}
xtr.Read();
// If we hit end of file, break
if (xtr.EOF)
{
break;
}
}
}
xtr.Read();
// If we hit end of file, break
if (xtr.EOF)
{
break;
}
}
tempname = subreader.ReadElementContentAsString();
}
else
{
xtr.Read();
// There are rare cases where a malformed XML will not have the required attributes. We can only skip them.
if (xtr.AttributeCount == 0)
{
logger.Error("No attributes were found");
xtr.ReadToNextSibling(xtr.Name);
continue;
}
tempname = xtr.GetAttribute("name");
}
if (superdat)
{
tempname = Regex.Match(tempname, @".*?\\(.*)").Groups[1].Value;
}
while (subreader.Read())
{
// We only want elements
if (subreader.NodeType != XmlNodeType.Element)
{
continue;
}
// Get the roms from the machine
switch (subreader.Name)
{
case "rom":
case "disk":
// Take care of hex-sized files
long size = -1;
if (xtr.GetAttribute("size") != null && xtr.GetAttribute("size").Contains("0x"))
{
size = Convert.ToInt64(xtr.GetAttribute("size"), 16);
}
else if (xtr.GetAttribute("size") != null)
{
Int64.TryParse(xtr.GetAttribute("size"), out size);
}
roms.Add(new RomData
{
Game = tempname,
Name = xtr.GetAttribute("name"),
Type = xtr.Name,
SystemID = sysid,
SourceID = srcid,
Size = size,
CRC = (xtr.GetAttribute("crc") != null ? xtr.GetAttribute("crc").ToLowerInvariant().Trim() : ""),
MD5 = (xtr.GetAttribute("md5") != null ? xtr.GetAttribute("md5").ToLowerInvariant().Trim() : ""),
SHA1 = (xtr.GetAttribute("sha1") != null ? xtr.GetAttribute("sha1").ToLowerInvariant().Trim() : ""),
});
break;
}
}
}
}
xtr.ReadToNextSibling(temptype);
}
else
{
xtr.Read();
// Read to next game
if (!xtr.ReadToNextSibling(temptype))
{
shouldbreak = true;
}
break;
default:
xtr.Read();
break;
}
// If we hit end of file, break
if (xtr.EOF)
{
break;
}
// If we're at the end element, break
if (xtr.NodeType == XmlNodeType.EndElement && xtr.Name == parent)
// If we hit an endpoint, break out of the loop early
if (shouldbreak)
{
break;
}
}
}
logger.Log("Outputting roms");
return roms;
}
}
/// <summary>
/// Merge an arbitrary set of ROMs based on the supplied information