Add preliminary support for Archive.org file lists (fixes #38)

2025-12-16 19:14:27 +00:00 · 2021-07-19 10:39:21 -07:00
parent 08598d1e8b
commit 343ea67bd8
11 changed files with 463 additions and 21 deletions
--- a/SabreTools.DatFiles/DatFile.cs
+++ b/SabreTools.DatFiles/DatFile.cs
@@ -73,6 +73,7 @@ namespace SabreTools.DatFiles
        {
            return datFormat switch
            {
+                DatFormat.ArchiveDotOrg => new ArchiveDotOrg(baseDat),
                DatFormat.AttractMode => new AttractMode(baseDat),
                DatFormat.ClrMamePro => new ClrMamePro(baseDat, quotes),
                DatFormat.CSV => new SeparatedValue(baseDat, ','),
--- a/SabreTools.DatFiles/DatHeader.cs
+++ b/SabreTools.DatFiles/DatHeader.cs
@@ -1029,6 +1029,21 @@ namespace SabreTools.DatFiles
                }
            }

+            // Archive.org
+            if (DatFormat.HasFlag(DatFormat.ArchiveDotOrg))
+            {
+                if (usedExtensions.Contains(".xml"))
+                {
+                    outfileNames.Add(DatFormat.ArchiveDotOrg, CreateOutFileNamesHelper(outDir, ".ado.xml", overwrite));
+                    usedExtensions.Add(".ado.xml");
+                }
+                else
+                {
+                    outfileNames.Add(DatFormat.ArchiveDotOrg, CreateOutFileNamesHelper(outDir, ".xml", overwrite));
+                    usedExtensions.Add(".xml");
+                }
+            }
+
            #endregion

            return outfileNames;
--- a/SabreTools.DatFiles/Enums.cs
+++ b/SabreTools.DatFiles/Enums.cs
@@ -45,6 +45,11 @@ namespace SabreTools.DatFiles
        /// </summary>
        OpenMSX = 1 << 6,

+        /// <summary>
+        /// Archive.org file list XML
+        /// </summary>
+        ArchiveDotOrg = 1 << 7,
+
        #endregion

        #region Propietary Formats
@@ -52,22 +57,22 @@ namespace SabreTools.DatFiles
        /// <summary>
        /// ClrMamePro custom
        /// </summary>
-        ClrMamePro = 1 << 7,
+        ClrMamePro = 1 << 8,

        /// <summary>
        /// RomCenter INI-based
        /// </summary>
-        RomCenter = 1 << 8,
+        RomCenter = 1 << 9,

        /// <summary>
        /// DOSCenter custom
        /// </summary>
-        DOSCenter = 1 << 9,
+        DOSCenter = 1 << 10,

        /// <summary>
        /// AttractMode custom
        /// </summary>
-        AttractMode = 1 << 10,
+        AttractMode = 1 << 11,

        #endregion

@@ -76,37 +81,37 @@ namespace SabreTools.DatFiles
        /// <summary>
        /// ClrMamePro missfile
        /// </summary>
-        MissFile = 1 << 11,
+        MissFile = 1 << 12,

        /// <summary>
        /// Comma-Separated Values (standardized)
        /// </summary>
-        CSV = 1 << 12,
+        CSV = 1 << 13,

        /// <summary>
        /// Semicolon-Separated Values (standardized)
        /// </summary>
-        SSV = 1 << 13,
+        SSV = 1 << 14,

        /// <summary>
        /// Tab-Separated Values (standardized)
        /// </summary>
-        TSV = 1 << 14,
+        TSV = 1 << 15,

        /// <summary>
        /// MAME Listrom output
        /// </summary>
-        Listrom = 1 << 15,
+        Listrom = 1 << 16,

        /// <summary>
        /// Everdrive Packs SMDB
        /// </summary>
-        EverdriveSMDB = 1 << 16,
+        EverdriveSMDB = 1 << 17,

        /// <summary>
        /// SabreJSON
        /// </summary>
-        SabreJSON = 1 << 17,
+        SabreJSON = 1 << 18,

        #endregion

@@ -115,37 +120,37 @@ namespace SabreTools.DatFiles
        /// <summary>
        /// CRC32 hash list
        /// </summary>
-        RedumpSFV = 1 << 18,
+        RedumpSFV = 1 << 19,

        /// <summary>
        /// MD5 hash list
        /// </summary>
-        RedumpMD5 = 1 << 19,
+        RedumpMD5 = 1 << 20,

        /// <summary>
        /// SHA-1 hash list
        /// </summary>
-        RedumpSHA1 = 1 << 20,
+        RedumpSHA1 = 1 << 21,

        /// <summary>
        /// SHA-256 hash list
        /// </summary>
-        RedumpSHA256 = 1 << 21,
+        RedumpSHA256 = 1 << 22,

        /// <summary>
        /// SHA-384 hash list
        /// </summary>
-        RedumpSHA384 = 1 << 22,
+        RedumpSHA384 = 1 << 23,

        /// <summary>
        /// SHA-512 hash list
        /// </summary>
-        RedumpSHA512 = 1 << 23,
+        RedumpSHA512 = 1 << 24,

        /// <summary>
        /// SpamSum hash list
        /// </summary>
-        RedumpSpamSum = 1 << 24,
+        RedumpSpamSum = 1 << 25,

        #endregion

--- a/SabreTools.DatFiles/Formats/ArchiveDotOrg.cs
+++ b/SabreTools.DatFiles/Formats/ArchiveDotOrg.cs
@@ -0,0 +1,396 @@
+using System;
+using System.IO;
+using System.Text;
+using System.Xml;
+using System.Xml.Schema;
+
+using SabreTools.Core;
+using SabreTools.Core.Tools;
+using SabreTools.DatItems;
+using SabreTools.DatItems.Formats;
+using SabreTools.IO;
+
+namespace SabreTools.DatFiles.Formats
+{
+    /// <summary>
+    /// Represents parsing and writing of an Archive.org file list
+    /// </summary>
+    internal class ArchiveDotOrg : DatFile
+    {
+        /// <summary>
+        /// Create an Archive.org file list from an existing DatFile by forwarding it to the base constructor
+        /// </summary>
+        /// <param name="datFile">Parent DatFile to copy from</param>
+        public ArchiveDotOrg(DatFile datFile)
+            : base(datFile)
+        {
+        }
+
+        /// <inheritdoc/>
+        /// <remarks>
+        /// Scans the document for <c>files</c> elements and passes each one to <c>ReadFiles</c>.
+        /// The reader is disposed on every exit path, including when an exception propagates
+        /// because <paramref name="throwOnError"/> is true.
+        /// </remarks>
+        public override void ParseFile(string filename, int indexId, bool keep, bool statsOnly = false, bool throwOnError = false)
+        {
+            // XmlReader.Create never returns null, so no null check is needed. The using
+            // declaration guarantees the file handle is released however the method exits.
+            using XmlReader xtr = XmlReader.Create(filename, new XmlReaderSettings
+            {
+                CheckCharacters = false,
+                DtdProcessing = DtdProcessing.Ignore,
+                IgnoreComments = true,
+                IgnoreWhitespace = true,
+                ValidationFlags = XmlSchemaValidationFlags.None,
+                ValidationType = ValidationType.None,
+            });
+
+            // Read the file to the end
+            try
+            {
+                xtr.MoveToContent();
+                while (!xtr.EOF)
+                {
+                    // We only want elements
+                    if (xtr.NodeType != XmlNodeType.Element)
+                    {
+                        xtr.Read();
+                        continue;
+                    }
+
+                    switch (xtr.Name)
+                    {
+                        case "files":
+                            ReadFiles(xtr.ReadSubtree(), statsOnly, filename, indexId, keep);
+
+                            // Skip the files node now that we've processed it
+                            xtr.Skip();
+                            break;
+
+                        default:
+                            xtr.Read();
+                            break;
+                    }
+                }
+            }
+            catch (Exception ex) when (!throwOnError)
+            {
+                logger.Warning(ex, $"Exception found while parsing '{filename}'");
+
+                // Advance past the affected node; no further parsing is attempted after an error
+                xtr.Read();
+            }
+        }
+
+        /// <summary>
+        /// Read every file element found within a files element
+        /// </summary>
+        /// <remarks>
+        /// Nodes other than file elements are stepped over one at a time, so file
+        /// elements nested inside other elements are still visited
+        /// </remarks>
+        /// <param name="reader">XmlReader scoped to the files element</param>
+        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
+        /// <param name="filename">Name of the file to be parsed</param>
+        /// <param name="indexId">Index ID for the DAT</param>
+        /// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
+        private void ReadFiles(
+            XmlReader reader,
+            bool statsOnly,
+
+            // Standard Dat parsing
+            string filename,
+            int indexId,
+
+            // Miscellaneous
+            bool keep)
+        {
+            // Without a reader there is nothing to parse
+            if (reader == null)
+                return;
+
+            // Start from the first content node
+            reader.MoveToContent();
+
+            // Walk the nodes until the reader is exhausted
+            while (!reader.EOF)
+            {
+                // Only file elements carry data; every other node, including the
+                // enclosing files element, is simply stepped over
+                bool isFileElement = reader.NodeType == XmlNodeType.Element
+                    && reader.Name == "file";
+
+                if (!isFileElement)
+                {
+                    reader.Read();
+                    continue;
+                }
+
+                // Hand the file element to the item parser as its own subtree
+                ReadFile(reader.ReadSubtree(), statsOnly, filename, indexId, keep);
+
+                // Skip the file node now that we've processed it
+                reader.Skip();
+            }
+        }
+
+        /// <summary>
+        /// Read a single file element into a Rom and pass it to ParseAddHelper
+        /// </summary>
+        /// <param name="reader">XmlReader scoped to a single file element</param>
+        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
+        /// <param name="filename">Name of the file to be parsed</param>
+        /// <param name="indexId">Index ID for the DAT</param>
+        /// <param name="keep">True if full pathnames are to be kept, false otherwise (default); currently unused here</param>
+        private void ReadFile(
+            XmlReader reader,
+            bool statsOnly,
+
+            // Standard Dat parsing
+            string filename,
+            int indexId,
+
+            // Miscellaneous
+            bool keep)
+        {
+            // Without a reader there is nothing to parse
+            if (reader == null)
+                return;
+
+            // Otherwise, add what is possible
+            reader.MoveToContent();
+
+            // Create the Rom to store the info
+            Rom rom = new Rom
+            {
+                Name = reader.GetAttribute("name"),
+                Value = reader.GetAttribute("source"), // TODO: Create new field for this
+
+                // TODO: Derive from path, if possible
+                Machine = new Machine
+                {
+                    Name = "Default",
+                    Description = "Default",
+                },
+
+                Source = new Source
+                {
+                    Index = indexId,
+                    Name = filename,
+                }
+            };
+
+            // TODO: Handle SuperDAT
+            //if (Header.Type == "SuperDAT" && !keep)
+            //{
+            //    string tempout = Regex.Match(machine.Name, @".*?\\(.*)").Groups[1].Value;
+            //    if (!string.IsNullOrWhiteSpace(tempout))
+            //        machine.Name = tempout;
+            //}
+
+            while (!reader.EOF)
+            {
+                // We only want elements
+                if (reader.NodeType != XmlNodeType.Element)
+                {
+                    reader.Read();
+                    continue;
+                }
+
+                // Map each known child element onto the rom
+                switch (reader.Name)
+                {
+                    case "crc32":
+                        rom.CRC = reader.ReadElementContentAsString();
+                        break;
+
+                    case "md5":
+                        rom.MD5 = reader.ReadElementContentAsString();
+                        break;
+
+                    case "mtime":
+                        rom.Date = reader.ReadElementContentAsString();
+                        break;
+
+                    case "sha1":
+                        rom.SHA1 = reader.ReadElementContentAsString();
+                        break;
+
+                    case "size":
+                        rom.Size = Utilities.CleanLong(reader.ReadElementContentAsString());
+                        break;
+
+                    // TODO: Create new field for this; the value is read only to advance the reader
+                    case "format":
+                        _ = reader.ReadElementContentAsString();
+                        break;
+
+                    // TODO: Create new field for this; the value is read only to advance the reader
+                    case "original":
+                        _ = reader.ReadElementContentAsString();
+                        break;
+
+                    // TODO: Create new field for this, Int32?; the value is read only to advance the reader
+                    case "rotation":
+                        _ = reader.ReadElementContentAsString();
+                        break;
+
+                    // TODO: Create new field for this; the value is read only to advance the reader
+                    case "summation":
+                        _ = reader.ReadElementContentAsString();
+                        break;
+
+                    default:
+                        reader.Read();
+                        break;
+                }
+            }
+
+            // Now process and add the rom
+            ParseAddHelper(rom, statsOnly);
+        }
+
+        /// <inheritdoc/>
+        /// <remarks>
+        /// Rom is the only item type returned for this format
+        /// </remarks>
+        protected override ItemType[] GetSupportedTypes()
+        {
+            return new[] { ItemType.Rom };
+        }
+
+        /// <inheritdoc/>
+        /// <remarks>
+        /// Emits a <c>files</c> root element containing one <c>file</c> element per item that is
+        /// not ignored. The stream and writer are disposed on every exit path, so the output file
+        /// handle is released even when writing fails part-way through. When an exception is
+        /// caught (that is, <paramref name="throwOnError"/> is false) it is logged and false is
+        /// returned; otherwise the exception propagates to the caller.
+        /// </remarks>
+        public override bool WriteToFile(string outfile, bool ignoreblanks = false, bool throwOnError = false)
+        {
+            try
+            {
+                logger.User($"Writing to '{outfile}'...");
+
+                // File.Create throws instead of returning null, so a failure to open the output
+                // is handled by the catch below rather than by a null check. UTF8Encoding(false)
+                // writes the file without a byte order mark.
+                using (FileStream fs = File.Create(outfile))
+                using (XmlTextWriter xtw = new XmlTextWriter(fs, new UTF8Encoding(false)))
+                {
+                    // Indent nested elements with a single tab per level
+                    xtw.Formatting = Formatting.Indented;
+                    xtw.IndentChar = '\t';
+                    xtw.Indentation = 1;
+
+                    // Write out the header
+                    WriteHeader(xtw);
+
+                    // Use a sorted list of games to output
+                    foreach (string key in Items.SortedKeys)
+                    {
+                        ConcurrentList<DatItem> datItems = Items.FilteredItems(key);
+
+                        // If this machine doesn't contain any writable items, skip
+                        if (!ContainsWritable(datItems))
+                            continue;
+
+                        // Resolve the names in the block
+                        datItems = DatItem.ResolveNames(datItems);
+
+                        // Emit the items of this block in order
+                        for (int index = 0; index < datItems.Count; index++)
+                        {
+                            DatItem datItem = datItems[index];
+
+                            // Check for a "null" item
+                            datItem = ProcessNullifiedItem(datItem);
+
+                            // Write out the item if we're not ignoring
+                            if (!ShouldIgnore(datItem, ignoreblanks))
+                                WriteDatItem(xtw, datItem);
+                        }
+                    }
+
+                    // Write the file footer out
+                    WriteFooter(xtw);
+                }
+
+                // Only report success once the writer and stream have been closed,
+                // so the message is never logged for a file that failed to finalize
+                logger.User($"'{outfile}' written!{Environment.NewLine}");
+            }
+            catch (Exception ex) when (!throwOnError)
+            {
+                // Best-effort mode: report the failure to the caller instead of throwing
+                logger.Error(ex);
+                return false;
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Start the XML document and open the root files element on the supplied XmlTextWriter
+        /// </summary>
+        /// <param name="xtw">XmlTextWriter to output to</param>
+        private void WriteHeader(XmlTextWriter xtw)
+        {
+            xtw.WriteStartDocument();
+
+            xtw.WriteStartElement("files");
+
+            xtw.Flush();
+        }
+
+        /// <summary>
+        /// Write a single DatItem as a file element using the supplied XmlTextWriter
+        /// </summary>
+        /// <param name="xtw">XmlTextWriter to output to</param>
+        /// <param name="datItem">DatItem object to be output</param>
+        private void WriteDatItem(XmlTextWriter xtw, DatItem datItem)
+        {
+            // Pre-process the item name
+            ProcessItemName(datItem, true);
+
+            // Only rom items are written; any other item type produces no output
+            switch (datItem.ItemType)
+            {
+                case ItemType.Rom:
+                    var rom = datItem as Rom;
+                    xtw.WriteStartElement("file");
+
+                    // Emit the name attribute that ReadFile parses back into the rom name
+                    xtw.WriteAttributeString("name", rom.Name);
+                    xtw.WriteOptionalAttributeString("source", rom.Value);
+
+                    xtw.WriteOptionalElementString("mtime", rom.Date);
+                    xtw.WriteOptionalElementString("size", rom.Size?.ToString());
+                    xtw.WriteOptionalElementString("md5", rom.MD5?.ToLowerInvariant());
+                    xtw.WriteOptionalElementString("crc32", rom.CRC?.ToLowerInvariant());
+                    xtw.WriteOptionalElementString("sha1", rom.SHA1?.ToLowerInvariant());
+                    // TODO: Write format, original, rotation and summation once Rom has fields for them
+
+                    // End file
+                    xtw.WriteEndElement();
+                    break;
+            }
+
+            xtw.Flush();
+        }
+
+        /// <summary>
+        /// Close the currently open element and flush the supplied XmlTextWriter
+        /// </summary>
+        /// <param name="xtw">XmlTextWriter to output to</param>
+        private void WriteFooter(XmlTextWriter xtw)
+        {
+            // End files
+            xtw.WriteEndElement();
+
+            xtw.Flush();
+        }
+    }
+}