using System;
using System.IO;
using System.Linq;
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;

namespace SabreTools.DatFiles.Formats
{
    /// <summary>
    /// Represents parsing an Archive.org file list
    /// </summary>
    internal partial class ArchiveDotOrg : DatFile
    {
        /// <inheritdoc/>
        public override void ParseFile(string filename, int indexId, bool keep, bool statsOnly = false, bool throwOnError = false)
        {
            try
            {
                // Deserialize the input file
                var files = Serialization.ArchiveDotOrg.Deserialize(filename);

                // Convert the files data to the internal format
                ConvertFiles(files?.File, filename, indexId, statsOnly);
            }
            catch (Exception ex) when (!throwOnError)
            {
                // Only reached when the caller asked for errors to be logged instead of propagated
                string message = $"'{filename}' - An error occurred during parsing";
                logger.Error(ex, message);
            }
        }

        #region Converters

        /// <summary>
        /// Create a machine from the filename
        /// </summary>
        /// <param name="filename">Filename to derive from</param>
        /// <returns>
        /// Filled machine and new filename on success, a pair of nulls when
        /// <paramref name="filename"/> is null, empty, or whitespace
        /// </returns>
        /// <remarks>
        /// When the filename contains a directory separator, the text before the first
        /// separator becomes the machine name and the text after it becomes the new
        /// filename. A forward slash takes precedence; a backslash is only considered
        /// when the filename contains no forward slash. Without any separator, the
        /// machine name is the filename without its extension and the filename is
        /// returned unchanged.
        /// </remarks>
        private static (Machine?, string?) DeriveMachine(string? filename)
        {
            // If the filename is missing, we can't do anything
            if (string.IsNullOrWhiteSpace(filename))
                return (null, null);

            // Locate the first separator, preferring '/' over '\\'
            int separator = filename.IndexOf('/');
            if (separator < 0)
                separator = filename.IndexOf('\\');

            // No separator means the whole value is the item name
            if (separator < 0)
                return (new Machine { Name = Path.GetFileNameWithoutExtension(filename) }, filename);

            // Everything before the first separator names the machine,
            // everything after it is the item name
            string machineName = filename[..separator];
            string remainder = filename[(separator + 1)..];
            return (new Machine { Name = machineName }, remainder);
        }

        /// <summary>
        /// Convert files information
        /// </summary>
        /// <param name="files">Array of deserialized models to convert</param>
        /// <param name="filename">Name of the file to be parsed</param>
        /// <param name="indexId">Index ID for the DAT</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        private void ConvertFiles(Models.ArchiveDotOrg.File[]? files, string filename, int indexId, bool statsOnly)
        {
            // If the files array is missing, we can't do anything
            if (files == null || files.Length == 0)
                return;

            // Loop through the rows and add
            foreach (var file in files)
            {
                ConvertFile(file, filename, indexId, statsOnly);
            }
        }

        /// <summary>
        /// Convert file information
        /// </summary>
        /// <param name="file">Deserialized model to convert</param>
        /// <param name="filename">Name of the file to be parsed</param>
        /// <param name="indexId">Index ID for the DAT</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        private void ConvertFile(Models.ArchiveDotOrg.File? file, string filename, int indexId, bool statsOnly)
        {
            // If the file is missing, we can't do anything
            if (file == null)
                return;

            // The derived name is null whenever the machine could not be derived
            (var machine, string? name) = DeriveMachine(file.Name);
            machine ??= new Machine { Name = Path.GetFileNameWithoutExtension(file.Name) };

            machine.Publisher = file.Publisher;
            machine.Comment = file.Comment;

            var rom = new Rom()
            {
                Name = name,
                ArchiveDotOrgSource = file.Source,
                //BitTorrentMagnetHash = file.BitTorrentMagnetHash, // TODO: Add to internal model
                Date = file.LastModifiedTime?.ToString(),
                Size = Utilities.CleanLong(file.Size),
                MD5 = file.MD5,
                CRC = file.CRC32,
                SHA1 = file.SHA1,
                //FileCount = file.FileCount, // TODO: Add to internal model
                ArchiveDotOrgFormat = file.Format,
                //Original = file.Original, // TODO: Add to internal model
                Summation = file.Summation,
                //MatrixNumber = file.MatrixNumber, // TODO: Add to internal model
                //CollectionCatalogNumber = file.CollectionCatalogNumber, // TODO: Add to internal model

                // ASR-Related
                //ASRDetectedLang = file.ASRDetectedLang, // TODO: Add to internal model
                //ASRDetectedLangConf = file.ASRDetectedLangConf, // TODO: Add to internal model
                //ASRTranscribedLang = file.ASRTranscribedLang, // TODO: Add to internal model
                //WhisperASRModuleVersion = file.WhisperASRModuleVersion, // TODO: Add to internal model
                //WhisperModelHash = file.WhisperModelHash, // TODO: Add to internal model
                //WhisperModelName = file.WhisperModelName, // TODO: Add to internal model
                //WhisperVersion = file.WhisperVersion, // TODO: Add to internal model

                // OCR-Related
                //ClothCoverDetectionModuleVersion = file.ClothCoverDetectionModuleVersions, // TODO: Add to internal model
                //hOCRCharToWordhOCRVersion = file.hOCRCharToWordhOCRVersion, // TODO: Add to internal model
                //hOCRCharToWordModuleVersion = file.hOCRCharToWordModuleVersion, // TODO: Add to internal model
                //hOCRFtsTexthOCRVersion = file.hOCRFtsTexthOCRVersion, // TODO: Add to internal model
                //hOCRFtsTextModuleVersion = file.hOCRFtsTextModuleVersion, // TODO: Add to internal model
                //hOCRPageIndexhOCRVersion = file.hOCRPageIndexhOCRVersion, // TODO: Add to internal model
                //hOCRPageIndexModuleVersion = file.hOCRPageIndexModuleVersion, // TODO: Add to internal model
                //TesseractOCR = file.TesseractOCR, // TODO: Add to internal model
                //TesseractOCRConverted = file.TesseractOCRConverted, // TODO: Add to internal model
                //TesseractOCRDetectedLang = file.TesseractOCRDetectedLang, // TODO: Add to internal model
                //TesseractOCRDetectedLangConf = file.TesseractOCRDetectedLangConf, // TODO: Add to internal model
                //TesseractOCRDetectedScript = file.TesseractOCRDetectedScript, // TODO: Add to internal model
                //TesseractOCRDetectedScriptConf = file.TesseractOCRDetectedScriptConf, // TODO: Add to internal model
                //TesseractOCRParameters = file.TesseractOCRParameters, // TODO: Add to internal model
                //TesseractOCRModuleVersion = file.TesseractOCRModuleVersion, // TODO: Add to internal model
                //PDFModuleVersion = file.PDFModuleVersion, // TODO: Add to internal model
                //WordConfidenceInterval0To10 = file.WordConfidenceInterval0To10, // TODO: Add to internal model
                //WordConfidenceInterval11To20 = file.WordConfidenceInterval11To20, // TODO: Add to internal model
                //WordConfidenceInterval21To30 = file.WordConfidenceInterval21To30, // TODO: Add to internal model
                //WordConfidenceInterval31To40 = file.WordConfidenceInterval31To40, // TODO: Add to internal model
                //WordConfidenceInterval41To50 = file.WordConfidenceInterval41To50, // TODO: Add to internal model
                //WordConfidenceInterval51To60 = file.WordConfidenceInterval51To60, // TODO: Add to internal model
                //WordConfidenceInterval61To70 = file.WordConfidenceInterval61To70, // TODO: Add to internal model
                //WordConfidenceInterval71To80 = file.WordConfidenceInterval71To80, // TODO: Add to internal model
                //WordConfidenceInterval81To90 = file.WordConfidenceInterval81To90, // TODO: Add to internal model
                //WordConfidenceInterval91To100 = file.WordConfidenceInterval91To100, // TODO: Add to internal model

                // Media-Related
                //Album = file.Album, // TODO: Add to internal model
                //Artist = file.Artist, // TODO: Add to internal model
                //Bitrate = file.Bitrate, // TODO: Add to internal model
                //Creator = file.Creator, // TODO: Add to internal model
                //Height = file.Height, // TODO: Add to internal model
                //Length = file.Length, // TODO: Add to internal model
                //PreviewImage = file.PreviewImage, // TODO: Add to internal model
                //Rotation = file.Rotation, // TODO: Add to internal model
                //Title = file.Title, // TODO: Add to internal model
                //Track = file.Track, // TODO: Add to internal model
                //Width = file.Width, // TODO: Add to internal model

                ItemStatus = ItemStatus.None,

                Source = new Source
                {
                    Index = indexId,
                    Name = filename,
                },
            };

            // Now process and add the rom
            rom.CopyMachineInformation(machine);
            ParseAddHelper(rom, statsOnly);
        }

        #endregion
    }
}