using System;
using System.IO;
using System.Linq;
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;

namespace SabreTools.DatFiles.Formats
{
    /// <summary>
    /// Represents parsing an Archive.org file list
    /// </summary>
    internal partial class ArchiveDotOrg : DatFile
    {
        /// <inheritdoc/>
        public override void ParseFile(string filename, int indexId, bool keep, bool statsOnly = false, bool throwOnError = false)
        {
            try
            {
                // Read the file list from disk into the serialization model
                var deserialized = new Serialization.Files.ArchiveDotOrg().Deserialize(filename);

                // Run the cross-model serializer over the same data.
                // The result is not consumed anywhere in this method.
                var crossModel = new Serialization.CrossModel.ArchiveDotOrg().Serialize(deserialized);

                // Feed every deserialized file entry into the internal format
                ConvertFiles(deserialized?.File, filename, indexId, statsOnly);
            }
            catch (Exception ex) when (!throwOnError)
            {
                // Only reached when the caller asked for errors to be suppressed;
                // otherwise the filter lets the exception propagate.
                string errorText = $"'{filename}' - An error occurred during parsing";
                logger.Error(ex, errorText);
            }
        }

        #region Converters

        /// <summary>
        /// Create a machine from the filename
        /// </summary>
        /// <param name="filename">Filename to derive from</param>
        /// <returns>Filled machine and new filename on success, null on error</returns>
        private static (Machine?, string?) DeriveMachine(string? filename)
        {
            // Nothing can be derived from a missing or empty name
            if (string.IsNullOrEmpty(filename))
                return (null, null);

            string itemPath = filename!;

            // Default: the machine is named after the file, minus its extension
            string machineName = Path.GetFileNameWithoutExtension(itemPath);

            // Look for the first forward slash; only if there is none at all,
            // fall back to the first backslash.
            int separatorIndex = itemPath.IndexOf('/');
            if (separatorIndex < 0)
                separatorIndex = itemPath.IndexOf('\\');

            // When a separator exists, the leading segment becomes the machine
            // name and everything after the separator becomes the item name.
            if (separatorIndex >= 0)
            {
                machineName = itemPath.Substring(0, separatorIndex);
                itemPath = itemPath.Substring(separatorIndex + 1);
            }

            return (new Machine { Name = machineName }, itemPath);
        }

        /// <summary>
        /// Convert files information
        /// </summary>
        /// <param name="files">Array of deserialized models to convert</param>
        /// <param name="filename">Name of the file to be parsed</param>
        /// <param name="indexId">Index ID for the DAT</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        private void ConvertFiles(Models.ArchiveDotOrg.File?[]? files, string filename, int indexId, bool statsOnly)
        {
            // A missing or empty array means there is nothing to convert
            if (files is null || files.Length == 0)
                return;

            // Convert each entry in turn
            foreach (var entry in files)
            {
                ConvertFile(entry, filename, indexId, statsOnly);
            }
        }

        /// <summary>
        /// Convert file information
        /// </summary>
        /// <param name="file">Deserialized model to convert</param>
        /// <param name="filename">Name of the file to be parsed</param>
        /// <param name="indexId">Index ID for the DAT</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        private void ConvertFile(Models.ArchiveDotOrg.File? file, string filename, int indexId, bool statsOnly)
        {
            // A missing entry cannot be converted
            if (file is null)
                return;

            // Split the entry name into a machine and an item name; if no machine
            // could be derived, build one named after the entry without its extension.
            var derived = DeriveMachine(file.Name);
            Machine machine = derived.Item1 ?? new Machine { Name = Path.GetFileNameWithoutExtension(file.Name) };
            string? itemName = derived.Item2;

            machine.Publisher = file.Publisher;
            machine.Comment = file.Comment;

            var rom = new Rom()
            {
                Source = new Source { Index = indexId, Name = filename },
            };
            rom.SetName(itemName);

            // Copy every field of the entry onto the rom, in the original order
            rom.SetFieldValue(Models.Metadata.Rom.AlbumKey, file.Album);
            rom.SetFieldValue(Models.Metadata.Rom.ArtistKey, file.Artist);
            rom.SetFieldValue(Models.Metadata.Rom.ASRDetectedLangKey, file.ASRDetectedLang);
            rom.SetFieldValue(Models.Metadata.Rom.ASRDetectedLangConfKey, file.ASRDetectedLangConf);
            rom.SetFieldValue(Models.Metadata.Rom.ASRTranscribedLangKey, file.ASRTranscribedLang);
            rom.SetFieldValue(Models.Metadata.Rom.BitrateKey, file.Bitrate);
            rom.SetFieldValue(Models.Metadata.Rom.BitTorrentMagnetHashKey, file.BitTorrentMagnetHash);
            rom.SetFieldValue(Models.Metadata.Rom.ClothCoverDetectionModuleVersionKey, file.ClothCoverDetectionModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.CollectionCatalogNumberKey, file.CollectionCatalogNumber);
            rom.SetFieldValue(Models.Metadata.Rom.CRCKey, file.CRC32);
            rom.SetFieldValue(Models.Metadata.Rom.CreatorKey, file.Creator);
            rom.SetFieldValue(Models.Metadata.Rom.DateKey, file.LastModifiedTime?.ToString());
            rom.SetFieldValue(Models.Metadata.Rom.FileCountKey, file.FileCount);
            rom.SetFieldValue(Models.Metadata.Rom.FormatKey, file.Format);
            rom.SetFieldValue(Models.Metadata.Rom.HeightKey, file.Height);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRCharToWordhOCRVersionKey, file.hOCRCharToWordhOCRVersion);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRCharToWordModuleVersionKey, file.hOCRCharToWordModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRFtsTexthOCRVersionKey, file.hOCRFtsTexthOCRVersion);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRFtsTextModuleVersionKey, file.hOCRFtsTextModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRPageIndexhOCRVersionKey, file.hOCRPageIndexhOCRVersion);
            rom.SetFieldValue(Models.Metadata.Rom.hOCRPageIndexModuleVersionKey, file.hOCRPageIndexModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.LengthKey, file.Length);
            rom.SetFieldValue(Models.Metadata.Rom.MatrixNumberKey, file.MatrixNumber);
            rom.SetFieldValue(Models.Metadata.Rom.MD5Key, file.MD5);
            rom.SetFieldValue(Models.Metadata.Rom.OriginalKey, file.Original);
            rom.SetFieldValue(Models.Metadata.Rom.PDFModuleVersionKey, file.PDFModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.PreviewImageKey, file.PreviewImage);
            rom.SetFieldValue(Models.Metadata.Rom.RotationKey, file.Rotation);
            rom.SetFieldValue(Models.Metadata.Rom.SizeKey, NumberHelper.ConvertToInt64(file.Size));
            rom.SetFieldValue(Models.Metadata.Rom.SHA1Key, file.SHA1);
            rom.SetFieldValue(Models.Metadata.Rom.SourceKey, file.Source);
            rom.SetFieldValue(Models.Metadata.Rom.StatusKey, ItemStatus.None);
            rom.SetFieldValue(Models.Metadata.Rom.SummationKey, file.Summation);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRKey, file.TesseractOCR);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRConvertedKey, file.TesseractOCRConverted);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRDetectedLangKey, file.TesseractOCRDetectedLang);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRDetectedLangConfKey, file.TesseractOCRDetectedLangConf);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRDetectedScriptKey, file.TesseractOCRDetectedScript);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRDetectedScriptConfKey, file.TesseractOCRDetectedScriptConf);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRModuleVersionKey, file.TesseractOCRModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.TesseractOCRParametersKey, file.TesseractOCRParameters);
            rom.SetFieldValue(Models.Metadata.Rom.TitleKey, file.Title);
            rom.SetFieldValue(Models.Metadata.Rom.TrackKey, file.Track);
            rom.SetFieldValue(Models.Metadata.Rom.WhisperASRModuleVersionKey, file.WhisperASRModuleVersion);
            rom.SetFieldValue(Models.Metadata.Rom.WhisperModelHashKey, file.WhisperModelHash);
            rom.SetFieldValue(Models.Metadata.Rom.WhisperModelNameKey, file.WhisperModelName);
            rom.SetFieldValue(Models.Metadata.Rom.WhisperVersionKey, file.WhisperVersion);
            rom.SetFieldValue(Models.Metadata.Rom.WidthKey, file.Width);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval0To10Key, file.WordConfidenceInterval0To10);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval11To20Key, file.WordConfidenceInterval11To20);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval21To30Key, file.WordConfidenceInterval21To30);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval31To40Key, file.WordConfidenceInterval31To40);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval41To50Key, file.WordConfidenceInterval41To50);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval51To60Key, file.WordConfidenceInterval51To60);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval61To70Key, file.WordConfidenceInterval61To70);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval71To80Key, file.WordConfidenceInterval71To80);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval81To90Key, file.WordConfidenceInterval81To90);
            rom.SetFieldValue(Models.Metadata.Rom.WordConfidenceInterval91To100Key, file.WordConfidenceInterval91To100);

            // Attach the machine details to the rom, then hand it to the add helper
            rom.CopyMachineInformation(machine);
            ParseAddHelper(rom, statsOnly);
        }

        #endregion
    }
}