Files
SabreTools/SabreTools.DatFiles/Formats/ArchiveDotOrg.Reader.cs
Matt Nadareski b37aed389e Add nullable context to SabreTools.DatItems
This change also starts migrating the internals of the DatItem formats to the new internal models. Right now, it's basically just acting like a wrapper around those models.
2023-08-14 13:17:51 -04:00

188 lines
9.4 KiB
C#

using System;
using System.IO;
using System.Linq;
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
namespace SabreTools.DatFiles.Formats
{
/// <summary>
/// Represents parsing a Archive.org file list
/// </summary>
internal partial class ArchiveDotOrg : DatFile
{
/// <inheritdoc/>
public override void ParseFile(string filename, int indexId, bool keep, bool statsOnly = false, bool throwOnError = false)
{
try
{
// Deserialize the input file
var files = Serialization.ArchiveDotOrg.Deserialize(filename);
// Convert the files data to the internal format
ConvertFiles(files?.File, filename, indexId, statsOnly);
}
catch (Exception ex) when (!throwOnError)
{
string message = $"'{filename}' - An error occurred during parsing";
logger.Error(ex, message);
}
}
#region Converters
/// <summary>
/// Create a machine from the filename
/// </summary>
/// <param name="filename">Filename to derive from</param>
/// <returns>Filled machine and new filename on success, null on error</returns>
private static (Machine?, string?) DeriveMachine(string? filename)
{
// If the filename is missing, we can't do anything
if (string.IsNullOrWhiteSpace(filename))
return (null, null);
string machineName = Path.GetFileNameWithoutExtension(filename);
if (filename.Contains('/'))
{
string[] split = filename.Split('/');
machineName = split[0];
filename = filename[(machineName.Length + 1)..];
}
else if (filename.Contains('\\'))
{
string[] split = filename.Split('\\');
machineName = split[0];
filename = filename[(machineName.Length + 1)..];
}
var machine = new Machine { Name = machineName };
return (machine, filename);
}
/// <summary>
/// Convert files information
/// </summary>
/// <param name="files">Array of deserialized models to convert</param>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
private void ConvertFiles(Models.ArchiveDotOrg.File[]? files, string filename, int indexId, bool statsOnly)
{
// If the files array is missing, we can't do anything
if (files == null || !files.Any())
return;
// Loop through the rows and add
foreach (var file in files)
{
ConvertFile(file, filename, indexId, statsOnly);
}
}
/// <summary>
/// Convert file information
/// </summary>
/// <param name="file">Deserialized model to convert</param>
/// <param name="filename">Name of the file to be parsed</param>
/// <param name="indexId">Index ID for the DAT</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
private void ConvertFile(Models.ArchiveDotOrg.File? file, string filename, int indexId, bool statsOnly)
{
// If the file is missing, we can't do anything
if (file == null)
return;
(var machine, string? name) = DeriveMachine(file.Name);
machine ??= new Machine { Name = Path.GetFileNameWithoutExtension(file.Name) };
machine.Publisher = file.Publisher;
machine.Comment = file.Comment;
var rom = new Rom()
{
Name = name,
ArchiveDotOrgSource = file.Source,
//BitTorrentMagnetHash = file.BitTorrentMagnetHash, // TODO: Add to internal model
Date = file.LastModifiedTime?.ToString(),
Size = NumberHelper.ConvertToInt64(file.Size),
MD5 = file.MD5,
CRC = file.CRC32,
SHA1 = file.SHA1,
//FileCount = file.FileCount, // TODO: Add to internal model
ArchiveDotOrgFormat = file.Format,
OriginalFilename = file.Original,
Summation = file.Summation,
//MatrixNumber = file.MatrixNumber, // TODO: Add to internal model
//CollectionCatalogNumber = file.CollectionCatalogNumber, // TODO: Add to internal model
// ASR-Related
//ASRDetectedLang = file.ASRDetectedLang, // TODO: Add to internal model
//ASRDetectedLangConf = file.ASRDetectedLangConf, // TODO: Add to internal model
//ASRTranscribedLang = file.ASRTranscribedLang, // TODO: Add to internal model
//WhisperASRModuleVersion = file.WhisperASRModuleVersion, // TODO: Add to internal model
//WhisperModelHash = file.WhisperModelHash, // TODO: Add to internal model
//WhisperModelName = file.WhisperModelName, // TODO: Add to internal model
//WhisperVersion = file.WhisperVersion, // TODO: Add to internal model
// OCR-Related
//ClothCoverDetectionModuleVersion = file.ClothCoverDetectionModuleVersions, // TODO: Add to internal model
//hOCRCharToWordhOCRVersion = file.hOCRCharToWordhOCRVersion, // TODO: Add to internal model
//hOCRCharToWordModuleVersion = file.hOCRCharToWordModuleVersion, // TODO: Add to internal model
//hOCRFtsTexthOCRVersion = file.hOCRFtsTexthOCRVersion, // TODO: Add to internal model
//hOCRFtsTextModuleVersion = file.hOCRFtsTextModuleVersion, // TODO: Add to internal model
//hOCRPageIndexhOCRVersion = file.hOCRPageIndexhOCRVersion, // TODO: Add to internal model
//hOCRPageIndexModuleVersion = file.hOCRPageIndexModuleVersion, // TODO: Add to internal model
//TesseractOCR = file.TesseractOCR, // TODO: Add to internal model
//TesseractOCRConverted = file.TesseractOCRConverted, // TODO: Add to internal model
//TesseractOCRDetectedLang = file.TesseractOCRDetectedLang, // TODO: Add to internal model
//TesseractOCRDetectedLangConf = file.TesseractOCRDetectedLangConf, // TODO: Add to internal model
//TesseractOCRDetectedScript = file.TesseractOCRDetectedScript, // TODO: Add to internal model
//TesseractOCRDetectedScriptConf = file.TesseractOCRDetectedScriptConf, // TODO: Add to internal model
//TesseractOCRParameters = file.TesseractOCRParameters, // TODO: Add to internal model
//TesseractOCRModuleVersion = file.TesseractOCRModuleVersion, // TODO: Add to internal model
//PDFModuleVersion = file.PDFModuleVersion, // TODO: Add to internal model
//WordConfidenceInterval0To10 = file.WordConfidenceInterval0To10, // TODO: Add to internal model
//WordConfidenceInterval11To20 = file.WordConfidenceInterval11To20, // TODO: Add to internal model
//WordConfidenceInterval21To30 = file.WordConfidenceInterval21To30, // TODO: Add to internal model
//WordConfidenceInterval31To40 = file.WordConfidenceInterval31To40, // TODO: Add to internal model
//WordConfidenceInterval41To50 = file.WordConfidenceInterval41To50, // TODO: Add to internal model
//WordConfidenceInterval51To60 = file.WordConfidenceInterval51To60, // TODO: Add to internal model
//WordConfidenceInterval61To70 = file.WordConfidenceInterval61To70, // TODO: Add to internal model
//WordConfidenceInterval71To80 = file.WordConfidenceInterval71To80, // TODO: Add to internal model
//WordConfidenceInterval81To90 = file.WordConfidenceInterval81To90, // TODO: Add to internal model
//WordConfidenceInterval91To100 = file.WordConfidenceInterval91To100, // TODO: Add to internal model
// Media-Related
//Album = file.Album, // TODO: Add to internal model
//Artist = file.Artist, // TODO: Add to internal model
//Bitrate = file.Bitrate, // TODO: Add to internal model
//Creator = file.Creator, // TODO: Add to internal model
//Height = file.Height, // TODO: Add to internal model
//Length = file.Length, // TODO: Add to internal model
//PreviewImage = file.PreviewImage, // TODO: Add to internal model
//Rotation = file.Rotation, // TODO: Add to internal model
//Title = file.Title, // TODO: Add to internal model
//Track = file.Track, // TODO: Add to internal model
//Width = file.Width, // TODO: Add to internal model
ItemStatus = ItemStatus.None,
Source = new Source
{
Index = indexId,
Name = filename,
},
};
// Now process and add the rom
rom.CopyMachineInformation(machine);
ParseAddHelper(rom, statsOnly);
}
#endregion
}
}