mirror of
https://github.com/claunia/SabreTools.git
synced 2025-12-16 19:14:27 +00:00
Add archive.org deserialization test, fix issues
This commit is contained in:
@@ -49,18 +49,56 @@ namespace SabreTools.Models.ArchiveDotOrg
|
||||
[XmlElement("rotation")]
|
||||
public long? Rotation { get; set; }
|
||||
|
||||
[XmlElement("hocr_char_to_word_module_version")]
|
||||
public string? hOCRCharToWordModuleVersion { get; set; }
|
||||
#region OCR-Related
|
||||
|
||||
[XmlElement("cloth_cover_detection_module_version")]
|
||||
public string? ClothCoverDetectionModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("hocr_char_to_word_hocr_version")]
|
||||
public string? hOCRCharToWordhOCRVersion { get; set; }
|
||||
|
||||
[XmlElement("ocr_module_version")]
|
||||
public string? TesseractOCRModuleVersion { get; set; }
|
||||
[XmlElement("hocr_char_to_word_module_version")]
|
||||
public string? hOCRCharToWordModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("hocr_fts_text_hocr_version")]
|
||||
public string? hOCRFtsTexthOCRVersion { get; set; }
|
||||
|
||||
[XmlElement("hocr_fts_text_module_version")]
|
||||
public string? hOCRFtsTextModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("hocr_pageindex_hocr_version")]
|
||||
public string? hOCRPageIndexhOCRVersion { get; set; }
|
||||
|
||||
[XmlElement("hocr_pageindex_module_version")]
|
||||
public string? hOCRPageIndexModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("ocr")]
|
||||
public string? TesseractOCR { get; set; }
|
||||
|
||||
[XmlElement("ocr_converted")]
|
||||
public string? TesseractOCRConverted { get; set; }
|
||||
|
||||
[XmlElement("ocr_detected_lang")]
|
||||
public string? TesseractOCRDetectedLang { get; set; }
|
||||
|
||||
[XmlElement("ocr_detected_lang_conf")]
|
||||
public string? TesseractOCRDetectedLangConf { get; set; }
|
||||
|
||||
[XmlElement("ocr_detected_script")]
|
||||
public string? TesseractOCRDetectedScript { get; set; }
|
||||
|
||||
[XmlElement("ocr_detected_script_conf")]
|
||||
public string? TesseractOCRDetectedScriptConf { get; set; }
|
||||
|
||||
[XmlElement("ocr_parameters")]
|
||||
public string? TesseractOCRParameters { get; set; }
|
||||
|
||||
[XmlElement("ocr_module_version")]
|
||||
public string? TesseractOCRModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("pdf_module_version")]
|
||||
public string? PDFModuleVersion { get; set; }
|
||||
|
||||
[XmlElement("word_conf_0_10")]
|
||||
public long? WordConfidenceInterval0To10 { get; set; }
|
||||
|
||||
@@ -90,5 +128,19 @@ namespace SabreTools.Models.ArchiveDotOrg
|
||||
|
||||
[XmlElement("word_conf_91_100")]
|
||||
public long? WordConfidenceInterval91To100 { get; set; }
|
||||
|
||||
#endregion
|
||||
|
||||
#region DO NOT USE IN PRODUCTION
|
||||
|
||||
/// <remarks>Should be empty</remarks>
|
||||
[XmlAnyAttribute]
|
||||
public XmlAttribute[]? ADDITIONAL_ATTRIBUTES { get; set; }
|
||||
|
||||
/// <remarks>Should be empty</remarks>
|
||||
[XmlAnyElement]
|
||||
public object[]? ADDITIONAL_ELEMENTS { get; set; }
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,19 @@ namespace SabreTools.Models.ArchiveDotOrg
|
||||
/// <summary>
/// Root "files" element of an archive.org file listing
/// </summary>
[XmlRoot("files")]
public class Files
{
    /// <summary>
    /// Child "file" elements of the listing
    /// </summary>
    /// <remarks>
    /// Only the explicitly named mapping is kept. A second, unnamed
    /// XmlElement attribute on this property would add a competing mapping
    /// under the default element name "File" for the same item type.
    /// </remarks>
    [XmlElement("file")]
    public File[]? File { get; set; }

    #region DO NOT USE IN PRODUCTION

    /// <summary>
    /// Attributes on the root element that no other member maps
    /// </summary>
    /// <remarks>Should be empty</remarks>
    [XmlAnyAttribute]
    public XmlAttribute[]? ADDITIONAL_ATTRIBUTES { get; set; }

    /// <summary>
    /// Child elements of the root element that no other member maps
    /// </summary>
    /// <remarks>Should be empty</remarks>
    [XmlAnyElement]
    public object[]? ADDITIONAL_ELEMENTS { get; set; }

    #endregion
}
|
||||
}
|
||||
@@ -1,8 +1,6 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
using SabreTools.DatFiles;
|
||||
using SabreTools.DatTools;
|
||||
using Xunit;
|
||||
|
||||
namespace SabreTools.Test.DatTools
|
||||
@@ -43,7 +41,7 @@ namespace SabreTools.Test.DatTools
|
||||
if (filename != null)
|
||||
filename = Path.Combine(Environment.CurrentDirectory, "TestData", filename);
|
||||
|
||||
var datFile = Parser.CreateAndParse(filename, throwOnError: true);
|
||||
var datFile = SabreTools.DatTools.Parser.CreateAndParse(filename, throwOnError: true);
|
||||
Assert.Equal(datFormat, datFile.Header.DatFormat);
|
||||
Assert.Equal(totalCount, datFile.Items.TotalCount);
|
||||
}
|
||||
|
||||
37
SabreTools.Test/Parser/SerializationTests.cs
Normal file
37
SabreTools.Test/Parser/SerializationTests.cs
Normal file
@@ -0,0 +1,37 @@
|
||||
using System;
|
||||
using System.Xml.Serialization;
|
||||
using Xunit;
|
||||
|
||||
namespace SabreTools.Test.Parser
{
    /// <summary>
    /// Deserialization checks for the XML serialization models
    /// </summary>
    public class SerializationTests
    {
        /// <summary>
        /// Deserialize the sample archive.org file listing and verify that
        /// nothing in it falls through to the catch-all members
        /// </summary>
        [Fact]
        public void ArchiveDotOrgDeserializeTest()
        {
            // Build the path to the sample listing inside the test data folder
            string testDataDirectory = System.IO.Path.Combine(Environment.CurrentDirectory, "TestData");
            string samplePath = System.IO.Path.Combine(testDataDirectory, "test-archivedotorg-files.xml");

            // The stream stays open for the whole body and is disposed on exit
            using (System.IO.FileStream input = System.IO.File.OpenRead(samplePath))
            {
                // Create a serializer for the root model
                XmlSerializer xmlSerializer = new XmlSerializer(typeof(Models.ArchiveDotOrg.Files));

                // Read the listing; a result of any other type becomes null
                var listing = xmlSerializer.Deserialize(input) as Models.ArchiveDotOrg.Files;

                // The root and its file entries must be present in full
                Assert.NotNull(listing);
                Assert.NotNull(listing.File);
                Assert.Equal(22, listing.File.Length);

                // Nothing unmapped may be left on the root element...
                Assert.Null(listing.ADDITIONAL_ATTRIBUTES);
                Assert.Null(listing.ADDITIONAL_ELEMENTS);

                // ...nor on any individual file entry
                foreach (var entry in listing.File)
                {
                    Assert.Null(entry.ADDITIONAL_ATTRIBUTES);
                    Assert.Null(entry.ADDITIONAL_ELEMENTS);
                }
            }
        }
    }
}
|
||||
@@ -13,6 +13,7 @@
|
||||
<ProjectReference Include="..\SabreTools.FileTypes\SabreTools.FileTypes.csproj" />
|
||||
<ProjectReference Include="..\SabreTools.Filtering\SabreTools.Filtering.csproj" />
|
||||
<ProjectReference Include="..\SabreTools.IO\SabreTools.IO.csproj" />
|
||||
<ProjectReference Include="..\SabreTools.Models\SabreTools.Models.csproj" />
|
||||
<ProjectReference Include="..\SabreTools.Skippers\SabreTools.Skippers.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
228
SabreTools.Test/TestData/test-archivedotorg-files.xml
Normal file
228
SabreTools.Test/TestData/test-archivedotorg-files.xml
Normal file
@@ -0,0 +1,228 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--
|
||||
Source: https://ia902607.us.archive.org/4/items/adventuresofsher00doylrich/adventuresofsher00doylrich_files.xml
|
||||
-->
|
||||
<files>
|
||||
<file name="__ia_thumb.jpg" source="original">
|
||||
<mtime>1657820174</mtime>
|
||||
<size>23393</size>
|
||||
<md5>33ca910055358d18e116ea66a0abdf04</md5>
|
||||
<crc32>aad0f2dc</crc32>
|
||||
<sha1>fc6d673ef7fbe974bba88b8275c737eeb20b5ba8</sha1>
|
||||
<format>Item Tile</format>
|
||||
<rotation>0</rotation>
|
||||
</file>
|
||||
<file name="_cloth_detection.log" source="derivative">
|
||||
<cloth_cover_detection_module_version>1.2</cloth_cover_detection_module_version>
|
||||
<format>Cloth Cover Detection Log</format>
|
||||
<original>scandata.zip</original>
|
||||
<mtime>1682523106</mtime>
|
||||
<size>717</size>
|
||||
<md5>3ed8a80448263ebfad242b8fa5a1a18d</md5>
|
||||
<crc32>d886dce1</crc32>
|
||||
<sha1>4f8d103cf07c29e1d3bb8273870f1daca4a89f26</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich.djvu" source="derivative">
|
||||
<format>DjVu</format>
|
||||
<original>adventuresofsher00doylrich_djvu.xml</original>
|
||||
<mtime>1288609616</mtime>
|
||||
<size>11176526</size>
|
||||
<md5>52ccad905110bc7f3c0fe032ef744fa1</md5>
|
||||
<crc32>91973508</crc32>
|
||||
<sha1>aa5214400b60e3b68f5eaa9733e10af4bcfe4e8d</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich.gif" source="derivative">
|
||||
<format>Animated GIF</format>
|
||||
<original>adventuresofsher00doylrich_jp2.zip</original>
|
||||
<mtime>1288593173</mtime>
|
||||
<size>334379</size>
|
||||
<md5>4d56403c12771ed7e396f6187d95502d</md5>
|
||||
<crc32>1a8585c1</crc32>
|
||||
<sha1>6062155959dd69f279a489b3761afa0a1638173d</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich.pdf" source="derivative">
|
||||
<pdf_module_version>0.0.22</pdf_module_version>
|
||||
<format>Text PDF</format>
|
||||
<original>adventuresofsher00doylrich_page_numbers.json</original>
|
||||
<mtime>1682525473</mtime>
|
||||
<size>26126569</size>
|
||||
<md5>98ad55b9989671f823228055bfbca465</md5>
|
||||
<crc32>cdc5f40d</crc32>
|
||||
<sha1>e358dd22e04f7efcd4dfc8a6c2d714a0e0a8297c</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_archive.torrent" source="metadata">
|
||||
<btih>4b5bbf39f9e489f66e0c70a9ac45af495b31a2ba</btih>
|
||||
<mtime>1688944719</mtime>
|
||||
<size>17197</size>
|
||||
<md5>91aa4822d1552ef8bd08b27b14d8af4d</md5>
|
||||
<crc32>10a3d397</crc32>
|
||||
<sha1>9f5d4b652ac6903e218c8ff8e854482ab3d31b99</sha1>
|
||||
<format>Archive BitTorrent</format>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_chocr.html.gz" source="derivative">
|
||||
<ocr>tesseract 5.3.0-3-g9920</ocr>
|
||||
<ocr_parameters>lang-eng;two-pass-disabled;pass-dpi-disabled;autonomous-mode-disabled;binarisation-method-otsu</ocr_parameters>
|
||||
<ocr_module_version>0.0.21</ocr_module_version>
|
||||
<ocr_detected_script>Latin</ocr_detected_script>
|
||||
<ocr_detected_script_conf>0.8867</ocr_detected_script_conf>
|
||||
<ocr_detected_lang>en</ocr_detected_lang>
|
||||
<ocr_detected_lang_conf>1.0000</ocr_detected_lang_conf>
|
||||
<format>chOCR</format>
|
||||
<original>adventuresofsher00doylrich_jp2.zip</original>
|
||||
<mtime>1682523054</mtime>
|
||||
<size>8326755</size>
|
||||
<md5>cda085dadcc74ca4bbeb768b69e7b28b</md5>
|
||||
<crc32>faa3bbc1</crc32>
|
||||
<sha1>0ceffd2d58757e30ccfcc84ff6805093e32607b3</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_dc.xml" source="original">
|
||||
<format>Dublin Core</format>
|
||||
<mtime>1682518166</mtime>
|
||||
<size>1712</size>
|
||||
<md5>bb3294746c4d5bb8102f8426454b26fb</md5>
|
||||
<crc32>7c20c9c9</crc32>
|
||||
<sha1>2bc134939b5f044396c56af740ad55029978859e</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_djvu.txt" source="derivative">
|
||||
<format>DjVuTXT</format>
|
||||
<original>adventuresofsher00doylrich_djvu.xml</original>
|
||||
<mtime>1682523254</mtime>
|
||||
<size>613882</size>
|
||||
<md5>022b57b4017bbd8e6564c6e0968ad063</md5>
|
||||
<crc32>2828f839</crc32>
|
||||
<sha1>1535e2f509fc3a81c377d2b7270d21a95af71d65</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_djvu.xml" source="derivative">
|
||||
<format>Djvu XML</format>
|
||||
<original>adventuresofsher00doylrich_hocr.html</original>
|
||||
<mtime>1682523185</mtime>
|
||||
<size>8593952</size>
|
||||
<md5>6faee3cd4ba7b3eaca3a2f5f4e8414f6</md5>
|
||||
<crc32>51713c61</crc32>
|
||||
<sha1>16dc73c2bdce4705c37d86a798d83fb780929928</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_files.xml" source="original">
|
||||
<format>Metadata</format>
|
||||
<md5>f39ff302a7965a1a077f3c4962f5d2e9</md5>
|
||||
<summation>md5</summation>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_hocr.html" source="derivative">
|
||||
<hocr_char_to_word_module_version>1.1.0</hocr_char_to_word_module_version>
|
||||
<hocr_char_to_word_hocr_version>1.1.15</hocr_char_to_word_hocr_version>
|
||||
<ocr_parameters>lang-eng;two-pass-disabled;pass-dpi-disabled;autonomous-mode-disabled;binarisation-method-otsu</ocr_parameters>
|
||||
<ocr_module_version>0.0.21</ocr_module_version>
|
||||
<ocr_detected_script>Latin</ocr_detected_script>
|
||||
<ocr_detected_script_conf>0.8867</ocr_detected_script_conf>
|
||||
<ocr_detected_lang>en</ocr_detected_lang>
|
||||
<ocr_detected_lang_conf>1.0000</ocr_detected_lang_conf>
|
||||
<format>hOCR</format>
|
||||
<original>adventuresofsher00doylrich_chocr.html.gz</original>
|
||||
<mtime>1682523150</mtime>
|
||||
<size>15785810</size>
|
||||
<md5>afbb8a9c1e6c25b3755fa6345153b13d</md5>
|
||||
<crc32>e79c31fd</crc32>
|
||||
<sha1>8b7cd7fd2010544783277f502c4103310dc2e2c4</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_hocr_pageindex.json.gz" source="derivative">
|
||||
<hocr_pageindex_module_version>1.0.0</hocr_pageindex_module_version>
|
||||
<hocr_pageindex_hocr_version>1.1.15</hocr_pageindex_hocr_version>
|
||||
<format>OCR Page Index</format>
|
||||
<original>adventuresofsher00doylrich_hocr.html</original>
|
||||
<mtime>1682523219</mtime>
|
||||
<size>4232</size>
|
||||
<md5>aec425c66ef34149cff679b8998c2a36</md5>
|
||||
<crc32>9a6e2f76</crc32>
|
||||
<sha1>cc91c1eb055652ab6c9de878c6b54e8b95bddad1</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_hocr_searchtext.txt.gz" source="derivative">
|
||||
<hocr_fts_text_module_version>1.1.0</hocr_fts_text_module_version>
|
||||
<hocr_fts_text_hocr_version>1.1.15</hocr_fts_text_hocr_version>
|
||||
<word_conf_0_10>531</word_conf_0_10>
|
||||
<word_conf_11_20>242</word_conf_11_20>
|
||||
<word_conf_21_30>298</word_conf_21_30>
|
||||
<word_conf_31_40>321</word_conf_31_40>
|
||||
<word_conf_41_50>389</word_conf_41_50>
|
||||
<word_conf_51_60>450</word_conf_51_60>
|
||||
<word_conf_61_70>671</word_conf_61_70>
|
||||
<word_conf_71_80>1059</word_conf_71_80>
|
||||
<word_conf_81_90>3102</word_conf_81_90>
|
||||
<word_conf_91_100>103350</word_conf_91_100>
|
||||
<format>OCR Search Text</format>
|
||||
<original>adventuresofsher00doylrich_hocr.html</original>
|
||||
<mtime>1682523242</mtime>
|
||||
<size>224594</size>
|
||||
<md5>2e1011def53909a247f73af66420e0c3</md5>
|
||||
<crc32>32dddf59</crc32>
|
||||
<sha1>f303467ce8f109a1d9cf49f2038c9a1d43e3c63e</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_jp2.zip" source="derivative">
|
||||
<format>Single Page Processed JP2 ZIP</format>
|
||||
<original>adventuresofsher00doylrich_raw_jp2.zip</original>
|
||||
<mtime>1288593129</mtime>
|
||||
<size>203319748</size>
|
||||
<md5>2acbe6b61d35d8bd093127f0cef724c9</md5>
|
||||
<crc32>66bcf599</crc32>
|
||||
<sha1>a8801760eec5442176281584bd6242868f81cde5</sha1>
|
||||
<filecount>364</filecount>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_marc.xml" source="original">
|
||||
<format>MARC</format>
|
||||
<md5>e951175fb1ee03c706ab6ee34754a92c</md5>
|
||||
<mtime>1682518056</mtime>
|
||||
<size>7679</size>
|
||||
<crc32>3538b620</crc32>
|
||||
<sha1>8fd493fa0971c2f7c2fdb1d84e0ec4234b878ff4</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_meta.xml" source="original">
|
||||
<format>Metadata</format>
|
||||
<mtime>1682525557</mtime>
|
||||
<size>3790</size>
|
||||
<md5>e98cb44acdb4e4f070fddb8b1f328d93</md5>
|
||||
<crc32>04ee4346</crc32>
|
||||
<sha1>c687fc00d20b516b0cf19225296e984cbfe7c315</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_metasource.xml" source="original">
|
||||
<format>MARC Source</format>
|
||||
<md5>7099c2013db04ef8a7277386ce74de54</md5>
|
||||
<mtime>1682518056</mtime>
|
||||
<size>527</size>
|
||||
<crc32>6fa5d977</crc32>
|
||||
<sha1>34ed3e366ed809a1a39a265373b9cf5b8cc06f0c</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_page_numbers.json" source="derivative">
|
||||
<format>Page Numbers JSON</format>
|
||||
<original>adventuresofsher00doylrich_djvu.xml</original>
|
||||
<mtime>1682523259</mtime>
|
||||
<size>67337</size>
|
||||
<md5>f9179733924fa4566f0e9a05c8c276dd</md5>
|
||||
<crc32>2bd8369f</crc32>
|
||||
<sha1>e4104152caa990c72f986eb63c660239f0111feb</sha1>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_raw_jp2.zip" source="original">
|
||||
<format>Single Page Raw JP2 ZIP</format>
|
||||
<md5>d2a1137d4f8fed61be945f878444923b</md5>
|
||||
<mtime>1204986411</mtime>
|
||||
<size>370667543</size>
|
||||
<crc32>12eda51c</crc32>
|
||||
<sha1>402b01b5a734ae4f857991c0977ba629a8606932</sha1>
|
||||
<filecount>364</filecount>
|
||||
</file>
|
||||
<file name="adventuresofsher00doylrich_reviews.xml" source="original">
|
||||
<mtime>1688944715</mtime>
|
||||
<size>9412</size>
|
||||
<md5>ff0c9653a7aa865ea01e032e8b657736</md5>
|
||||
<crc32>7634dbbc</crc32>
|
||||
<sha1>f93a8206d6836f08c19dd023eb552bed2106cad9</sha1>
|
||||
<format>Metadata</format>
|
||||
</file>
|
||||
<file name="scandata.zip" source="original">
|
||||
<format>Scribe Scandata ZIP</format>
|
||||
<md5>506fdf9aa054fbc7e9e2577d05d5e7cf</md5>
|
||||
<mtime>1682523105</mtime>
|
||||
<size>55175976</size>
|
||||
<crc32>232ce248</crc32>
|
||||
<sha1>9adf53bc1b8e7be5afe3526d2ab64f9696058488</sha1>
|
||||
<filecount>11</filecount>
|
||||
</file>
|
||||
</files>
|
||||
Reference in New Issue
Block a user