mirror of
https://github.com/SabreTools/SabreTools.Compression.git
synced 2026-02-13 05:35:33 +00:00
922 lines
36 KiB
C#
922 lines
36 KiB
C#
using System;
|
|
using System.Linq;
|
|
using System.Runtime.InteropServices;
|
|
using static SabreTools.Compression.libmspack.CHM.Constants;
|
|
using static SabreTools.Compression.libmspack.macros;
|
|
using static SabreTools.Compression.libmspack.system;
|
|
|
|
namespace SabreTools.Compression.libmspack.CHM
|
|
{
|
|
/// <summary>
|
|
/// A decompressor for .CHM (Microsoft HTMLHelp) files
|
|
///
|
|
/// All fields are READ ONLY.
|
|
/// </summary>
|
|
/// <see cref="mspack.DestroyCHMDecomperssor(Decompressor)"/>
|
|
public unsafe class Decompressor : BaseDecompressor
|
|
{
|
|
/// <summary>
/// Decompression state currently attached to this decompressor.
/// Starts out as null and is reset to null when the CHM it belongs to is closed.
/// </summary>
public mschmd_decompress_state d { get; private set; }

// Names of the system files that drive decompression.
// Content and ControlData are essential; ResetTable is preferred,
// but SpanInfo can be used when ResetTable is not available.
private const string content_name = "::DataSpace/Storage/MSCompressed/Content";
private const string control_name = "::DataSpace/Storage/MSCompressed/ControlData";
private const string spaninfo_name = "::DataSpace/Storage/MSCompressed/SpanInfo";
private const string rtable_name = "::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";

/// <summary>
/// The two GUIDs expected in the CHM header, stored back to back (16 bytes each)
/// so they can be compared against the header bytes in a single pass.
/// </summary>
private static readonly byte[] guids =
{
    // {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
    0x10, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
    0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,

    // {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
    0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
    0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,
};
|
|
|
|
/// <summary>
/// Initialises a CHM decompressor that uses the default mspack system,
/// has no pending error and has no decompression state attached.
/// </summary>
public Decompressor()
{
    this.system = new mspack_default_system();
    this.error = MSPACK_ERR.MSPACK_ERR_OK;
    this.d = null;
}
|
|
|
|
/// <summary>
/// Finaliser for a CHM decompressor. If a decompression state is still
/// attached, its input file handle is closed and its LZX state is released.
/// The state object and the decompressor itself are not explicitly freed.
/// </summary>
~Decompressor()
{
    mspack_system sys = this.system;

    // No state attached: nothing to release
    if (this.d == null)
    {
        return;
    }

    if (this.d.infh != null)
    {
        sys.close(this.d.infh);
    }

    if (this.d.state != null)
    {
        lzxd_free(this.d.state);
    }
}
|
|
|
|
/// <summary>
/// Opens a CHM helpfile and reads all of its headers, including the
/// full list of file entries.
///
/// On success a populated mschmd_header is returned. If an error occurs,
/// null is returned and the error code is recorded on this decompressor
/// (see last_error()).
///
/// The returned header should be released with close() once it is no
/// longer needed.
/// </summary>
/// <param name="filename">
/// The filename of the CHM helpfile. This is passed
/// directly to mspack_system::open().
/// </param>
/// <returns>The parsed mschmd_header, or null on failure</returns>
/// <see cref="close(mschmd_header)"/>
public mschmd_header open(in string filename) => chmd_real_open(filename, 1);
|
|
|
|
/// <summary>
/// Closes a previously opened CHM helpfile.
///
/// This releases the decompression state (if this CHM was the one being
/// decompressed) and the chunk cache (if one was built by fast_find()).
/// It works on helpfiles opened with either open() or fast_open().
///
/// The per-file mschmd_file entries and the header itself are not
/// explicitly freed here. After this call the header must not be used
/// with this decompressor again.
///
/// The decompressor's error code is reset to MSPACK_ERR_OK.
/// </summary>
/// <param name="chm">The CHM helpfile to close; a null value is ignored</param>
/// <see cref="open(in string)"/>
/// <see cref="fast_open(in string)"/>
public void close(mschmd_header chm)
{
    mspack_system sys = this.system;

    this.error = MSPACK_ERR.MSPACK_ERR_OK;

    // Nothing to release for a null header
    if (chm == null)
    {
        return;
    }

    // If this CHM was being decompressed, free decompression state
    if (this.d != null && (this.d.chm == chm))
    {
        if (this.d.infh != null)
        {
            sys.close(this.d.infh);
        }

        if (this.d.state != null)
        {
            lzxd_free(this.d.state);
        }

        this.d = null;
    }

    // If this CHM had a chunk cache, free it and its contents
    if (chm.chunk_cache != null)
    {
        // Iterate over the array itself so the loop bound can never
        // disagree with the number of slots that were allocated
        foreach (FixedArray<byte> cached in chm.chunk_cache)
        {
            sys.free(cached);
        }

        sys.free(chm.chunk_cache);

        // Detach the cache so a repeated close() cannot free it twice
        chm.chunk_cache = null;
    }
}
|
|
|
|
/// <summary>
/// Reads the basic CHM file headers into a pre-existing mschmd_header.
/// If the "entire" parameter is non-zero, all directory (PMGL) chunks are
/// also read and a mschmd_file entry is created for every file listed.
/// </summary>
/// <param name="sys">System interface used for reading, seeking and messages</param>
/// <param name="fh">Open handle on the CHM file, positioned at its start</param>
/// <param name="chm">Header structure to fill out</param>
/// <param name="entire">Zero to stop after the headers, non-zero to read all file entries</param>
/// <returns>
/// MSPACK_ERR_OK on success; MSPACK_ERR_READ, MSPACK_ERR_SEEK,
/// MSPACK_ERR_SIGNATURE or MSPACK_ERR_DATAFORMAT on failure.
/// MSPACK_ERR_DATAFORMAT may be returned with a partially filled file list
/// when a directory chunk contains a badly encoded entry.
/// </returns>
private MSPACK_ERR chmd_read_headers(mspack_system sys, mspack_file fh, mschmd_header chm, int entire)
{
    uint errors, num_chunks;
    FixedArray<byte> buf = new FixedArray<byte>(0x54);
    FixedArray<byte> chunk = null;
    byte* name, p, end;
    mschmd_file fi, link = null;
    long offset_hs0, filelen;
    int num_entries;
    MSPACK_ERR err = MSPACK_ERR.MSPACK_ERR_OK;

    // Initialise pointers
    chm.files = null;
    chm.sysfiles = null;
    chm.chunk_cache = null;
    chm.sec0.chm = chm;
    chm.sec0.id = 0;
    chm.sec1.chm = chm;
    chm.sec1.id = 1;
    chm.sec1.content = null;
    chm.sec1.control = null;
    chm.sec1.spaninfo = null;
    chm.sec1.rtable = null;

    // Read the first header
    if (sys.read(fh, buf, chmhead_SIZEOF) != chmhead_SIZEOF)
    {
        return MSPACK_ERR.MSPACK_ERR_READ;
    }

    // Check ITSF signature
    if (EndGetI32(buf, chmhead_Signature) != 0x46535449)
    {
        return MSPACK_ERR.MSPACK_ERR_SIGNATURE;
    }

    // Check both header GUIDs
    if (!buf.ToArray().Skip(chmhead_GUID1).Take(guids.Length).SequenceEqual(guids))
    {
        Console.Error.WriteLine("Incorrect GUIDs");
        return MSPACK_ERR.MSPACK_ERR_SIGNATURE;
    }

    chm.version = EndGetI32(buf, chmhead_Version);
    chm.timestamp = EndGetM32(buf, chmhead_Timestamp);
    chm.language = EndGetI32(buf, chmhead_LanguageID);
    if (chm.version > 3)
    {
        sys.message(fh, "WARNING; CHM version > 3");
    }

    // Read the header section table
    if (sys.read(fh, buf, chmhst3_SIZEOF) != chmhst3_SIZEOF)
    {
        return MSPACK_ERR.MSPACK_ERR_READ;
    }

    // chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
    // The offset will be corrected later, once HS1 is read.
    //
    // The 64-bit offsets are read into locals first: the address of a member
    // of a managed object cannot be taken directly, only that of a local.
    long dir_offset = 0, sec0_offset = 0;
    byte* hst = (byte*)buf.Pointer;
    if (read_off64(&offset_hs0, hst + chmhst_OffsetHS0, sys, fh) ||
        read_off64(&dir_offset, hst + chmhst_OffsetHS1, sys, fh) ||
        read_off64(&sec0_offset, hst + chmhst3_OffsetCS0, sys, fh))
    {
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    chm.dir_offset = dir_offset;
    chm.sec0.offset = sec0_offset;

    // Seek to header section 0
    if (sys.seek(fh, offset_hs0, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
    {
        return MSPACK_ERR.MSPACK_ERR_SEEK;
    }

    // Read header section 0
    if (sys.read(fh, buf, chmhs0_SIZEOF) != chmhs0_SIZEOF)
    {
        return MSPACK_ERR.MSPACK_ERR_READ;
    }

    long declared_length = 0;
    if (read_off64(&declared_length, (byte*)buf.Pointer + chmhs0_FileLen, sys, fh))
    {
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    chm.length = declared_length;

    // Compare declared CHM file size against actual size
    if (mspack_sys_filelen(sys, fh, &filelen) == 0)
    {
        if (chm.length > filelen)
        {
            sys.message(fh, $"WARNING; file possibly truncated by {chm.length - filelen} bytes");
        }
        else if (chm.length < filelen)
        {
            sys.message(fh, $"WARNING; possible {filelen - chm.length} extra bytes at end of file");
        }
    }

    // Seek to header section 1
    if (sys.seek(fh, chm.dir_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
    {
        return MSPACK_ERR.MSPACK_ERR_SEEK;
    }

    // Read header section 1
    if (sys.read(fh, buf, chmhs1_SIZEOF) != chmhs1_SIZEOF)
    {
        return MSPACK_ERR.MSPACK_ERR_READ;
    }

    // From here on dir_offset refers to the first chunk after header section 1
    chm.dir_offset = sys.tell(fh);
    chm.chunk_size = EndGetI32(buf, chmhs1_ChunkSize);
    chm.density = EndGetI32(buf, chmhs1_Density);
    chm.depth = EndGetI32(buf, chmhs1_Depth);
    chm.index_root = EndGetI32(buf, chmhs1_IndexRoot);
    chm.num_chunks = EndGetI32(buf, chmhs1_NumChunks);
    chm.first_pmgl = EndGetI32(buf, chmhs1_FirstPMGL);
    chm.last_pmgl = EndGetI32(buf, chmhs1_LastPMGL);

    if (chm.version < 3)
    {
        // Versions before 3 don't have chmhst3_OffsetCS0.
        // Multiply in 64 bits: neither value has been range-checked yet,
        // so a 32-bit product could silently wrap around.
        chm.sec0.offset = chm.dir_offset + ((long)chm.chunk_size * chm.num_chunks);
    }

    // Check if content offset or file size is wrong
    if (chm.sec0.offset > chm.length)
    {
        Console.Error.WriteLine("content section begins after file has ended");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    // Ensure there are chunks and that chunk size is
    // large enough for signature and num_entries
    if (chm.chunk_size < (pmgl_Entries + 2))
    {
        Console.Error.WriteLine("chunk size not large enough");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }
    if (chm.num_chunks == 0)
    {
        Console.Error.WriteLine("no chunks");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    // The chunk_cache data structure is not great; large values for num_chunks
    // or num_chunks*chunk_size can exhaust all memory. Until a better chunk
    // cache is implemented, put arbitrary limits on num_chunks and chunk size.
    if (chm.num_chunks > 100000)
    {
        Console.Error.WriteLine("more than 100,000 chunks");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }
    if (chm.chunk_size > 8192)
    {
        Console.Error.WriteLine("chunk size over 8192 (get in touch if this is valid)");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }
    if ((long)chm.chunk_size * (long)chm.num_chunks > chm.length)
    {
        Console.Error.WriteLine("chunks larger than entire file");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    // Common sense checks on header section 1 fields
    if (chm.chunk_size != 4096)
    {
        sys.message(fh, "WARNING; chunk size is not 4096");
    }
    if (chm.first_pmgl != 0)
    {
        sys.message(fh, "WARNING; first PMGL chunk is not zero");
    }
    if (chm.first_pmgl > chm.last_pmgl)
    {
        Console.Error.WriteLine("first pmgl chunk is after last pmgl chunk");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }
    if (chm.index_root != 0xFFFFFFFF && chm.index_root >= chm.num_chunks)
    {
        Console.Error.WriteLine("index_root outside valid range");
        return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    // If we are doing a quick read, stop here!
    if (entire == 0)
    {
        return MSPACK_ERR.MSPACK_ERR_OK;
    }

    // Seek to the first PMGL chunk, and reduce the number of chunks to read
    if (chm.first_pmgl != 0)
    {
        long pmgl_offset = (long)chm.first_pmgl * (long)chm.chunk_size;
        if (sys.seek(fh, pmgl_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_CUR) != 0)
        {
            return MSPACK_ERR.MSPACK_ERR_SEEK;
        }
    }
    num_chunks = chm.last_pmgl - chm.first_pmgl + 1;

    chunk = new FixedArray<byte>((int)chm.chunk_size);

    // Read and process all chunks from FirstPMGL to LastPMGL
    errors = 0;
    while (num_chunks-- > 0)
    {
        // Read next chunk
        if (sys.read(fh, chunk, (int)chm.chunk_size) != (int)chm.chunk_size)
        {
            sys.free(chunk);
            return MSPACK_ERR.MSPACK_ERR_READ;
        }

        // Process only directory (PMGL) chunks
        if (EndGetI32(chunk, pmgl_Signature) != 0x4C474D50) continue;

        if (EndGetI32(chunk, pmgl_QuickRefSize) < 2)
        {
            sys.message(fh, "WARNING; PMGL quickref area is too small");
        }
        if (EndGetI32(chunk, pmgl_QuickRefSize) >
            (chm.chunk_size - pmgl_Entries))
        {
            sys.message(fh, "WARNING; PMGL quickref area is too large");
        }

        // Entries start after the PMGL header; the last two bytes of the
        // chunk hold the entry count
        p = (byte*)chunk.Pointer + pmgl_Entries;
        end = (byte*)chunk.Pointer + chm.chunk_size - 2;
        num_entries = EndGetI16(chunk, (int)(chm.chunk_size - 2));

        while (num_entries-- > 0)
        {
            uint name_len, section;
            long offset, length;
            name_len = read_encint(&p, end, &err);
            if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end - p))) goto encint_err;
            name = p; p += name_len;
            section = read_encint(&p, end, &err);
            offset = read_encint(&p, end, &err);
            length = read_encint(&p, end, &err);
            if (err != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;

            // Ignore blank or one-char (e.g. "/") filenames we'd return as blank
            if (name_len < 2 || name[0] == 0x00 || name[1] == 0x00) continue;

            // Empty files and directory names are stored as a file entry at
            // offset 0 with length 0. We want to keep empty files, but not
            // directory names, which end with a "/"
            if ((offset == 0) && (length == 0))
            {
                if ((name_len > 0) && (name[name_len - 1] == '/')) continue;
            }

            if (section > 1)
            {
                sys.message(fh, $"Invalid section number '{section}'.");
                continue;
            }

            fi = new mschmd_file();
            fi.next = null;
            fi.section = (section == 0 ? (mschmd_section)chm.sec0 : (mschmd_section)chm.sec1);
            fi.offset = offset;
            fi.length = length;

            // Copy exactly name_len bytes out of the chunk and decode them.
            // The name is a byte sequence, not UTF-16, and a managed string
            // needs no terminating NUL.
            // NOTE(review): UTF-8 is assumed here, following the libmspack
            // documentation for mschmd_file::filename - confirm.
            byte[] nameBytes = new byte[name_len];
            Marshal.Copy((IntPtr)name, nameBytes, 0, (int)name_len);
            fi.filename = System.Text.Encoding.UTF8.GetString(nameBytes);

            if (name[0] == ':' && name[1] == ':')
            {
                // System file: remember the ones needed for decompression.
                // Ordinal comparison, as these are fixed identifiers.
                if (name_len == 40 && fi.filename.StartsWith(content_name, StringComparison.Ordinal))
                {
                    chm.sec1.content = fi;
                }
                else if (name_len == 44 && fi.filename.StartsWith(control_name, StringComparison.Ordinal))
                {
                    chm.sec1.control = fi;
                }
                else if (name_len == 41 && fi.filename.StartsWith(spaninfo_name, StringComparison.Ordinal))
                {
                    chm.sec1.spaninfo = fi;
                }
                else if (name_len == 105 && fi.filename.StartsWith(rtable_name, StringComparison.Ordinal))
                {
                    chm.sec1.rtable = fi;
                }

                // System files are prepended to their list
                fi.next = chm.sysfiles;
                chm.sysfiles = fi;
            }
            else
            {
                // Normal file: appended, so the list keeps directory order
                if (link != null) link.next = fi; else chm.files = fi;
                link = fi;
            }
        }

    // This is reached either when num_entries runs out, or if
    // an ENCINT is badly encoded. In the first case num_entries is -1.
    encint_err:
        if (num_entries >= 0)
        {
            Console.Error.WriteLine("bad encint before all entries could be read");
            errors++;
        }
    }

    sys.free(chunk);
    return (errors > 0) ? MSPACK_ERR.MSPACK_ERR_DATAFORMAT : MSPACK_ERR.MSPACK_ERR_OK;
}
|
|
|
|
/// <summary>
/// Extracts a file from a CHM helpfile and writes it to the given filename.
///
/// Not implemented yet: this method currently always throws.
///
/// The intended contract is that the file's own name, mschmd_file.filename,
/// is not used when writing; the caller may use it as a guide for
/// constructing an appropriate output filename. Entries from the
/// mschmd_header files and sysfiles lists, as well as mschmd_file
/// structures filled out by fast_find(), are meant to be accepted.
/// </summary>
/// <param name="file">The file to be decompressed</param>
/// <param name="filename">The filename of the file being written to</param>
/// <returns>An error code, or MSPACK_ERR_OK if successful</returns>
/// <exception cref="NotImplementedException">Always thrown by the current implementation</exception>
public MSPACK_ERR extract(mschmd_file file, in string filename)
{
    throw new NotImplementedException();
}
|
|
|
|
/// <summary>
/// Returns the error code set by the most recently called method.
///
/// This is useful for open() and fast_open(), which return null on
/// failure and record the reason on the decompressor instead of
/// returning an error code directly.
/// </summary>
/// <returns>The most recent error code</returns>
/// <see cref="open(in string)"/>
/// <see cref="extract(mschmd_file, in string)"/>
public MSPACK_ERR last_error() => this.error;
|
|
|
|
/// <summary>
/// Opens a CHM helpfile quickly.
///
/// Only the essential headers are read. A mschmd_header is still returned,
/// as with open(), but its files list is left null; no file details are
/// read automatically. Use fast_find() to obtain details for individual
/// files.
///
/// If an error occurs, null is returned and the error code is recorded on
/// this decompressor (see last_error()).
///
/// The returned header should be released with close() once it is no
/// longer needed.
/// </summary>
/// <param name="filename">
/// The filename of the CHM helpfile. This is passed
/// directly to mspack_system::open().
/// </param>
/// <returns>The parsed mschmd_header, or null on failure</returns>
/// <see cref="open(in string)"/>
/// <see cref="close(mschmd_header)"/>
/// <see cref="fast_find(mschmd_header, in string, ref mschmd_file, int)"/>
/// <see cref="extract(mschmd_file, in string)"/>
public mschmd_header fast_open(in string filename) => chmd_real_open(filename, 0);
|
|
|
|
/// <summary>
/// The real implementation of open() and fast_open(). It simply passes the
/// "entire" parameter to chmd_read_headers(), which will then either read
/// all headers, or a bare minimum.
///
/// The outcome is recorded in this decompressor's error code:
/// MSPACK_ERR_OPEN if the file could not be opened, otherwise the result of
/// reading the headers. A MSPACK_ERR_DATAFORMAT result that still produced
/// some file entries is downgraded to a warning and the partial header is
/// returned.
/// </summary>
/// <param name="filename">The filename of the CHM helpfile to open</param>
/// <param name="entire">Non-zero to read all file entries, zero for headers only</param>
/// <returns>The parsed mschmd_header, or null on failure</returns>
private mschmd_header chmd_real_open(in string filename, int entire)
{
    mspack_system sys = this.system;

    mspack_file fh = sys.open(filename, MSPACK_SYS_OPEN.MSPACK_SYS_OPEN_READ);
    if (fh == null)
    {
        this.error = MSPACK_ERR.MSPACK_ERR_OPEN;
        return null;
    }

    try
    {
        mschmd_header chm = new mschmd_header();
        chm.filename = filename;

        MSPACK_ERR result = chmd_read_headers(sys, fh, chm, entire);
        if (result != MSPACK_ERR.MSPACK_ERR_OK)
        {
            // If the error is DATAFORMAT, and there are some results, return
            // partial results with a warning, rather than nothing
            if (result == MSPACK_ERR.MSPACK_ERR_DATAFORMAT && (chm.files != null || chm.sysfiles != null))
            {
                sys.message(fh, "WARNING; contents are corrupt");
                result = MSPACK_ERR.MSPACK_ERR_OK;
            }
            else
            {
                close(chm);
                chm = null;
            }
        }

        // Assigned after close(), which resets the error code to OK
        this.error = result;
        return chm;
    }
    finally
    {
        // Release the file handle even if header parsing throws
        sys.close(fh);
    }
}
|
|
|
|
/// <summary>
/// Finds file details quickly.
///
/// Instead of reading all CHM helpfile headers and building a list of
/// files, fast_open() and fast_find() are intended for finding file
/// details only when they are needed, using the on-disk chunk index.
///
/// The caller-provided mschmd_file reference is always replaced with a
/// fresh structure. If the file was found, its section, offset and length
/// are filled in. If the file was not found, MSPACK_ERR_OK is still
/// returned but the structure is left in its freshly constructed state.
///
/// When the header's index_root names a valid chunk, the search starts
/// there and follows PMGI entries down to a PMGL chunk. Otherwise the
/// PMGL chunks are visited in order from first_pmgl, following each
/// chunk's "next chunk" field.
///
/// This method is intended to be used in conjunction with CHM helpfiles
/// opened with fast_open(), but it also works with helpfiles opened
/// using the regular open().
/// </summary>
/// <param name="chm">The CHM helpfile to search for the file</param>
/// <param name="filename">The filename of the file to search for</param>
/// <param name="f_ptr">A caller-provided mschmd_file structure; must not be null on entry</param>
/// <param name="f_size">Size of the mschmd_file structure; not used by this implementation</param>
/// <returns>
/// MSPACK_ERR_OK if the search completed (whether or not the file was found),
/// MSPACK_ERR_ARGS for null arguments, MSPACK_ERR_OPEN if the helpfile could
/// not be opened, or another error code if a chunk could not be read or parsed
/// </returns>
/// <see cref="open(in string)"/>
/// <see cref="close(mschmd_header)"/>
/// <see cref="fast_open(in string)"/>
/// <see cref="extract(mschmd_file, in string)"/>
public MSPACK_ERR fast_find(mschmd_header chm, in string filename, ref mschmd_file f_ptr, int f_size)
{
    if (chm == null || f_ptr == null)
    {
        return MSPACK_ERR.MSPACK_ERR_ARGS;
    }

    mspack_system sys = this.system;
    FixedArray<byte> chunk;
    byte* cursor = null, limit = null;
    MSPACK_ERR status = MSPACK_ERR.MSPACK_ERR_OK;
    int found = -1;
    uint chunkNum;

    // Clear the results structure
    f_ptr = new mschmd_file();

    mspack_file fh = sys.open(chm.filename, MSPACK_SYS_OPEN.MSPACK_SYS_OPEN_READ);
    if (fh == null)
    {
        return MSPACK_ERR.MSPACK_ERR_OPEN;
    }

    if (chm.index_root < chm.num_chunks)
    {
        // Go through PMGI chunk hierarchy to reach PMGL chunk
        chunkNum = chm.index_root;
        while (true)
        {
            chunk = read_chunk(chm, fh, chunkNum);
            if (chunk == null)
            {
                sys.close(fh);
                return this.error;
            }

            // Search PMGI/PMGL chunk; stop early if no entry was found
            found = search_chunk(chm, chunk, filename, &cursor, &limit);
            if (found <= 0)
            {
                break;
            }

            // Found a result. A PMGL chunk ('L' as fourth signature byte)
            // ends the descent; a PMGI entry names the next chunk to visit
            if (chunk[3] == 0x4C)
            {
                break;
            }

            chunkNum = read_encint(&cursor, limit, &status);
            if (status != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;
        }
    }
    else
    {
        // PMGL chunks only, search from first_pmgl to last_pmgl
        chunkNum = chm.first_pmgl;
        while (chunkNum <= chm.last_pmgl)
        {
            chunk = read_chunk(chm, fh, chunkNum);
            if (chunk == null)
            {
                status = this.error;
                break;
            }

            // Search PMGL chunk; stop once the file is found
            found = search_chunk(chm, chunk, filename, &cursor, &limit);
            if (found > 0)
            {
                break;
            }

            // Stop simple infinite loops: can't visit the same chunk twice
            uint nextChunk = EndGetI32(chunk, pmgl_NextChunk);
            if (chunkNum == nextChunk)
            {
                break;
            }

            chunkNum = nextChunk;
        }
    }

    if (found > 0)
    {
        // The entry was located: decode its section, offset and length
        uint sec = read_encint(&cursor, limit, &status);
        f_ptr.section = (sec == 0) ? (mschmd_section)chm.sec0 : (mschmd_section)chm.sec1;
        f_ptr.offset = read_encint(&cursor, limit, &status);
        f_ptr.length = read_encint(&cursor, limit, &status);
        if (status != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;
    }
    else if (found < 0)
    {
        status = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
    }

    sys.close(fh);
    return this.error = status;

encint_err:
    Console.Error.WriteLine("Bad encint in PGMI/PGML chunk");
    sys.close(fh);
    return this.error = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
}
|
|
|
|
/// <summary>
/// Reads the given chunk into memory, storing it in the header's chunk
/// cache so it doesn't need to be read from disk more than once.
/// </summary>
/// <param name="chm">The CHM header that owns the chunk cache</param>
/// <param name="fh">Open handle on the CHM file</param>
/// <param name="chunk_num">Zero-based number of the chunk to read</param>
/// <returns>
/// The chunk data, or null on failure. On failure the decompressor's error
/// code is set: MSPACK_ERR_DATAFORMAT for a chunk number outside the
/// directory, MSPACK_ERR_SEEK if seeking fails or the chunk does not carry
/// a PMGL/PMGI signature, MSPACK_ERR_READ if the chunk cannot be read in full.
/// </returns>
private FixedArray<byte> read_chunk(mschmd_header chm, mspack_file fh, uint chunk_num)
{
    mspack_system sys = this.system;

    // Chunk numbers decoded from the file are not otherwise validated.
    // Record an error so the caller does not report a stale error code.
    if (chunk_num >= chm.num_chunks)
    {
        this.error = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
        return null;
    }

    // Ensure chunk cache is available
    if (chm.chunk_cache == null)
    {
        chm.chunk_cache = new FixedArray<byte>[chm.num_chunks];
    }

    // Try to answer out of chunk cache
    FixedArray<byte> cached = chm.chunk_cache[chunk_num];
    if (cached != null)
    {
        return cached;
    }

    // Need to read chunk - allocate memory for it
    FixedArray<byte> buf = new FixedArray<byte>((int)chm.chunk_size);

    // Seek to block and read it. The offset is computed in 64 bits so the
    // product cannot wrap around.
    long chunk_offset = chm.dir_offset + ((long)chunk_num * chm.chunk_size);
    if (sys.seek(fh, chunk_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
    {
        this.error = MSPACK_ERR.MSPACK_ERR_SEEK;
        sys.free(buf);
        return null;
    }
    if (sys.read(fh, buf, (int)chm.chunk_size) != (int)chm.chunk_size)
    {
        this.error = MSPACK_ERR.MSPACK_ERR_READ;
        sys.free(buf);
        return null;
    }

    // Check the signature: "PMGL" or "PMGI"
    bool is_pmg = (buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47);
    bool is_l_or_i = (buf[3] == 0x4C) || (buf[3] == 0x49);
    if (!(is_pmg && is_l_or_i))
    {
        this.error = MSPACK_ERR.MSPACK_ERR_SEEK;
        sys.free(buf);
        return null;
    }

    // All OK. Store chunk in cache and return it
    chm.chunk_cache[chunk_num] = buf;
    return buf;
}
|
|
|
|
/// <summary>
/// Searches a PMGI/PMGL chunk for a given filename entry.
///
/// When an entry is found, *result points at that entry's data (either the
/// "next chunk" ENCINT for a PMGI or the section, offset and length ENCINTs
/// for a PMGL). *result_end is set to the end of the entry area whenever the
/// chunk passes its basic sanity checks.
///
/// In the case of PMGL chunks, the entry has definitely been found. In the
/// case of PMGI chunks, the entry which points to the chunk that may
/// eventually contain that entry has been found.
/// </summary>
/// <param name="chm">Header supplying the chunk size and quickref density</param>
/// <param name="chunk">The chunk to search, as returned by read_chunk()</param>
/// <param name="filename">The filename to look for</param>
/// <param name="result">Receives a pointer to the found entry's data</param>
/// <param name="result_end">Receives a pointer to the end of the entry area</param>
/// <returns>-1 on data format error, 0 if the entry is definitely not present, 1 if found</returns>
private int search_chunk(mschmd_header chm, in FixedArray<byte> chunk, in string filename, byte** result, byte** result_end)
{
    byte* p;
    uint qr_size, num_entries, qr_entries, qr_density, name_len;
    uint L, R, M, entries_off, entry_off;
    bool is_pmgl;
    int cmp = 0;
    MSPACK_ERR err = MSPACK_ERR.MSPACK_ERR_OK;

    uint fname_len = (uint)filename.Length;
    byte* chunk_start = (byte*)chunk.Pointer;

    // PMGL chunk or PMGI chunk? (note: read_chunk() has already
    // checked the rest of the characters in the chunk signature)
    if (chunk[3] == 0x4C)
    {
        is_pmgl = true;
        entries_off = pmgl_Entries;
    }
    else
    {
        is_pmgl = false;
        entries_off = pmgi_Entries;
    }

    // Step 1: binary search first filename of each QR entry
    // - target filename == entry
    //   found file
    // - target filename < all entries
    //   file not found
    // - target filename > all entries
    //   proceed to step 2 using final entry
    // - target filename between two searched entries
    //   proceed to step 2
    qr_size = EndGetI32(chunk, pmgl_QuickRefSize);
    num_entries = EndGetI16(chunk, (int)(chm.chunk_size - 2));
    qr_density = (uint)(1 + (1 << (int)chm.density));
    qr_entries = (num_entries + qr_density - 1) / qr_density;

    if (num_entries == 0)
    {
        Console.Error.WriteLine("Chunk has no entries");
        return -1;
    }

    if (qr_size > chm.chunk_size)
    {
        Console.Error.WriteLine("Quickref size > chunk size");
        return -1;
    }

    // Offsets are derived only after qr_size has been validated, so the
    // subtraction below cannot wrap around.
    // 'start' is the offset of the entry count at the very end of the chunk;
    // quickref slots are read at decreasing offsets below it.
    // 'end' marks where the quickref area begins, i.e. the end of the entries.
    int start = (int)(chm.chunk_size - 2);
    int end_off = (int)(chm.chunk_size - qr_size);
    byte* end = chunk_start + end_off;

    *result_end = end;

    if (((int)qr_entries * 2) > (start - end_off))
    {
        Console.Error.WriteLine("WARNING; more quickrefs than quickref space");
        qr_entries = 0; // But we can live with it
    }

    if (qr_entries > 0)
    {
        L = 0;
        R = qr_entries - 1;
        do
        {
            // Pick new midpoint
            M = (L + R) >> 1;

            // Compare filename with the entry this QR slot points to.
            // Slot 0 is implicit and always refers to the first entry.
            entry_off = entries_off;
            if (M != 0) entry_off += (uint)EndGetI16(chunk, start - (int)(M << 1));
            p = chunk_start + entry_off;

            name_len = read_encint(&p, end, &err);
            if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end - p))) goto encint_err;

            // NOTE(review): the entry name is passed on as char*; whether
            // compare() expects bytes or UTF-16 units is not visible here - confirm.
            cmp = compare(filename, (char*)p, fname_len, name_len);

            if (cmp == 0)
            {
                break;
            }
            else if (cmp < 0)
            {
                // Target sorts before the very first entry: not in this chunk
                if (M != 0) R = M - 1; else return 0;
            }
            else
            {
                L = M + 1;
            }
        } while (L <= R);
        M = (L + R) >> 1;

        if (cmp == 0)
        {
            // Exact match
            p += name_len;
            *result = p;
            return 1;
        }

        // Otherwise, read the group of entries for QR entry M
        entry_off = entries_off;
        if (M != 0) entry_off += (uint)EndGetI16(chunk, start - (int)(M << 1));
        p = chunk_start + entry_off;

        num_entries -= (M * qr_density);
        if (num_entries > qr_density) num_entries = qr_density;
    }
    else
    {
        p = chunk_start + entries_off;
    }

    // Step 2: linear search through the set of entries reached in step 1.
    // - filename == any entry
    //   found entry
    // - filename < all entries (PMGI) or any entry (PMGL)
    //   entry not found, stop now
    // - filename > all entries
    //   entry not found (PMGL) / maybe found (PMGI)
    *result = null;
    while (num_entries-- > 0)
    {
        name_len = read_encint(&p, end, &err);
        if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end - p))) goto encint_err;
        cmp = compare(filename, (char*)p, fname_len, name_len);
        p += name_len;

        if (cmp == 0)
        {
            // Entry found
            *result = p;
            return 1;
        }

        if (cmp < 0)
        {
            // Entry not found (PMGL) / maybe found (PMGI)
            break;
        }

        // Read and ignore the rest of this entry. Each loop below steps
        // over one ENCINT: bytes with the top bit set, ended by one without.
        if (is_pmgl)
        {
            while (p < end && (*p++ & 0x80) != 0) { } // Skip section ENCINT
            while (p < end && (*p++ & 0x80) != 0) { } // Skip offset ENCINT
            while (p < end && (*p++ & 0x80) != 0) { } // Skip length ENCINT
        }
        else
        {
            *result = p; // Store potential final result
            while (p < end && (*p++ & 0x80) != 0) { } // Skip chunk number ENCINT
        }
    }

    // PMGL? Not found. PMGI? Maybe found
    if (is_pmgl)
    {
        return 0;
    }

    return (*result != null) ? 1 : 0;

encint_err:
    Console.Error.WriteLine("bad encint while searching");
    return -1;
}
|
|
}
|
|
} |