SabreTools.Compression/libmspack/CHM/Decompressor.cs

using System;
using System.Linq;
using System.Runtime.InteropServices;
using static SabreTools.Compression.libmspack.CHM.Constants;
using static SabreTools.Compression.libmspack.macros;
using static SabreTools.Compression.libmspack.system;

namespace SabreTools.Compression.libmspack.CHM
{
    /// <summary>
    /// A decompressor for .CHM (Microsoft HTMLHelp) files
    ///
    /// All fields are READ ONLY.
    /// </summary>
    /// <see cref="mspack.DestroyCHMDecomperssor(Decompressor)"/>
    public unsafe class Decompressor : BaseDecompressor
    {
        /// <summary>
        /// Decompression state for the CHM currently being decompressed, or null
        /// when there is none. close() and the finalizer release its <c>infh</c>
        /// and <c>state</c> members when it is set.
        /// </summary>
        public mschmd_decompress_state d { get; private set; }

        // Filenames of the system files used for decompression.
        // Content and ControlData are essential.
        // ResetTable is preferred, but SpanInfo can be used if not available.
        // chmd_read_headers() matches "::"-prefixed directory entries against
        // these names (together with their exact lengths) to fill in chm.sec1.
        private const string content_name = "::DataSpace/Storage/MSCompressed/Content";
        private const string control_name = "::DataSpace/Storage/MSCompressed/ControlData";
        private const string spaninfo_name = "::DataSpace/Storage/MSCompressed/SpanInfo";
        private const string rtable_name = "::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable";

        // The GUIDs found in CHM header.
        // Both GUIDs are kept back to back (2 x 16 bytes) so that
        // chmd_read_headers() can compare them in one pass against the header
        // bytes starting at chmhead_GUID1. The first three GUID fields appear
        // byte-swapped relative to the textual form shown in the comments below.
        private static readonly byte[] guids = new byte[32]
        {
            // {7C01FD10-7BAA-11D0-9E0C-00A0-C922-E6EC}
            0x10, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
            0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC,

            // {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC}
            0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
            0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC
        };

        /// <summary>
        /// Creates a new CHM decompressor
        /// </summary>
        public Decompressor()
        {
            // Start with the default I/O backend, a clean error state and
            // no decompression in progress
            this.system = new mspack_default_system();
            this.error = MSPACK_ERR.MSPACK_ERR_OK;
            this.d = null;
        }

        /// <summary>
        /// Destroys an existing CHM decompressor
        /// </summary>
        ~Decompressor()
        {
            mspack_system sys = this.system;

            // Nothing to release unless a decompression state is still attached
            if (this.d == null)
            {
                return;
            }

            // Close the state's file handle first, then release the LZX state
            if (this.d.infh != null)
            {
                sys.close(this.d.infh);
            }

            if (this.d.state != null)
            {
                lzxd_free(this.d.state);
            }

            // The state object and the decompressor itself are not explicitly freed
        }

        /// <summary>
        /// Opens a CHM helpfile and reads its contents.
        ///
        /// If the file opened is a valid CHM helpfile, all headers will be read
        /// and a mschmd_header structure will be returned, with a full list of
        /// files.
        ///
        /// In the case of an error occurring, null is returned and the error code
        /// is available from last_error().
        ///
        /// The filename pointer should be considered "in use" until close() is
        /// called on the CHM helpfile.
        /// </summary>
        /// <param name="filename">
        /// The filename of the CHM helpfile. This is passed
        /// directly to mspack_system::open().
        /// </param>
        /// <returns>A pointer to a mschmd_header structure, or null on failure</returns>
        /// <see cref="close(mschmd_header)"/>
        // Full open: a non-zero "entire" flag makes chmd_read_headers() read every file entry
        public mschmd_header open(in string filename) => chmd_real_open(filename, 1);

        /// <summary>
        /// Closes a previously opened CHM helpfile.
        ///
        /// This closes a CHM helpfile, frees the mschmd_header and all
        /// mschmd_file structures associated with it (if any). This works on
        /// both helpfiles opened with open() and helpfiles opened with
        /// fast_open().
        ///
        /// The CHM header pointer is now invalid and cannot be used again. All
        /// mschmd_file pointers referencing that CHM are also now invalid, and
        /// cannot be used again.
        /// </summary>
        /// <param name="chm">The CHM helpfile to close</param>
        /// <see cref="open(in string)"/>
        /// <see cref="fast_open(in string)"/>
        public void close(mschmd_header chm)
        {
            mspack_system sys = this.system;

            this.error = MSPACK_ERR.MSPACK_ERR_OK;

            // Closing a null header is a harmless no-op rather than a crash
            if (chm == null)
            {
                return;
            }

            // The mschmd_file entries in chm.files / chm.sysfiles are ordinary
            // managed objects and are not explicitly freed, so there is no
            // need to walk the two lists here.

            // If this CHM was being decompressed, free decompression state
            if (this.d != null && (this.d.chm == chm))
            {
                if (this.d.infh != null) sys.close(this.d.infh);
                if (this.d.state != null) lzxd_free(this.d.state);
                this.d = null;
            }

            // If this CHM had a chunk cache, free it and contents
            if (chm.chunk_cache != null)
            {
                for (uint i = 0; i < chm.num_chunks; i++) sys.free(chm.chunk_cache[i]);
                sys.free(chm.chunk_cache);

                // Drop the reference so a stale header can never hand out
                // chunks that have already been freed
                chm.chunk_cache = null;
            }
        }

        /// <summary>
        /// Reads the basic CHM file headers. If the "entire" parameter is
        /// non-zero, all file entries will also be read. fills out a pre-existing
        /// mschmd_header structure, allocates memory for files as necessary
        /// </summary>
        private MSPACK_ERR chmd_read_headers(mspack_system sys, mspack_file fh, mschmd_header chm, int entire)
        {
            // Parsing happens in two stages:
            //  1. the ITSF header, the header section table and header sections
            //     0 and 1 are read and sanity-checked;
            //  2. unless "entire" is zero, every chunk from first_pmgl to
            //     last_pmgl is read and each PMGL directory entry is turned into
            //     a mschmd_file that is linked into chm.files or chm.sysfiles.
            // Returns MSPACK_ERR_OK, or the error for the first failing step;
            // badly encoded entries are counted and reported as
            // MSPACK_ERR_DATAFORMAT once all chunks have been processed.
            uint errors, num_chunks;

            // NOTE(review): unlike "chunk" below, this buffer is never handed
            // to sys.free() - confirm whether FixedArray needs an explicit release
            FixedArray<byte> buf = new FixedArray<byte>(0x54);
            FixedArray<byte> chunk = null;
            byte* name, p, end;
            mschmd_file fi, link = null;
            long offset_hs0, filelen;
            int num_entries;
            MSPACK_ERR err = MSPACK_ERR.MSPACK_ERR_OK;

            // Initialise pointers
            chm.files = null;
            chm.sysfiles = null;
            chm.chunk_cache = null;
            chm.sec0.chm = chm;
            chm.sec0.id = 0;
            chm.sec1.chm = chm;
            chm.sec1.id = 1;
            chm.sec1.content = null;
            chm.sec1.control = null;
            chm.sec1.spaninfo = null;
            chm.sec1.rtable = null;

            // Read the first header
            if (sys.read(fh, buf, chmhead_SIZEOF) != chmhead_SIZEOF)
            {
                return MSPACK_ERR.MSPACK_ERR_READ;
            }

            // Check ITSF signature
            if (EndGetI32(buf, chmhead_Signature) != 0x46535449)
            {
                return MSPACK_ERR.MSPACK_ERR_SIGNATURE;
            }

            // Check both header GUIDs
            if (!buf.ToArray().Skip(chmhead_GUID1).Take(guids.Length).SequenceEqual(guids))
            {
                Console.Error.WriteLine("Incorrect GUIDs");
                return MSPACK_ERR.MSPACK_ERR_SIGNATURE;
            }

            chm.version = EndGetI32(buf, chmhead_Version);
            chm.timestamp = EndGetM32(buf, chmhead_Timestamp);
            chm.language = EndGetI32(buf, chmhead_LanguageID);
            if (chm.version > 3)
            {
                sys.message(fh, "WARNING; CHM version > 3");
            }

            // Read the header section table
            if (sys.read(fh, buf, chmhst3_SIZEOF) != chmhst3_SIZEOF)
            {
                return MSPACK_ERR.MSPACK_ERR_READ;
            }

            // chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
            // The offset will be corrected later, once HS1 is read.
            if (read_off64(&offset_hs0, &buf[chmhst_OffsetHS0], sys, fh) ||
                read_off64(&chm.dir_offset, &buf[chmhst_OffsetHS1], sys, fh) ||
                read_off64(&chm.sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh))
            {
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // Seek to header section 0
            if (sys.seek(fh, offset_hs0, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
            {
                return MSPACK_ERR.MSPACK_ERR_SEEK;
            }

            // Read header section 0
            if (sys.read(fh, buf, chmhs0_SIZEOF) != chmhs0_SIZEOF)
            {
                return MSPACK_ERR.MSPACK_ERR_READ;
            }

            if (read_off64(&chm.length, &buf[chmhs0_FileLen], sys, fh))
            {
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // Compare declared CHM file size against actual size
            if (mspack_sys_filelen(sys, fh, &filelen) == 0)
            {
                if (chm.length > filelen)
                {
                    sys.message(fh, $"WARNING; file possibly truncated by {chm.length - filelen} bytes");
                }
                else if (chm.length < filelen)
                {
                    sys.message(fh, $"WARNING; possible {filelen - chm.length} extra bytes at end of file");
                }
            }

            // Seek to header section 1
            if (sys.seek(fh, chm.dir_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
            {
                return MSPACK_ERR.MSPACK_ERR_SEEK;
            }

            // Read header section 1
            if (sys.read(fh, buf, chmhs1_SIZEOF) != chmhs1_SIZEOF)
            {
                return MSPACK_ERR.MSPACK_ERR_READ;
            }

            // From here on dir_offset is the position just after header
            // section 1, i.e. where the directory chunks begin
            chm.dir_offset = sys.tell(fh);
            chm.chunk_size = EndGetI32(buf, chmhs1_ChunkSize);
            chm.density = EndGetI32(buf, chmhs1_Density);
            chm.depth = EndGetI32(buf, chmhs1_Depth);
            chm.index_root = EndGetI32(buf, chmhs1_IndexRoot);
            chm.num_chunks = EndGetI32(buf, chmhs1_NumChunks);
            chm.first_pmgl = EndGetI32(buf, chmhs1_FirstPMGL);
            chm.last_pmgl = EndGetI32(buf, chmhs1_LastPMGL);

            if (chm.version < 3)
            {
                // Versions before 3 don't have chmhst3_OffsetCS0.
                // Multiply in 64 bits: neither factor has been range-checked
                // yet, so a 32-bit product could silently wrap around.
                chm.sec0.offset = chm.dir_offset + ((long)chm.chunk_size * (long)chm.num_chunks);
            }

            // Check if content offset or file size is wrong
            if (chm.sec0.offset > chm.length)
            {
                Console.Error.WriteLine("content section begins after file has ended");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // Ensure there are chunks and that chunk size is
            // large enough for signature and num_entries
            if (chm.chunk_size < (pmgl_Entries + 2))
            {
                Console.Error.WriteLine("chunk size not large enough");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }
            if (chm.num_chunks == 0)
            {
                Console.Error.WriteLine("no chunks");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // The chunk_cache data structure is not great; large values for num_chunks
            // or num_chunks*chunk_size can exhaust all memory. Until a better chunk
            // cache is implemented, put arbitrary limits on num_chunks and chunk size.
            if (chm.num_chunks > 100000)
            {
                Console.Error.WriteLine("more than 100,000 chunks");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }
            if (chm.chunk_size > 8192)
            {
                Console.Error.WriteLine("chunk size over 8192 (get in touch if this is valid)");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }
            if ((long)chm.chunk_size * (long)chm.num_chunks > chm.length)
            {
                Console.Error.WriteLine("chunks larger than entire file");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // Common sense checks on header section 1 fields
            if (chm.chunk_size != 4096)
            {
                sys.message(fh, "WARNING; chunk size is not 4096");
            }
            if (chm.first_pmgl != 0)
            {
                sys.message(fh, "WARNING; first PMGL chunk is not zero");
            }
            if (chm.first_pmgl > chm.last_pmgl)
            {
                Console.Error.WriteLine("first pmgl chunk is after last pmgl chunk");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }
            if (chm.index_root != 0xFFFFFFFF && chm.index_root >= chm.num_chunks)
            {
                Console.Error.WriteLine("index_root outside valid range");
                return MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            // If we are doing a quick read, stop here!
            if (entire == 0)
            {
                return MSPACK_ERR.MSPACK_ERR_OK;
            }

            // Seek to the first PMGL chunk, and reduce the number of chunks to read.
            // The seek is relative: the file position is still at the start of
            // the chunk area, right after header section 1.
            if (chm.first_pmgl != 0)
            {
                long pmgl_offset = (long)chm.first_pmgl * (long)chm.chunk_size;
                if (sys.seek(fh, pmgl_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_CUR) != 0)
                {
                    return MSPACK_ERR.MSPACK_ERR_SEEK;
                }
            }
            num_chunks = chm.last_pmgl - chm.first_pmgl + 1;

            chunk = new FixedArray<byte>((int)chm.chunk_size);

            // Read and process all chunks from FirstPMGL to LastPMGL
            errors = 0;
            while (num_chunks-- > 0)
            {
                // Read next chunk
                if (sys.read(fh, chunk, (int)chm.chunk_size) != (int)chm.chunk_size)
                {
                    sys.free(chunk);
                    return MSPACK_ERR.MSPACK_ERR_READ;
                }

                // Process only directory (PMGL) chunks
                if (EndGetI32(chunk, pmgl_Signature) != 0x4C474D50) continue;

                if (EndGetI32(chunk, pmgl_QuickRefSize) < 2)
                {
                    sys.message(fh, "WARNING; PMGL quickref area is too small");
                }
                if (EndGetI32(chunk, pmgl_QuickRefSize) >
                    (chm.chunk_size - pmgl_Entries))
                {
                    sys.message(fh, "WARNING; PMGL quickref area is too large");
                }

                // Entries start after the PMGL header; the final two bytes of
                // the chunk hold the entry count and are not entry data
                p = (byte*)chunk.Pointer + pmgl_Entries;
                end = (byte*)chunk.Pointer + chm.chunk_size - 2;
                num_entries = EndGetI16(chunk, (int)(chm.chunk_size - 2));

                while (num_entries-- > 0)
                {
                    uint name_len, section;
                    long offset, length;
                    name_len = read_encint(&p, end, &err);
                    if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end - p))) goto encint_err;
                    name = p; p += name_len;
                    section = read_encint(&p, end, &err);
                    offset = read_encint(&p, end, &err);
                    length = read_encint(&p, end, &err);
                    if (err != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;

                    // Ignore blank or one-char (e.g. "/") filenames we'd return as blank
                    if (name_len < 2 || name[0] == 0x00 || name[1] == 0x00) continue;

                    // Empty files and directory names are stored as a file entry at
                    // offset 0 with length 0. We want to keep empty files, but not
                    // directory names, which end with a "/"
                    if ((offset == 0) && (length == 0))
                    {
                        if ((name_len > 0) && (name[name_len - 1] == '/')) continue;
                    }

                    if (section > 1)
                    {
                        sys.message(fh, $"Invalid section number '{section}'.");
                        continue;
                    }

                    fi = new mschmd_file();
                    fi.next = null;
                    fi.section = (section == 0 ? (mschmd_section)chm.sec0 : (mschmd_section)chm.sec1);
                    fi.offset = offset;
                    fi.length = length;

                    // The entry name is name_len raw bytes inside the chunk.
                    // Copy exactly that many bytes and decode them; no
                    // terminator is needed for a managed string.
                    // NOTE(review): UTF-8 is assumed for entry names - confirm
                    byte[] filenameBytes = new byte[name_len];
                    Marshal.Copy((IntPtr)name, filenameBytes, 0, (int)name_len);
                    fi.filename = System.Text.Encoding.UTF8.GetString(filenameBytes);

                    if (name[0] == ':' && name[1] == ':')
                    {
                        // System file. The length check combined with the ordinal
                        // prefix check amounts to an exact, culture-independent match.
                        if (name_len == 40 && fi.filename.StartsWith(content_name, StringComparison.Ordinal))
                        {
                            chm.sec1.content = fi;
                        }
                        else if (name_len == 44 && fi.filename.StartsWith(control_name, StringComparison.Ordinal))
                        {
                            chm.sec1.control = fi;
                        }
                        else if (name_len == 41 && fi.filename.StartsWith(spaninfo_name, StringComparison.Ordinal))
                        {
                            chm.sec1.spaninfo = fi;
                        }
                        else if (name_len == 105 && fi.filename.StartsWith(rtable_name, StringComparison.Ordinal))
                        {
                            chm.sec1.rtable = fi;
                        }

                        // System files are pushed onto the front of their list
                        fi.next = chm.sysfiles;
                        chm.sysfiles = fi;
                    }
                    else
                    {
                        // Normal file: append, preserving on-disk order
                        if (link != null) link.next = fi; else chm.files = fi;
                        link = fi;
                    }
                }

            // This is reached either when num_entries runs out, or if
            // an ENCINT is badly encoded. A normally finished loop leaves
            // num_entries at -1, so a non-negative value means an early exit.
            encint_err:
                if (num_entries >= 0)
                {
                    Console.Error.WriteLine("bad encint before all entries could be read");
                    errors++;
                }
            }

            sys.free(chunk);
            return (errors > 0) ? MSPACK_ERR.MSPACK_ERR_DATAFORMAT : MSPACK_ERR.MSPACK_ERR_OK;
        }

        /// <summary>
        /// Extracts a file from a CHM helpfile.
        ///
        /// This extracts a file from a CHM helpfile and writes it to the given
        /// filename. The filename of the file, mscabd_file::filename, is not
        /// used by extract(), but can be used by the caller as a guide for
        /// constructing an appropriate filename.
        ///
        /// This method works both with files found in the mschmd_header::files
        /// and mschmd_header::sysfiles list and mschmd_file structures generated
        /// on the fly by fast_find().
        /// </summary>
        /// <param name="file">The file to be decompressed</param>
        /// <param name="filename">The filename of the file being written to</param>
        /// <returns>An error code, or MSPACK_ERR_OK if successful</returns>
        public MSPACK_ERR extract(mschmd_file file, in string filename)
        {
            // Extraction is not implemented; calling this always throws
            throw new NotImplementedException();
        }

        /// <summary>
        /// Returns the error code set by the most recently called method.
        ///
        /// This is useful for open() and fast_open(), which do not return an
        /// error code directly.
        /// </summary>
        /// <returns>The most recent error code</returns>
        /// <see cref="open(in string)"/>
        /// <see cref="extract(mschmd_file, in string)"/>
        // open() and fast_open() report failures only through this.error, so
        // this accessor has to return it rather than throw
        public MSPACK_ERR last_error() => this.error;

        /// <summary>
        /// Opens a CHM helpfile quickly.
        ///
        /// If the file opened is a valid CHM helpfile, only essential headers
        /// will be read. A mschmd_header structure will be still be returned, as
        /// with open(), but the mschmd_header::files field will be null. No
        /// files details will be automatically read.  The fast_find() method
        /// must be used to obtain file details.
        ///
        /// In the case of an error occurring, null is returned and the error code
        /// is available from last_error().
        ///
        /// The filename pointer should be considered "in use" until close() is
        /// called on the CHM helpfile.
        /// </summary>
        /// <param name="filename">
        /// The filename of the CHM helpfile. This is passed
        /// directly to mspack_system::open().
        /// </param>
        /// <returns>A pointer to a mschmd_header structure, or null on failure</returns>
        /// <see cref="open(in string)"/>
        /// <see cref="close(mschmd_header)"/>
        /// <see cref="fast_find(mschmd_header, in string, ref mschmd_file, int)"/>
        /// <see cref="extract(mschmd_file, in string)"/>
        // Quick open: a zero "entire" flag makes chmd_read_headers() stop before the file entries
        public mschmd_header fast_open(in string filename) => chmd_real_open(filename, 0);

        /// <summary>
        /// The real implementation of chmd_open() and chmd_fast_open(). It simply
        /// passes the "entire" parameter to chmd_read_headers(), which will then
        /// either read all headers, or a bare minimum.
        /// </summary>
        private mschmd_header chmd_real_open(in string filename, int entire)
        {
            mspack_system sys = this.system;

            // Without a readable file there is nothing to parse
            mspack_file handle = sys.open(filename, MSPACK_SYS_OPEN.MSPACK_SYS_OPEN_READ);
            if (handle == null)
            {
                this.error = MSPACK_ERR.MSPACK_ERR_OPEN;
                return null;
            }

            mschmd_header header = new mschmd_header();
            header.filename = filename;

            MSPACK_ERR status = chmd_read_headers(sys, handle, header, entire);
            if (status != MSPACK_ERR.MSPACK_ERR_OK)
            {
                // A DATAFORMAT error that still produced some entries is
                // downgraded to a warning so the partial listing is returned;
                // every other failure discards the header
                if (status == MSPACK_ERR.MSPACK_ERR_DATAFORMAT && (header.files != null || header.sysfiles != null))
                {
                    sys.message(handle, "WARNING; contents are corrupt");
                    status = MSPACK_ERR.MSPACK_ERR_OK;
                }
                else
                {
                    close(header);
                    header = null;
                }
            }

            // Record the outcome after close(), which resets the error field
            this.error = status;
            sys.close(handle);
            return header;
        }

        /// <summary>
        /// Finds file details quickly.
        ///
        /// Instead of reading all CHM helpfile headers and building a list of
        /// files, fast_open() and fast_find() are intended for finding file
        /// details only when they are needed. The CHM file format includes an
        /// on-disk file index to allow this.
        ///
        /// Given a case-sensitive filename, fast_find() will search the on-disk
        /// index for that file.
        ///
        /// If the file was found, the caller-provided mschmd_file structure will
        /// be filled out like so:
        /// - section: the correct value for the found file
        /// - offset: the correct value for the found file
        /// - length: the correct value for the found file
        /// - all other structure elements: null or 0
        ///
        /// If the file was not found, MSPACK_ERR_OK will still be returned as the
        /// result, but the caller-provided structure will be filled out like so:
        /// - section: null
        /// - offset: 0
        /// - length: 0
        /// - all other structure elements: null or 0
        ///
        /// This method is intended to be used in conjunction with CHM helpfiles
        /// opened with fast_open(), but it also works with helpfiles opened
        /// using the regular open().
        /// </summary>
        /// <param name="chm">The CHM helpfile to search for the file</param>
        /// <param name="filename">The filename of the file to search for</param>
        /// <param name="f_ptr">A pointer to a caller-provided mschmd_file structure</param>
        /// <param name="f_size"><tt>sizeof(mschmd_file)</tt></param>
        /// <returns>An error code, or MSPACK_ERR_OK if successful</returns>
        /// <see cref="open(in string)"/>
        /// <see cref="close(mschmd_header)"/>
        /// <see cref="fast_find(mschmd_header, in string, ref mschmd_file, int)"/>
        /// <see cref="extract(mschmd_file, in string)"/>
        public MSPACK_ERR fast_find(mschmd_header chm, in string filename, ref mschmd_file f_ptr, int f_size)
        {
            mspack_system sys;
            mspack_file fh;

            // p and end are set by search_chunk() when it reports a match; they
            // then delimit the ENCINT data belonging to the matched entry
            FixedArray<byte> chunk;
            byte* p = null, end = null;
            MSPACK_ERR err = MSPACK_ERR.MSPACK_ERR_OK;
            int result = -1;
            uint n, sec;

            // Number of chunks examined so far. A well-formed index never needs
            // more lookups than there are chunks, so exceeding that means the
            // chunk links form a cycle.
            uint visited = 0;

            // A null filename is rejected up front: it would otherwise fail
            // inside search_chunk() after the file has already been opened
            if (chm == null || filename == null || f_ptr == null)
            {
                return MSPACK_ERR.MSPACK_ERR_ARGS;
            }

            sys = this.system;

            // Clear the results structure
            f_ptr = new mschmd_file();

            if ((fh = sys.open(chm.filename, MSPACK_SYS_OPEN.MSPACK_SYS_OPEN_READ)) == null)
            {
                return MSPACK_ERR.MSPACK_ERR_OPEN;
            }

            // Go through PMGI chunk hierarchy to reach PMGL chunk
            if (chm.index_root < chm.num_chunks)
            {
                n = chm.index_root;
                for (; ; )
                {
                    if (visited++ >= chm.num_chunks) goto loop_err;

                    if ((chunk = read_chunk(chm, fh, n)) == null)
                    {
                        sys.close(fh);
                        return this.error;
                    }

                    // Search PMGI/PMGL chunk. exit early if no entry found
                    if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0)
                    {
                        break;
                    }

                    // Found result. loop around for next chunk if this is PMGI
                    // (the fourth signature byte is 'L' for a PMGL chunk)
                    if (chunk[3] == 0x4C) break;

                    n = read_encint(&p, end, &err);
                    if (err != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;
                }
            }
            else
            {
                // PMGL chunks only, search from first_pmgl to last_pmgl
                for (n = chm.first_pmgl; n <= chm.last_pmgl; n = EndGetI32(chunk, pmgl_NextChunk))
                {
                    if (visited++ >= chm.num_chunks) goto loop_err;

                    if ((chunk = read_chunk(chm, fh, n)) == null)
                    {
                        err = this.error;
                        break;
                    }

                    // Search PMGL chunk. exit if file found
                    if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0)
                    {
                        break;
                    }

                    // Stop simple infinite loops: can't visit the same chunk twice
                    if (n == EndGetI32(chunk, pmgl_NextChunk))
                    {
                        break;
                    }
                }
            }

            // If we found a file, read it
            if (result > 0)
            {
                sec = read_encint(&p, end, &err);
                f_ptr.section = (sec == 0) ? (mschmd_section)chm.sec0 : (mschmd_section)chm.sec1;
                f_ptr.offset = read_encint(&p, end, &err);
                f_ptr.length = read_encint(&p, end, &err);
                if (err != MSPACK_ERR.MSPACK_ERR_OK) goto encint_err;
            }
            else if (result < 0)
            {
                err = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
            }

            sys.close(fh);
            return this.error = err;

        encint_err:
            Console.Error.WriteLine("Bad encint in PGMI/PGML chunk");
            sys.close(fh);
            return this.error = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;

        loop_err:
            Console.Error.WriteLine("Chunk links form a loop");
            sys.close(fh);
            return this.error = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
        }

        /// <summary>
        /// Reads the given chunk into memory, storing it in a chunk cache
        /// so it doesn't need to be read from disk more than once
        /// </summary>
        /// <returns></returns>
        private FixedArray<byte> read_chunk(mschmd_header chm, mspack_file fh, uint chunk_num)
        {
            // Returns the requested chunk, or null with this.error set to the
            // reason for the failure.
            mspack_system sys = this.system;
            FixedArray<byte> buf;

            // Chunk numbers come from file data, so an out-of-range one means
            // the file is malformed. Record that, otherwise the caller would
            // report whatever error code an earlier call happened to leave.
            if (chunk_num >= chm.num_chunks)
            {
                this.error = MSPACK_ERR.MSPACK_ERR_DATAFORMAT;
                return null;
            }

            // Ensure chunk cache is available (one slot per chunk)
            if (chm.chunk_cache == null)
            {
                chm.chunk_cache = new FixedArray<byte>[chm.num_chunks];
            }

            // Try to answer out of chunk cache
            if (chm.chunk_cache[chunk_num] != null) return chm.chunk_cache[chunk_num];

            // Need to read chunk - allocate memory for it
            buf = new FixedArray<byte>((int)chm.chunk_size);

            // Seek to block and read it. The offset is computed in 64 bits so
            // the multiplication cannot wrap.
            long chunk_offset = chm.dir_offset + ((long)chunk_num * (long)chm.chunk_size);
            if (sys.seek(fh, chunk_offset, MSPACK_SYS_SEEK.MSPACK_SYS_SEEK_START) != 0)
            {
                this.error = MSPACK_ERR.MSPACK_ERR_SEEK;
                sys.free(buf);
                return null;
            }
            if (sys.read(fh, buf, (int)chm.chunk_size) != (int)chm.chunk_size)
            {
                this.error = MSPACK_ERR.MSPACK_ERR_READ;
                sys.free(buf);
                return null;
            }

            // Check the signature. Is it PMGL or PMGI?
            // A mismatch is reported with the SEEK error code.
            if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) && ((buf[3] == 0x4C) || (buf[3] == 0x49))))
            {
                this.error = MSPACK_ERR.MSPACK_ERR_SEEK;
                sys.free(buf);
                return null;
            }

            // All OK. Store chunk in cache and return it
            return chm.chunk_cache[chunk_num] = buf;
        }

        /// <summary>
        /// Searches a PMGI/PMGL chunk for a given filename entry.
        ///
        /// In the case of PMGL chunks, a return value of 1 means the entry has
        /// definitely been found. In the case of PMGI chunks, it means the entry
        /// which points to the chunk that may eventually contain that entry has
        /// been found.
        /// </summary>
        /// <param name="chm">Header supplying the <c>chunk_size</c> and <c>density</c> values used to locate the quickref area</param>
        /// <param name="chunk">Chunk data; read_chunk() has already validated the first three signature bytes</param>
        /// <param name="filename">Name of the entry to search for</param>
        /// <param name="result">
        /// When 1 is returned, points to the matched entry's data (either the
        /// "next chunk" ENCINT for a PMGI or the section, offset and length
        /// ENCINTs for a PMGL). Only meaningful when 1 is returned.
        /// </param>
        /// <param name="result_end">Set to the end of the entry area (<c>chunk_size - quickref size</c>) once the header checks have passed</param>
        /// <returns>-1 on data format error, 0 if the entry was definitely not found, 1 if the entry was found</returns>
        private int search_chunk(mschmd_header chm, in FixedArray<byte> chunk, in string filename, byte** result, byte** result_end)
        {
            byte* p;
            uint qr_size, num_entries, qr_entries, qr_density, name_len;
            uint L, R, M, entries_off;
            bool is_pmgl;
            int cmp;
            MSPACK_ERR err = MSPACK_ERR.MSPACK_ERR_OK;

            uint fname_len = (uint)filename.Length;

            // PMGL chunk or PMGI chunk? (note: read_chunk() has already
            // checked the rest of the characters in the chunk signature)
            if (chunk[3] == 0x4C)
            {
                is_pmgl = true;
                entries_off = pmgl_Entries;
            }
            else
            {
                is_pmgl = false;
                entries_off = pmgi_Entries;
            }

            //  Step 1: binary search first filename of each QR entry
            //  - target filename == entry
            //    found file
            //  - target filename < all entries
            //    file not found
            //  - target filename > all entries
            //    proceed to step 2 using final entry
            //  - target filename between two searched entries
            //    proceed to step 2
            qr_size = EndGetI32(chunk, pmgl_QuickRefSize);
            int start = (int)(chm.chunk_size - 2);        // offset of the entry count (last 2 bytes of the chunk)
            int end = (int)(chm.chunk_size - qr_size);    // offset where the entry area stops
            num_entries = EndGetI16(chunk, start);
            qr_density = (uint)(1 + (1 << (int)chm.density));
            qr_entries = (num_entries + qr_density - 1) / qr_density;

            if (num_entries == 0)
            {
                Console.Error.WriteLine("Chunk has no entries");
                return -1;
            }

            if (qr_size > chm.chunk_size)
            {
                Console.Error.WriteLine("Quickref size > chunk size");
                return -1;
            }

            // Pointer form of `end`; every pointer bound check below uses this
            // rather than the integer offset, which cannot be compared with `p`.
            byte* end_ptr = &chunk[end];
            *result_end = end_ptr;

            if (((int)qr_entries * 2) > (start - end))
            {
                Console.Error.WriteLine("WARNING; more quickrefs than quickref space");
                qr_entries = 0; // But we can live with it
            }

            if (qr_entries > 0)
            {
                L = 0;
                R = qr_entries - 1;
                do
                {
                    // Pick new midpoint
                    M = (L + R) >> 1;

                    // Compare filename with entry QR points to. Quickref slot 0
                    // is implicit (offset 0); slot M is read from start - 2*M.
                    p = &chunk[entries_off + (M != 0 ? EndGetI16(chunk, start - (int)(M << 1)) : 0)];

                    // NOTE(review): read_encint is declared outside this view, so the
                    // call is kept exactly as before. If its bound parameter is a
                    // pointer, end_ptr is the value to pass - confirm.
                    name_len = read_encint(&p, end, &err);
                    if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end_ptr - p))) goto encint_err;

                    // NOTE(review): compare is declared outside this view; the (char*)
                    // cast is carried over unchanged - confirm it expects raw name bytes.
                    cmp = compare(filename, (char*)p, fname_len, name_len);

                    if (cmp == 0)
                    {
                        break;
                    }
                    else if (cmp < 0)
                    {
                        // Smaller than the first name of the first group: not in this chunk
                        if (M == 0) return 0;
                        R = M - 1;
                    }
                    else
                    {
                        L = M + 1;
                    }
                } while (L <= R);
                M = (L + R) >> 1;

                if (cmp == 0)
                {
                    // Exact match!
                    p += name_len;
                    *result = p;
                    return 1;
                }

                // Otherwise, read the group of entries for QR entry M
                p = &chunk[entries_off + (M != 0 ? EndGetI16(chunk, start - (int)(M << 1)) : 0)];
                num_entries -= (M * qr_density);
                if (num_entries > qr_density) num_entries = qr_density;
            }
            else
            {
                p = &chunk[entries_off];
            }

            // Step 2: linear search through the set of entries reached in step 1.
            // - filename == any entry
            //   found entry
            // - filename < all entries (PMGI) or any entry (PMGL)
            //   entry not found, stop now
            // - filename > all entries
            //   entry not found (PMGL) / maybe found (PMGI)
            *result = null;
            while (num_entries-- > 0)
            {
                // NOTE(review): same read_encint caveat as in step 1.
                name_len = read_encint(&p, end, &err);
                if (err != MSPACK_ERR.MSPACK_ERR_OK || (name_len > (uint)(end_ptr - p))) goto encint_err;
                cmp = compare(filename, (char*)p, fname_len, name_len);
                p += name_len;

                if (cmp == 0)
                {
                    // Entry found
                    *result = p;
                    return 1;
                }

                if (cmp < 0)
                {
                    // Entry not found (PMGL) / maybe found (PMGI)
                    break;
                }

                // Read and ignore the rest of this entry. Each ENCINT ends at the
                // first byte whose high bit is clear.
                if (is_pmgl)
                {
                    while (p < end_ptr && (*p++ & 0x80) != 0) { } // Skip section ENCINT
                    while (p < end_ptr && (*p++ & 0x80) != 0) { } // Skip offset ENCINT
                    while (p < end_ptr && (*p++ & 0x80) != 0) { } // Skip length ENCINT
                }
                else
                {
                    *result = p; // Store potential final result
                    while (p < end_ptr && (*p++ & 0x80) != 0) { } // Skip chunk number ENCINT
                }
            }

            // PMGL? Not found. PMGI? Maybe found
            if (is_pmgl)
                return 0;

            return (*result != null) ? 1 : 0;

        encint_err:
            Console.Error.WriteLine("bad encint while searching");
            return -1;
        }
    }
}