// Reconstructed from a diff that added two files:
//   BurnOutSharp/FileType/MicrosoftCAB.LZX.cs
//   BurnOutSharp/FileType/MicrosoftCAB.MSCAB.cs
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using BurnOutSharp.Tools;

// ---- BurnOutSharp/FileType/MicrosoftCAB.LZX.cs ----
namespace BurnOutSharp.FileType
{
    #region TEMPORARY AREA FOR LZX COMPRESSION FORMAT

    // See the following for details about implementation (there is no open spec):
    // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzx.h
    // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxc.c
    // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxd.c

    #endregion
}

// ---- BurnOutSharp/FileType/MicrosoftCAB.MSCAB.cs ----
namespace BurnOutSharp.FileType
{
    /// <summary>
    /// In-memory representation of a Microsoft cabinet (MS-CAB) file: its header,
    /// folder entries and file entries.
    /// </summary>
    // TODO: Add multi-cabinet reading
    internal class MSCABCabinet
    {
        #region Constants

        /// <summary>
        /// A maximum uncompressed size of an input file to store in CAB
        /// </summary>
        public const uint MaximumUncompressedFileSize = 0x7FFF8000;

        /// <summary>
        /// A maximum file COUNT
        /// </summary>
        public const ushort MaximumFileCount = 0xFFFF;

        /// <summary>
        /// A maximum size of a created CAB (compressed)
        /// </summary>
        public const uint MaximumCabSize = 0x7FFFFFFF;

        /// <summary>
        /// A maximum CAB-folder COUNT
        /// </summary>
        public const ushort MaximumFolderCount = 0xFFFF;

        /// <summary>
        /// A maximum uncompressed data size in a CAB-folder
        /// </summary>
        public const uint MaximumUncompressedFolderSize = 0x7FFF8000;

        /// <summary>
        /// Longest null-terminated string accepted by <see cref="GetNullTerminatedString"/>,
        /// not counting the terminator
        /// </summary>
        private const int MaximumStringSize = 256;

        #endregion

        #region Properties

        /// <summary>
        /// Cabinet header
        /// </summary>
        public CFHEADER Header { get; private set; }

        /// <summary>
        /// One or more CFFOLDER entries
        /// </summary>
        public CFFOLDER[] Folders { get; private set; }

        /// <summary>
        /// A series of one or more cabinet file (CFFILE) entries
        /// </summary>
        public CFFILE[] Files { get; private set; }

        #endregion

        #region Serialization

        /// <summary>
        /// Deserialize <paramref name="data"/> at <paramref name="dataPtr"/> into a MSCABCabinet object
        /// </summary>
        /// <param name="data">Buffer that contains the cabinet</param>
        /// <param name="dataPtr">
        /// Offset of the cabinet signature within <paramref name="data"/>; advanced as entries are read
        /// </param>
        /// <returns>The parsed cabinet, or null if any part of it could not be read</returns>
        public static MSCABCabinet Deserialize(byte[] data, ref int dataPtr)
        {
            if (data == null || dataPtr < 0)
                return null;

            // Offsets stored in the cabinet are applied relative to where the cabinet starts
            int basePtr = dataPtr;
            MSCABCabinet cabinet = new MSCABCabinet();

            // Start with the header
            cabinet.Header = CFHEADER.Deserialize(data, ref dataPtr);
            if (cabinet.Header == null)
                return null;

            // Then retrieve all folder headers
            cabinet.Folders = new CFFOLDER[cabinet.Header.FolderCount];
            for (int i = 0; i < cabinet.Folders.Length; i++)
            {
                cabinet.Folders[i] = CFFOLDER.Deserialize(data, ref dataPtr, basePtr, cabinet.Header.FolderReservedSize, cabinet.Header.DataReservedSize);
                if (cabinet.Folders[i] == null)
                    return null;
            }

            // We need to move to where the file headers are stored. The offset comes from
            // the file itself, so make sure it still points inside the buffer.
            long filesPtr = (long)basePtr + cabinet.Header.FilesOffset;
            if (filesPtr > data.Length)
                return null;

            dataPtr = (int)filesPtr;

            // Then retrieve all file headers
            cabinet.Files = new CFFILE[cabinet.Header.FileCount];
            for (int i = 0; i < cabinet.Files.Length; i++)
            {
                cabinet.Files[i] = CFFILE.Deserialize(data, ref dataPtr);
                if (cabinet.Files[i] == null)
                    return null;
            }

            return cabinet;
        }

        #endregion

        #region Public Functionality

        /// <summary>
        /// Find the start of an MS-CAB cabinet in a set of data, if possible
        /// </summary>
        /// <param name="data">Buffer to search for the cabinet signature</param>
        /// <returns>Index of the signature, or -1 if it was not found</returns>
        public int FindCabinet(byte[] data)
        {
            if (data == null || data.Length < CFHEADER.SignatureBytes.Length)
                return -1;

            if (data.FirstPosition(CFHEADER.SignatureBytes, out int index))
                return index;

            return -1;
        }

        /// <summary>
        /// Extract all files from the archive to <paramref name="outputDirectory"/>
        /// </summary>
        /// <param name="outputDirectory">Directory that extracted files are placed under</param>
        /// <returns>
        /// False if the cabinet has no files, or if any file has no usable name or no folder
        /// in this cabinet; true otherwise. No data is written yet (see TODOs).
        /// </returns>
        public bool ExtractAllFiles(string outputDirectory)
        {
            // Perform sanity checks
            if (Header == null || Files == null || Files.Length == 0)
                return false;

            // Loop through and extract all files
            foreach (CFFILE file in Files)
            {
                // An entry without a name cannot be mapped to an output path
                string name = file?.NameAsString;
                if (name == null)
                    return false;

                // Create the output path
                // NOTE(review): the name comes from the archive and is not sanitized here;
                // rooted or "..\" names must be rejected before anything is written.
                string outputPath = Path.Combine(outputDirectory, name);

                // If we don't have a folder, we can't continue
                CFFOLDER folder = GetFolder(file);
                if (folder == null)
                    return false;

                // TODO: We don't keep the stream open or accessible here to seek
                // TODO: We don't check for other cabinets here yet
                // TODO: Read and decompress data blocks
            }

            return true;
        }

        /// <summary>
        /// Extract a single file from the archive to <paramref name="outputDirectory"/>
        /// </summary>
        /// <param name="filePath">Name of the file to look for</param>
        /// <param name="outputDirectory">Directory that the extracted file is placed under</param>
        /// <param name="exact">
        /// True to require the stored name to equal <paramref name="filePath"/>; false to accept
        /// any stored name that ends with it, ignoring case
        /// </param>
        /// <returns>
        /// False if no entry matches or the matching entry has no folder in this cabinet;
        /// true otherwise. No data is written yet (see TODOs).
        /// </returns>
        public bool ExtractFile(string filePath, string outputDirectory, bool exact = false)
        {
            // Perform sanity checks
            if (Header == null || Files == null || Files.Length == 0 || filePath == null)
                return false;

            // Check the file exists
            CFFILE file = null;
            foreach (CFFILE candidate in Files)
            {
                // Entries without a name can never match
                string candidateName = candidate?.NameAsString;
                if (candidateName == null)
                    continue;

                // Check for a match
                bool matches = exact
                    ? candidateName == filePath
                    : candidateName.EndsWith(filePath, StringComparison.OrdinalIgnoreCase);
                if (matches)
                {
                    file = candidate;
                    break;
                }
            }

            // No matching entry was found
            if (file == null)
                return false;

            // Create the output path
            // NOTE(review): the name comes from the archive and is not sanitized here;
            // rooted or "..\" names must be rejected before anything is written.
            string outputPath = Path.Combine(outputDirectory, file.NameAsString);

            // If we don't have a folder, we can't continue
            CFFOLDER folder = GetFolder(file);
            if (folder == null)
                return false;

            // TODO: We don't keep the stream open or accessible here to seek
            // TODO: We don't check for other cabinets here yet
            // TODO: Read and decompress data blocks

            return true;
        }

        /// <summary>
        /// Print all info about the cabinet file
        /// </summary>
        public void PrintInfo()
        {
            #region CFHEADER

            if (Header == null)
            {
                Console.WriteLine("There is no header associated with this cabinet.");
                return;
            }

            Header.PrintInfo();

            #endregion

            #region CFFOLDER

            if (Folders == null || Folders.Length == 0)
            {
                Console.WriteLine("There are no folders associated with this cabinet.");
                return;
            }

            Console.WriteLine("CFFOLDER INFORMATION:");
            Console.WriteLine("--------------------------------------------");
            for (int i = 0; i < Folders.Length; i++)
            {
                CFFOLDER folder = Folders[i];
                Console.WriteLine($" CFFOLDER {i:X4}:");

                if (folder == null)
                {
                    Console.WriteLine($" Not found or null");
                    Console.WriteLine();
                    continue;
                }

                folder.PrintInfo();
            }

            Console.WriteLine();

            #endregion

            #region CFFILE

            if (Files == null || Files.Length == 0)
            {
                Console.WriteLine("There are no files associated with this cabinet.");
                return;
            }

            Console.WriteLine("CFFILE INFORMATION:");
            Console.WriteLine("--------------------------------------------");
            for (int i = 0; i < Files.Length; i++)
            {
                CFFILE file = Files[i];
                Console.WriteLine($" CFFILE {i:X4}:");

                if (file == null)
                {
                    Console.WriteLine($" Not found or null");
                    Console.WriteLine();
                    continue;
                }

                file.PrintInfo();
            }

            Console.WriteLine();

            #endregion
        }

        #endregion

        #region Internal Functionality

        /// <summary>
        /// Get a null-terminated string as a byte array from input data
        /// </summary>
        /// <param name="data">Buffer to read from</param>
        /// <param name="dataPtr">
        /// Offset of the first byte of the string; on success it is moved past the terminator
        /// </param>
        /// <returns>
        /// The string bytes without the terminator (possibly empty), or null if no terminator
        /// is found within the permitted length or before the end of the buffer
        /// </returns>
        internal static byte[] GetNullTerminatedString(byte[] data, ref int dataPtr)
        {
            if (data == null || dataPtr < 0 || dataPtr >= data.Length)
                return null;

            // Never search past the end of the buffer; Array.IndexOf throws when the
            // requested range does not fit inside the array
            int searchLength = Math.Min(MaximumStringSize + 1, data.Length - dataPtr);

            // The element type is spelled out so that the terminator is compared as a byte.
            // Without it the call binds to the non-generic overload and the boxed Int32
            // never equals any Byte element, so no terminator is ever found.
            int nullIndex = Array.IndexOf<byte>(data, 0x00, dataPtr, searchLength);
            if (nullIndex < 0)
                return null;

            int stringSize = nullIndex - dataPtr;
            byte[] str = new byte[stringSize];
            Array.Copy(data, dataPtr, str, 0, stringSize);

            // Skip the string and its terminator
            dataPtr += stringSize + 1;
            return str;
        }

        /// <summary>
        /// Get the folder in this cabinet that a file entry refers to
        /// </summary>
        /// <returns>
        /// The folder, or null if the entry uses one of the CONTINUED_* markers or its
        /// index is outside <see cref="Folders"/>
        /// </returns>
        private CFFOLDER GetFolder(CFFILE file)
        {
            if (file == null || Folders == null)
                return null;

            // The CONTINUED_* values are markers, not indexes into this cabinet
            switch (file.FolderIndex)
            {
                case FolderIndex.CONTINUED_FROM_PREV:
                case FolderIndex.CONTINUED_TO_NEXT:
                case FolderIndex.CONTINUED_PREV_AND_NEXT:
                    return null;
            }

            // The index is read from the file, so it is not guaranteed to be in range
            int index = (int)file.FolderIndex;
            return index < Folders.Length ? Folders[index] : null;
        }

        #endregion
    }

    /// <summary>
    /// The CFHEADER structure shown in the following packet diagram provides information about this
    /// cabinet (.cab) file.
    /// </summary>
    internal class CFHEADER
    {
        #region Constants

        /// <summary>
        /// Human-readable signature
        /// </summary>
        public static readonly string SignatureString = "MSCF";

        /// <summary>
        /// Signature as an unsigned Int32 value
        /// </summary>
        public const uint SignatureValue = 0x4643534D;

        /// <summary>
        /// Signature as a byte array
        /// </summary>
        public static readonly byte[] SignatureBytes = new byte[] { 0x4D, 0x53, 0x43, 0x46 };

        /// <summary>
        /// Size in bytes of the fields that are always present (Signature through CabinetIndex)
        /// </summary>
        private const int FixedSize = 36;

        #endregion

        #region Properties

        /// <summary>
        /// Contains the characters "M", "S", "C", and "F" (bytes 0x4D, 0x53, 0x43,
        /// 0x46). This field is used to ensure that the file is a cabinet (.cab) file.
        /// </summary>
        public uint Signature { get; private set; }

        /// <summary>
        /// Reserved field; MUST be set to 0 (zero).
        /// </summary>
        public uint Reserved1 { get; private set; }

        /// <summary>
        /// Specifies the total size of the cabinet file, in bytes.
        /// </summary>
        public uint CabinetSize { get; private set; }

        /// <summary>
        /// Reserved field; MUST be set to 0 (zero).
        /// </summary>
        public uint Reserved2 { get; private set; }

        /// <summary>
        /// Specifies the absolute file offset, in bytes, of the first CFFILE field entry.
        /// </summary>
        public uint FilesOffset { get; private set; }

        /// <summary>
        /// Reserved field; MUST be set to 0 (zero).
        /// </summary>
        public uint Reserved3 { get; private set; }

        /// <summary>
        /// Specifies the minor cabinet file format version. This value MUST be set to 3 (three).
        /// </summary>
        public byte VersionMinor { get; private set; }

        /// <summary>
        /// Specifies the major cabinet file format version. This value MUST be set to 1 (one).
        /// </summary>
        public byte VersionMajor { get; private set; }

        /// <summary>
        /// Specifies the number of CFFOLDER field entries in this cabinet file.
        /// </summary>
        public ushort FolderCount { get; private set; }

        /// <summary>
        /// Specifies the number of CFFILE field entries in this cabinet file.
        /// </summary>
        public ushort FileCount { get; private set; }

        /// <summary>
        /// Specifies bit-mapped values that indicate the presence of optional data.
        /// </summary>
        public HeaderFlags Flags { get; private set; }

        /// <summary>
        /// Specifies an arbitrarily derived (random) value that binds a collection of linked cabinet files
        /// together. All cabinet files in a set will contain the same setID field value. This field is used by
        /// cabinet file extractors to ensure that cabinet files are not inadvertently mixed. This value has no
        /// meaning in a cabinet file that is not in a set.
        /// </summary>
        public ushort SetID { get; private set; }

        /// <summary>
        /// Specifies the sequential number of this cabinet in a multicabinet set. The first cabinet has
        /// iCabinet=0. This field, along with the setID field, is used by cabinet file extractors to ensure that
        /// this cabinet is the correct continuation cabinet when spanning cabinet files.
        /// </summary>
        public ushort CabinetIndex { get; private set; }

        /// <summary>
        /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not
        /// present, and the value of cbCFHeader field MUST be zero. Indicates the size, in bytes, of the
        /// abReserve field in this CFHEADER structure. Values for cbCFHeader field MUST be between 0-
        /// 60,000.
        /// </summary>
        public ushort HeaderReservedSize { get; private set; }

        /// <summary>
        /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not
        /// present, and the value of cbCFFolder field MUST be zero. Indicates the size, in bytes, of the
        /// abReserve field in each CFFOLDER field entry. Values for the cbCFFolder field MUST be between
        /// 0-255.
        /// </summary>
        public byte FolderReservedSize { get; private set; }

        /// <summary>
        /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not
        /// present, and the value for the cbCFDATA field MUST be zero. The cbCFDATA field indicates the
        /// size, in bytes, of the abReserve field in each CFDATA field entry. Values for the cbCFDATA field
        /// MUST be between 0 - 255.
        /// </summary>
        public byte DataReservedSize { get; private set; }

        /// <summary>
        /// If the flags.cfhdrRESERVE_PRESENT field is set and the
        /// cbCFHeader field is non-zero, this field contains per-cabinet-file application information. This field
        /// is defined by the application, and is used for application-defined purposes.
        /// </summary>
        public byte[] ReservedData { get; private set; }

        /// <summary>
        /// If the flags.cfhdrPREV_CABINET field is not set, this
        /// field is not present. This is a null-terminated ASCII string that contains the file name of the
        /// logically previous cabinet file. The string can contain up to 255 bytes, plus the null byte. Note that
        /// this gives the name of the most recently preceding cabinet file that contains the initial instance of a
        /// file entry. This might not be the immediately previous cabinet file, when the most recent file spans
        /// multiple cabinet files. If searching in reverse for a specific file entry, or trying to extract a file that is
        /// reported to begin in the "previous cabinet," the szCabinetPrev field would indicate the name of the
        /// cabinet to examine.
        /// </summary>
        public byte[] CabinetPrev { get; private set; }

        /// <summary>
        /// If the flags.cfhdrPREV_CABINET field is not set, then this
        /// field is not present. This is a null-terminated ASCII string that contains a descriptive name for the
        /// media that contains the file named in the szCabinetPrev field, such as the text on the disk label.
        /// This string can be used when prompting the user to insert a disk. The string can contain up to 255
        /// bytes, plus the null byte.
        /// </summary>
        public byte[] DiskPrev { get; private set; }

        /// <summary>
        /// If the flags.cfhdrNEXT_CABINET field is not set, this
        /// field is not present. This is a null-terminated ASCII string that contains the file name of the next
        /// cabinet file in a set. The string can contain up to 255 bytes, plus the null byte. Files that extend
        /// beyond the end of the current cabinet file are continued in the named cabinet file.
        /// </summary>
        public byte[] CabinetNext { get; private set; }

        /// <summary>
        /// If the flags.cfhdrNEXT_CABINET field is not set, this field is
        /// not present. This is a null-terminated ASCII string that contains a descriptive name for the media
        /// that contains the file named in the szCabinetNext field, such as the text on the disk label. The
        /// string can contain up to 255 bytes, plus the null byte. This string can be used when prompting the
        /// user to insert a disk.
        /// </summary>
        public byte[] DiskNext { get; private set; }

        #endregion

        #region Serialization

        /// <summary>
        /// Deserialize <paramref name="data"/> at <paramref name="dataPtr"/> into a CFHEADER object
        /// </summary>
        /// <param name="data">Buffer that contains the header</param>
        /// <param name="dataPtr">Offset of the signature; advanced past every field that is read</param>
        /// <returns>
        /// The parsed header, or null if the buffer is too short or a field fails validation
        /// </returns>
        public static CFHEADER Deserialize(byte[] data, ref int dataPtr)
        {
            // The fixed-size fields must be fully present before any of them are read
            if (data == null || dataPtr < 0 || data.Length - dataPtr < FixedSize)
                return null;

            CFHEADER header = new CFHEADER();

            header.Signature = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            if (header.Signature != SignatureValue)
                return null;

            header.Reserved1 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            if (header.Reserved1 != 0x00000000)
                return null;

            header.CabinetSize = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            if (header.CabinetSize > MSCABCabinet.MaximumCabSize)
                return null;

            header.Reserved2 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            if (header.Reserved2 != 0x00000000)
                return null;

            header.FilesOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;

            header.Reserved3 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            if (header.Reserved3 != 0x00000000)
                return null;

            header.VersionMinor = data[dataPtr++];
            header.VersionMajor = data[dataPtr++];
            if (header.VersionMajor != 1 || header.VersionMinor != 3)
                return null;

            header.FolderCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            if (header.FolderCount > MSCABCabinet.MaximumFolderCount)
                return null;

            header.FileCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            if (header.FileCount > MSCABCabinet.MaximumFileCount)
                return null;

            header.Flags = (HeaderFlags)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            header.SetID = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            header.CabinetIndex = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;

            if (header.Flags.HasFlag(HeaderFlags.RESERVE_PRESENT))
            {
                // Two bytes of header reserve size plus one byte each for folder and data
                if (data.Length - dataPtr < 4)
                    return null;

                header.HeaderReservedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
                if (header.HeaderReservedSize > 60_000)
                    return null;

                header.FolderReservedSize = data[dataPtr++];
                header.DataReservedSize = data[dataPtr++];

                if (header.HeaderReservedSize > 0)
                {
                    if (data.Length - dataPtr < header.HeaderReservedSize)
                        return null;

                    header.ReservedData = new byte[header.HeaderReservedSize];
                    Array.Copy(data, dataPtr, header.ReservedData, 0, header.HeaderReservedSize);
                    dataPtr += header.HeaderReservedSize;
                }
            }

            if (header.Flags.HasFlag(HeaderFlags.PREV_CABINET))
            {
                header.CabinetPrev = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr);
                if (header.CabinetPrev == null)
                    return null;

                header.DiskPrev = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr);
                if (header.DiskPrev == null)
                    return null;
            }

            if (header.Flags.HasFlag(HeaderFlags.NEXT_CABINET))
            {
                header.CabinetNext = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr);
                if (header.CabinetNext == null)
                    return null;

                header.DiskNext = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr);
                if (header.DiskNext == null)
                    return null;
            }

            return header;
        }

        #endregion

        #region Public Functionality

        /// <summary>
        /// Print all info about the cabinet header
        /// </summary>
        public void PrintInfo()
        {
            Console.WriteLine("CFHEADER INFORMATION:");
            Console.WriteLine("--------------------------------------------");
            Console.WriteLine($" Signature: {Encoding.ASCII.GetString(BitConverter.GetBytes(Signature))} (0x{Signature:X8})");
            Console.WriteLine($" Reserved1: {Reserved1} (0x{Reserved1:X8})");
            Console.WriteLine($" CabinetSize: {CabinetSize} (0x{CabinetSize:X8})");
            Console.WriteLine($" Reserved2: {Reserved2} (0x{Reserved2:X8})");
            Console.WriteLine($" FilesOffset: {FilesOffset} (0x{FilesOffset:X8})");
            Console.WriteLine($" Reserved3: {Reserved3} (0x{Reserved3:X8})");
            Console.WriteLine($" Version: {VersionMajor}.{VersionMinor}");
            Console.WriteLine($" FolderCount: {FolderCount} (0x{FolderCount:X4})");
            Console.WriteLine($" FileCount: {FileCount} (0x{FileCount:X4})");
            Console.WriteLine($" Flags: {Flags} (0x{(ushort)Flags:X4})");
            Console.WriteLine($" SetID: {SetID} (0x{SetID:X4})");
            Console.WriteLine($" CabinetIndex: {CabinetIndex} (0x{CabinetIndex:X4})");

            if (Flags.HasFlag(HeaderFlags.RESERVE_PRESENT))
            {
                Console.WriteLine($" HeaderReservedSize: {HeaderReservedSize} (0x{HeaderReservedSize:X4})");
                Console.WriteLine($" FolderReservedSize: {FolderReservedSize} (0x{FolderReservedSize:X2})");
                Console.WriteLine($" DataReservedSize: {DataReservedSize} (0x{DataReservedSize:X2})");
                // TODO: Output reserved data
            }

            if (Flags.HasFlag(HeaderFlags.PREV_CABINET))
            {
                Console.WriteLine($" CabinetPrev: {Encoding.ASCII.GetString(CabinetPrev).TrimEnd('\0')}");
                Console.WriteLine($" DiskPrev: {Encoding.ASCII.GetString(DiskPrev).TrimEnd('\0')}");
            }

            if (Flags.HasFlag(HeaderFlags.NEXT_CABINET))
            {
                Console.WriteLine($" CabinetNext: {Encoding.ASCII.GetString(CabinetNext).TrimEnd('\0')}");
                Console.WriteLine($" DiskNext: {Encoding.ASCII.GetString(DiskNext).TrimEnd('\0')}");
            }

            Console.WriteLine();
        }

        #endregion
    }

    /// <summary>
    /// Bit flags stored in <see cref="CFHEADER.Flags"/> that indicate which optional
    /// header fields are present
    /// </summary>
    [Flags]
    internal enum HeaderFlags : ushort
    {
        /// <summary>
        /// The flag is set if this cabinet file is not the first in a set of cabinet files.
        /// When this bit is set, the szCabinetPrev and szDiskPrev fields are present in this CFHEADER
        /// structure. The value is 0x0001.
        /// </summary>
        PREV_CABINET = 0x0001,

        /// <summary>
        /// The flag is set if this cabinet file is not the last in a set of cabinet files.
        /// When this bit is set, the szCabinetNext and szDiskNext fields are present in this CFHEADER
        /// structure. The value is 0x0002.
        /// </summary>
        NEXT_CABINET = 0x0002,

        /// <summary>
        /// The flag is set if this cabinet file contains any reserved fields. When
        /// this bit is set, the cbCFHeader, cbCFFolder, and cbCFData fields are present in this CFHEADER
        /// structure. The value is 0x0004.
        /// </summary>
        RESERVE_PRESENT = 0x0004,
    }

    /// <summary>
    /// Each CFFOLDER structure contains information about one of the folders or partial folders stored in
    /// this cabinet file, as shown in the following packet diagram. The first CFFOLDER structure entry
    /// immediately follows the CFHEADER structure entry. The CFHEADER.cFolders field indicates how
    /// many CFFOLDER structure entries are present.
    ///
    /// Folders can start in one cabinet, and continue on to one or more succeeding cabinets. When the
    /// cabinet file creator detects that a folder has been continued into another cabinet, it will complete
    /// that folder as soon as the current file has been completely compressed. Any additional files will be
    /// placed in the next folder. Generally, this means that a folder would span at most two cabinets, but it
    /// could span more than two cabinets if the file is large enough.
    ///
    /// CFFOLDER structure entries actually refer to folder fragments, not necessarily complete folders. A
    /// CFFOLDER structure is the beginning of a folder if the iFolder field value in the first file that
    /// references the folder does not indicate that the folder is continued from the previous cabinet file.
    ///
    /// The typeCompress field can vary from one folder to the next, unless the folder is continued from a
    /// previous cabinet file.
    /// </summary>
    internal class CFFOLDER
    {
        #region Constants

        /// <summary>
        /// Size in bytes of the fields that are always present
        /// </summary>
        private const int FixedSize = 8;

        #endregion

        #region Properties

        /// <summary>
        /// Specifies the absolute file offset of the first CFDATA field block for the folder.
        /// </summary>
        public uint CabStartOffset { get; private set; }

        /// <summary>
        /// Specifies the number of CFDATA structures for this folder that are actually in this cabinet.
        /// A folder can continue into another cabinet and have more CFDATA structure blocks in that cabinet
        /// file. A folder can start in a previous cabinet. This number represents only the CFDATA structures for
        /// this folder that are at least partially recorded in this cabinet.
        /// </summary>
        public ushort DataCount { get; private set; }

        /// <summary>
        /// Indicates the compression method used for all CFDATA structure entries in this
        /// folder.
        /// </summary>
        public CompressionType CompressionType { get; private set; }

        /// <summary>
        /// If the CFHEADER.flags.cfhdrRESERVE_PRESENT field is set
        /// and the cbCFFolder field is non-zero, then this field contains per-folder application information.
        /// This field is defined by the application, and is used for application-defined purposes.
        /// </summary>
        public byte[] ReservedData { get; private set; }

        /// <summary>
        /// Data blocks associated with this folder, keyed by the buffer offset each block was read from
        /// </summary>
        public Dictionary<int, CFDATA> DataBlocks { get; private set; } = new Dictionary<int, CFDATA>();

        #endregion

        #region Generated Properties

        /// <summary>
        /// Get the uncompressed data associated with this folder, if possible
        /// </summary>
        /// <remarks>
        /// Returns null when there are no data blocks or the compression type is not one of the
        /// named values. Quantum and LZX blocks are not implemented and contribute no bytes.
        /// </remarks>
        public byte[] UncompressedData
        {
            get
            {
                if (DataBlocks == null || DataBlocks.Count == 0)
                    return null;

                // Store the last decompressed block for MS-ZIP
                byte[] lastDecompressed = null;

                List<byte> data = new List<byte>();

                // Blocks are processed in the order they appear in the buffer
                foreach (CFDATA dataBlock in DataBlocks.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Value))
                {
                    // A block that failed to deserialize has nothing to contribute
                    if (dataBlock == null)
                        continue;

                    byte[] decompressed = null;

                    // NOTE(review): the raw value is compared without applying
                    // CompressionType.MASK_TYPE, so a value carrying bits outside the
                    // mask ends up in the default case -- confirm this is intended.
                    switch (CompressionType)
                    {
                        case CompressionType.TYPE_NONE:
                            decompressed = dataBlock.CompressedData;
                            break;
                        case CompressionType.TYPE_MSZIP:
                            // A block that cannot be parsed yields null instead of throwing
                            decompressed = MSZIPBlock.Deserialize(dataBlock.CompressedData)?.DecompressBlock(dataBlock.UncompressedSize, lastDecompressed);
                            break;
                        case CompressionType.TYPE_QUANTUM:
                            // TODO: UNIMPLEMENTED
                            break;
                        case CompressionType.TYPE_LZX:
                            // TODO: UNIMPLEMENTED
                            break;
                        default:
                            return null;
                    }

                    lastDecompressed = decompressed;
                    if (decompressed != null)
                        data.AddRange(decompressed);
                }

                return data.ToArray();
            }
        }

        #endregion

        #region Serialization

        /// <summary>
        /// Deserialize <paramref name="data"/> at <paramref name="dataPtr"/> into a CFFOLDER object
        /// </summary>
        /// <param name="data">Buffer that contains the cabinet</param>
        /// <param name="dataPtr">Offset of the folder entry; advanced past the entry</param>
        /// <param name="basePtr">Offset in <paramref name="data"/> that <see cref="CabStartOffset"/> is applied to</param>
        /// <param name="folderReservedSize">Number of reserved bytes that follow the fixed fields</param>
        /// <param name="dataReservedSize">Number of reserved bytes in each CFDATA entry</param>
        /// <returns>The parsed folder, or null if the buffer is too short for the entry</returns>
        public static CFFOLDER Deserialize(byte[] data, ref int dataPtr, int basePtr, byte folderReservedSize, byte dataReservedSize)
        {
            if (data == null || dataPtr < 0 || data.Length - dataPtr < FixedSize)
                return null;

            CFFOLDER folder = new CFFOLDER();

            folder.CabStartOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            folder.DataCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            folder.CompressionType = (CompressionType)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;

            if (folderReservedSize > 0)
            {
                if (data.Length - dataPtr < folderReservedSize)
                    return null;

                folder.ReservedData = new byte[folderReservedSize];
                Array.Copy(data, dataPtr, folder.ReservedData, 0, folderReservedSize);
                dataPtr += folderReservedSize;
            }

            if (folder.CabStartOffset > 0)
            {
                // The data blocks live elsewhere in the buffer, so they are walked with a
                // separate pointer and the caller's pointer stays after the folder entry
                int blockPtr = basePtr + (int)folder.CabStartOffset;
                for (int i = 0; i < folder.DataCount; i++)
                {
                    int offset = blockPtr;
                    CFDATA dataBlock = CFDATA.Deserialize(data, ref blockPtr, dataReservedSize);

                    // Once a block cannot be read its length is unknown, so the position
                    // of every following block cannot be trusted either
                    if (dataBlock == null)
                        break;

                    folder.DataBlocks[offset] = dataBlock;
                }
            }

            return folder;
        }

        #endregion

        #region Public Functionality

        /// <summary>
        /// Print all info about the folder entry
        /// </summary>
        public void PrintInfo()
        {
            Console.WriteLine($" CabStartOffset: {CabStartOffset} (0x{CabStartOffset:X8})");
            Console.WriteLine($" DataCount: {DataCount} (0x{DataCount:X4})");
            Console.WriteLine($" CompressionType: {CompressionType} (0x{(ushort)CompressionType:X4})");
            // TODO: Output reserved data

            Console.WriteLine();
        }

        #endregion
    }

    /// <summary>
    /// Compression method values stored in <see cref="CFFOLDER.CompressionType"/>
    /// </summary>
    internal enum CompressionType : ushort
    {
        /// <summary>
        /// Mask for compression type.
        /// </summary>
        MASK_TYPE = 0x000F,

        /// <summary>
        /// No compression.
        /// </summary>
        TYPE_NONE = 0x0000,

        /// <summary>
        /// MSZIP compression.
        /// </summary>
        TYPE_MSZIP = 0x0001,

        /// <summary>
        /// Quantum compression.
        /// </summary>
        TYPE_QUANTUM = 0x0002,

        /// <summary>
        /// LZX compression.
        /// </summary>
        TYPE_LZX = 0x0003,
    }

    /// <summary>
    /// Each CFFILE structure contains information about one of the files stored (or at least partially
    /// stored) in this cabinet, as shown in the following packet diagram. The first CFFILE structure entry in
    /// each cabinet is found at the absolute offset CFHEADER.coffFiles field. CFHEADER.cFiles field
    /// indicates how many of these entries are in the cabinet. The CFFILE structure entries in a cabinet
    /// are ordered by iFolder field value, and then by the uoffFolderStart field value. Entries for files
    /// continued from the previous cabinet will be first, and entries for files continued to the next cabinet
    /// will be last.
    /// </summary>
    internal class CFFILE
    {
        #region Constants

        /// <summary>
        /// Size in bytes of the fields that precede the name
        /// </summary>
        private const int FixedSize = 16;

        #endregion

        #region Properties

        /// <summary>
        /// Specifies the uncompressed size of this file, in bytes.
        /// </summary>
        public uint FileSize { get; private set; }

        /// <summary>
        /// Specifies the uncompressed offset, in bytes, of the start of this file's data. For the
        /// first file in each folder, this value will usually be zero. Subsequent files in the folder will have offsets
        /// that are typically the running sum of the cbFile field values.
        /// </summary>
        public uint FolderStartOffset { get; private set; }

        /// <summary>
        /// Index of the folder that contains this file's data.
        /// </summary>
        public FolderIndex FolderIndex { get; private set; }

        /// <summary>
        /// Date of this file, in the format ((year-1980) &lt;&lt; 9)+(month &lt;&lt; 5)+(day), where
        /// month={1..12} and day = { 1..31 }. This "date" is typically considered the "last modified" date in local
        /// time, but the actual definition is application-defined.
        /// </summary>
        public ushort Date { get; private set; }

        /// <summary>
        /// Time of this file, in the format (hour &lt;&lt; 11)+(minute &lt;&lt; 5)+(seconds/2), where
        /// hour={0..23}. This "time" is typically considered the "last modified" time in local time, but the
        /// actual definition is application-defined.
        /// </summary>
        public ushort Time { get; private set; }

        /// <summary>
        /// Attributes of this file; can be used in any combination.
        /// </summary>
        public FileAttributes Attributes { get; private set; }

        /// <summary>
        /// The null-terminated name of this file. Note that this string can include path
        /// separator characters. The string can contain up to 256 bytes, plus the null byte. When the
        /// _A_NAME_IS_UTF attribute is set, this string can be converted directly to Unicode, avoiding
        /// locale-specific dependencies. When the _A_NAME_IS_UTF attribute is not set, this string is subject
        /// to interpretation depending on locale. When a string that contains Unicode characters larger than
        /// 0x007F is encoded in the szName field, the _A_NAME_IS_UTF attribute SHOULD be included in
        /// the file's attributes. When no characters larger than 0x007F are in the name, the
        /// _A_NAME_IS_UTF attribute SHOULD NOT be set. If byte values larger than 0x7F are found in
        /// CFFILE.szName field, but the _A_NAME_IS_UTF attribute is not set, the characters SHOULD be
        /// interpreted according to the current location.
        /// </summary>
        public byte[] Name { get; private set; }

        #endregion

        #region Generated Properties

        /// <summary>
        /// Name value as a string (not null-terminated), or null if there is no name
        /// </summary>
        public string NameAsString
        {
            get
            {
                // Perform sanity checks
                if (Name == null || Name.Length == 0)
                    return null;

                // Respect the attribute flag for UTF-8; otherwise use the local encoding.
                // Encoding.UTF8 substitutes invalid sequences instead of throwing, so no
                // exception handling is needed around the conversion.
                Encoding encoding = Attributes.HasFlag(FileAttributes.NAME_IS_UTF)
                    ? Encoding.UTF8
                    : Encoding.Default;

                return encoding.GetString(Name).TrimEnd('\0');
            }
        }

        /// <summary>
        /// Convert the internal values into a DateTime object, if possible
        /// </summary>
        /// <remarks>
        /// The getter returns <see cref="DateTime.MinValue"/> when the stored fields do not
        /// form a valid date and time (for example a month or day of zero). The setter
        /// overwrites <see cref="Date"/> and <see cref="Time"/>.
        /// </remarks>
        public DateTime DateAndTimeAsDateTime
        {
            get
            {
                // Date property
                int year = (Date >> 9) + 1980;
                int month = (Date >> 5) & 0x0F;
                int day = Date & 0x1F;

                // Time property; seconds are stored halved in the low five bits
                int hour = Time >> 11;
                int minute = (Time >> 5) & 0x3F;
                int second = (Time << 1) & 0x3E;

                try
                {
                    return new DateTime(year, month, day, hour, minute, second);
                }
                catch (ArgumentOutOfRangeException)
                {
                    // The bit fields can hold values no calendar accepts
                    return DateTime.MinValue;
                }
            }
            set
            {
                Date = (ushort)(((value.Year - 1980) << 9) + (value.Month << 5) + (value.Day));
                Time = (ushort)((value.Hour << 11) + (value.Minute << 5) + (value.Second / 2));
            }
        }

        #endregion

        #region Serialization

        /// <summary>
        /// Deserialize <paramref name="data"/> at <paramref name="dataPtr"/> into a CFFILE object
        /// </summary>
        /// <param name="data">Buffer that contains the file entry</param>
        /// <param name="dataPtr">Offset of the file entry; advanced past the entry</param>
        /// <returns>
        /// The parsed entry, or null if the buffer is too short or the name is not terminated
        /// </returns>
        public static CFFILE Deserialize(byte[] data, ref int dataPtr)
        {
            if (data == null || dataPtr < 0 || data.Length - dataPtr < FixedSize)
                return null;

            CFFILE file = new CFFILE();

            file.FileSize = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            file.FolderStartOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            file.FolderIndex = (FolderIndex)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            file.Date = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            file.Time = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            file.Attributes = (FileAttributes)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;

            file.Name = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr);
            if (file.Name == null)
                return null;

            return file;
        }

        #endregion

        #region Public Functionality

        /// <summary>
        /// Print all info about the file entry
        /// </summary>
        public void PrintInfo()
        {
            Console.WriteLine($" FileSize: {FileSize} (0x{FileSize:X8})");
            Console.WriteLine($" FolderStartOffset: {FolderStartOffset} (0x{FolderStartOffset:X4})");
            Console.WriteLine($" FolderIndex: {FolderIndex} (0x{(ushort)FolderIndex:X4})");
            Console.WriteLine($" DateTime: {DateAndTimeAsDateTime} (0x{Date:X4} 0x{Time:X4})");
            Console.WriteLine($" Attributes: {Attributes} (0x{(ushort)Attributes:X4})");
            Console.WriteLine($" Name: {NameAsString}");

            Console.WriteLine();
        }

        #endregion
    }

    /// <summary>
    /// Named values of <see cref="CFFILE.FolderIndex"/>. Any other value is used as a
    /// plain index into the cabinet's folder entries.
    /// </summary>
    internal enum FolderIndex : ushort
    {
        /// <summary>
        /// A value of zero indicates that this is the
        /// first folder in this cabinet file.
        /// </summary>
        FIRST_FOLDER = 0x0000,

        /// <summary>
        /// Indicates that the folder index is actually zero, but that
        /// extraction of this file would have to begin with the cabinet named in the
        /// CFHEADER.szCabinetPrev field.
        /// </summary>
        CONTINUED_FROM_PREV = 0xFFFD,

        /// <summary>
        /// Indicates that the folder index
        /// is actually one less than the CFHEADER.cFolders field value, and that extraction of this file will
        /// require continuation to the cabinet named in the CFHEADER.szCabinetNext field.
        /// </summary>
        CONTINUED_TO_NEXT = 0xFFFE,

        /// <summary>
        /// Presumably the combination of <see cref="CONTINUED_FROM_PREV"/> and
        /// <see cref="CONTINUED_TO_NEXT"/>; the original description is missing -- TODO confirm.
        /// </summary>
        CONTINUED_PREV_AND_NEXT = 0xFFFF,
    }

    /// <summary>
    /// Attribute flags stored in <see cref="CFFILE.Attributes"/>
    /// </summary>
    [Flags]
    internal enum FileAttributes : ushort
    {
        /// <summary>
        /// File is read-only.
        /// </summary>
        RDONLY = 0x0001,

        /// <summary>
        /// File is hidden.
        /// </summary>
        HIDDEN = 0x0002,

        /// <summary>
        /// File is a system file.
        /// </summary>
        SYSTEM = 0x0004,

        /// <summary>
        /// File has been modified since last backup.
        /// </summary>
        ARCH = 0x0040,

        /// <summary>
        /// File will be run after extraction.
        /// </summary>
        EXEC = 0x0080,

        /// <summary>
        /// The szName field contains UTF.
        /// </summary>
        NAME_IS_UTF = 0x0100,
    }

    /// <summary>
    /// Each CFDATA structure describes some amount of compressed data, as shown in the following
    /// packet diagram. The first CFDATA structure entry for each folder is located by using the
    /// <see cref="CFFOLDER.CabStartOffset"/> field. Subsequent CFDATA structure records for this folder are
    /// contiguous.
    /// </summary>
    internal class CFDATA
    {
        #region Properties

        /// <summary>
        /// Checksum of this CFDATA structure, from the <see cref="CompressedSize"/> through the
        /// <see cref="CompressedData"/> fields. It can be set to 0 (zero) if the checksum is not supplied.
        /// </summary>
        public uint Checksum { get; private set; }

        /// <summary>
        /// Number of bytes of compressed data in this CFDATA structure record. When the
        /// <see cref="UncompressedSize"/> field is zero, this field indicates only the number of bytes that fit into this cabinet file.
        /// </summary>
        public ushort CompressedSize { get; private set; }

        /// <summary>
        /// The uncompressed size of the data in this CFDATA structure entry in bytes. When this
        /// CFDATA structure entry is continued in the next cabinet file, the <see cref="UncompressedSize"/> field will be zero, and
        /// the <see cref="UncompressedSize"/> field in the first CFDATA structure entry in the next cabinet file will report the total
        /// uncompressed size of the data from both CFDATA structure blocks.
        /// </summary>
        public ushort UncompressedSize { get; private set; }

        /// <summary>
        /// If the <see cref="HeaderFlags.RESERVE_PRESENT"/> flag is set
        /// and the <see cref="CFHEADER.DataReservedSize"/> field value is non-zero, this field contains per-datablock application information.
        /// This field is defined by the application, and it is used for application-defined purposes.
        /// </summary>
        public byte[] ReservedData { get; private set; }

        /// <summary>
        /// The compressed data bytes, compressed by using the <see cref="CFFOLDER.CompressionType"/>
        /// method. When the <see cref="UncompressedSize"/> field value is zero, these data bytes MUST be combined with the data
        /// bytes from the next cabinet's first CFDATA structure entry before decompression. When the
        /// <see cref="CFFOLDER.CompressionType"/> field indicates that the data is not compressed, this field contains the
        /// uncompressed data bytes. In this case, the <see cref="CompressedSize"/> and <see cref="UncompressedSize"/> field values will be equal unless
        /// this CFDATA structure entry crosses a cabinet file boundary.
        /// </summary>
        public byte[] CompressedData { get; private set; }

        #endregion

        #region Serialization

        /// <summary>
        /// Deserialize <paramref name="data"/> at <paramref name="dataPtr"/> into a CFDATA object
        /// </summary>
        /// <param name="data">Buffer that contains the data block</param>
        /// <param name="dataPtr">Offset of the data block; advanced past every field that is read</param>
        /// <param name="dataReservedSize">Number of reserved bytes that follow the fixed fields</param>
        /// <returns>
        /// The parsed block, or null if the buffer is too short or the sizes fail validation
        /// </returns>
        public static CFDATA Deserialize(byte[] data, ref int dataPtr, byte dataReservedSize = 0)
        {
            // Checksum (4 bytes) plus the two 2-byte sizes must be present
            if (data == null || dataPtr < 0 || data.Length - dataPtr < 8)
                return null;

            CFDATA dataBlock = new CFDATA();

            dataBlock.Checksum = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4;
            dataBlock.CompressedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;
            dataBlock.UncompressedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2;

            // NOTE(review): this rejects any block whose compressed form is larger than its
            // uncompressed form; confirm that cannot happen for valid data that does not
            // compress well before relying on it.
            if (dataBlock.UncompressedSize != 0 && dataBlock.CompressedSize > dataBlock.UncompressedSize)
                return null;

            if (dataReservedSize > 0)
            {
                if (data.Length - dataPtr < dataReservedSize)
                    return null;

                dataBlock.ReservedData = new byte[dataReservedSize];
                Array.Copy(data, dataPtr, dataBlock.ReservedData, 0, dataReservedSize);
                dataPtr += dataReservedSize;
            }

            if (dataBlock.CompressedSize > 0)
            {
                if (data.Length - dataPtr < dataBlock.CompressedSize)
                    return null;

                dataBlock.CompressedData = new byte[dataBlock.CompressedSize];
                Array.Copy(data, dataPtr, dataBlock.CompressedData, 0, dataBlock.CompressedSize);
                dataPtr += dataBlock.CompressedSize;
            }

            return dataBlock;
        }

        #endregion
} + + /// + /// The computation and verification of checksums found in CFDATA structure entries cabinet files is + /// done by using a function described by the following mathematical notation. When checksums are + /// not supplied by the cabinet file creating application, the checksum field is set to 0 (zero). Cabinet + /// extracting applications do not compute or verify the checksum if the field is set to 0 (zero). + /// + internal static class Checksum + { + // TODO: Implement from `[MS-CAB].pdf` + //public static uint ChecksumData(byte[] data) + //{ + + //} + } +} diff --git a/BurnOutSharp/FileType/MicrosoftCAB.MSZIP.cs b/BurnOutSharp/FileType/MicrosoftCAB.MSZIP.cs new file mode 100644 index 00000000..787c05fc --- /dev/null +++ b/BurnOutSharp/FileType/MicrosoftCAB.MSZIP.cs @@ -0,0 +1,143 @@ +using System; +using System.Linq; +using ComponentAce.Compression.Libs.zlib; + +namespace BurnOutSharp.FileType +{ + /// + /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks. The + /// 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B. The MSZIP signature MUST be + /// the first 2 bytes in the MSZIP block.The MSZIP signature is shown in the following packet diagram. + /// + internal class MSZIPBlock + { + #region Constants + + /// + /// Human-readable signature + /// + public static readonly string SignatureString = "CK"; + + /// + /// Signature as an unsigned Int16 value + /// + public const ushort SignatureValue = 0x4B43; + + /// + /// Signature as a byte array + /// + public static readonly byte[] SignatureBytes = new byte[] { 0x43, 0x4B }; + + #endregion + + #region Properties + + /// + /// 'CB' + /// + public ushort Signature { get; private set; } + + /// + /// Each MSZIP block is the result of a single deflate compression operation, as defined in [RFC1951]. + /// The compressor that performs the compression operation MUST generate one or more RFC 1951 + /// blocks, as defined in [RFC1951]. 
The number, deflation mode, and type of RFC 1951 blocks in each + /// MSZIP block is determined by the compressor, as defined in [RFC1951]. The last RFC 1951 block in + /// each MSZIP block MUST be marked as the "end" of the stream(1), as defined by[RFC1951] + /// section 3.2.3. Decoding trees MUST be discarded after each RFC 1951 block, but the history buffer + /// MUST be maintained.Each MSZIP block MUST represent no more than 32 KB of uncompressed data. + /// + /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes.This enables the MSZIP + /// block to contain 32 KB of data split between two noncompressed RFC 1951 blocks, each of which + /// has a value of BTYPE = 00. + /// + public byte[] Data { get; private set; } + + #endregion + + #region Static Properties + + public static ZStream DecompressionStream { get; set; } = new ZStream(); + + #endregion + + #region Serialization + + public static MSZIPBlock Deserialize(byte[] data) + { + if (data == null) + return null; + + MSZIPBlock block = new MSZIPBlock(); + int dataPtr = 0; + + block.Signature = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; + if (block.Signature != SignatureValue) + return null; + + block.Data = new byte[data.Length - 2]; + Array.Copy(data, dataPtr, block.Data, 0, data.Length - 2); + dataPtr += data.Length - 2; + + return block; + } + + #endregion + + #region Public Functionality + + /// + /// Decompress a single block of MS-ZIP data + /// + public byte[] DecompressBlock(int decompressedSize, byte[] previousBytes = null) + { + if (Data == null || Data.Length == 0) + return null; + + try + { + // The first block can use DeflateStream since it has no history + if (previousBytes == null) + { + // Setup the input + DecompressionStream = new ZStream(); + int initErr = DecompressionStream.inflateInit(); + if (initErr != zlibConst.Z_OK) + return null; + } + + // All n+1 blocks require the previous uncompressed data as a dictionary + else + { + // TODO: We need to force a 
dictionary setting - at this point, mode is 8 not 6 + + // Setup the dictionary + int dictErr = DecompressionStream.inflateSetDictionary(previousBytes, previousBytes.Length); + if (dictErr != zlibConst.Z_OK) + return null; + } + + // Setup the output + byte[] output = new byte[decompressedSize]; + DecompressionStream.next_out = output; + DecompressionStream.avail_out = decompressedSize; + + // Inflate the data -- 0x78, 0x9C is needed to trick zlib + DecompressionStream.next_in = new byte[] { 0x78, 0x9C }.Concat(Data).ToArray(); + DecompressionStream.next_in_index = 0; + DecompressionStream.avail_in = Data.Length + 2; + + int err = DecompressionStream.inflate(zlibConst.Z_FULL_FLUSH); + if (err != zlibConst.Z_OK) + return null; + + return output; + } + catch + { + return null; + } + } + + #endregion + } +} diff --git a/BurnOutSharp/FileType/MicrosoftCAB.Quantum.cs b/BurnOutSharp/FileType/MicrosoftCAB.Quantum.cs new file mode 100644 index 00000000..82f4368f --- /dev/null +++ b/BurnOutSharp/FileType/MicrosoftCAB.Quantum.cs @@ -0,0 +1,191 @@ +namespace BurnOutSharp.FileType +{ + #region TEMPORARY AREA FOR QUANTUM COMPRESSION FORMAT + + // See http://www.russotto.net/quantumcomp.html for details about implementation + + internal enum SelectorModel + { + /// + /// Literal model, 64 entries, start at symbol 0 + /// + SELECTOR_0 = 0, + + /// + /// Literal model, 64 entries, start at symbol 64 + /// + SELECTOR_1 = 1, + + /// + /// Literal model, 64 entries, start at symbol 128 + /// + SELECTOR_2 = 2, + + /// + /// Literal model, 64 entries, start at symbol 192 + /// + SELECTOR_3 = 3, + + /// + /// LZ model, 3 character matches, max 24 entries, start at symbol 0 + /// + SELECTOR_4 = 4, + + /// + /// LZ model, 4 character matches, max 36 entries, start at symbol 0 + /// + SELECTOR_5 = 5, + + /// + /// LZ model, 5+ character matches, max 42 entries, start at symbol 0 + /// + SELECTOR_6_POSITION = 6, + + /// + /// LZ model, 5+ character matches, max 27 entries, start at symbol 0 
+ /// + SELECTOR_6_LENGTH = 7, + } + + #region LZ Compression Tables + + internal static class QuantumConstants + { + internal static readonly uint[] PositionBaseTable = new uint[] + { + 0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c, + 0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0, + 0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00, + 0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000, + 0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000, + 0x100000, 0x180000, + }; + + internal static readonly int[] PositionExtraBitsTable = new int[] + { + 0, 0, 0, 0, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13, 14, 14, + 15, 15, 16, 16, 17, 17, 18, 18, + 19, 19, + }; + + internal static readonly byte[] LengthBaseTable = new byte[] + { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26, + 0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e, + 0xbe, 0xde, 0xfe + }; + + internal static readonly int[] LengthExtraBitsTable = new int[] + { + 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, + 3, 3, 4, 4, 4, 4, 5, 5, + 5, 5, 0, + }; + + /// + /// Number of position slots for (tsize - 10) + /// + internal static readonly int[] NumberOfPositionSlots = new int[] + { + 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, + }; + } + + #endregion + + internal static class QuantumCompressor + { + // TODO: Determine how these values are set + private static uint CS_C = 0; + private static uint CS_H = 0; + private static uint CS_L = 0; + + /// + /// Get frequency from code + /// + public static ushort GetFrequency(ushort totfreq) + { + uint range = ((CS_H - CS_L) & 0xFFFF) + 1; + uint freq = ((CS_C - CS_L + 1) * totfreq - 1) / range; + return (ushort)(freq & 0xFFFF); + } + + /// + /// The decoder renormalization loop + /// + public static int GetCode(int cumfreqm1, int cumfreq, int totfreq) + { + uint 
range = (CS_H - CS_L) + 1; + CS_H = CS_L + (uint)((cumfreqm1 * range) / totfreq) - 1; + CS_L = CS_L + (uint)((cumfreq * range) / totfreq); + + while (true) + { + if ((CS_L & 0x8000) != (CS_H & 0x8000)) + { + if ((CS_L & 0x4000) != 0 && (CS_H & 0x4000) == 0) + { + // Underflow case + CS_C ^= 0x4000; + CS_L &= 0x3FFF; + CS_H |= 0x4000; + } + else + { + break; + } + } + + CS_L <<= 1; + CS_H = (CS_H << 1) | 1; + CS_C = (CS_C << 1) | 0; // TODO: Figure out what `getbit()` is and replace the placeholder `0` + } + + // TODO: Figure out what is supposed to return here + return 0; + } + + public static int GetSymbol(Model model) + { + int freq = GetFrequency(model.Symbols[0].CumulativeFrequency); + + int i = 1; + for (; i < model.Entries; i++) + { + if (model.Symbols[i].CumulativeFrequency <= freq) + break; + } + + int sym = model.Symbols[i - 1].Symbol; + + GetCode(model.Symbols[i - 1].CumulativeFrequency, model.Symbols[i].CumulativeFrequency, model.Symbols[0].CumulativeFrequency); + + // TODO: Figure out what `update_model` does + //update_model(model, i); + + return sym; + } + } + + internal class ModelSymbol + { + public ushort Symbol { get; private set; } + + public ushort CumulativeFrequency { get; private set; } + } + + internal class Model + { + public int Entries { get; private set; } + + public ModelSymbol[] Symbols { get; private set; } + } + + #endregion + +} diff --git a/BurnOutSharp/FileType/MicrosoftCAB.cs b/BurnOutSharp/FileType/MicrosoftCAB.cs index 884d3110..a228912d 100644 --- a/BurnOutSharp/FileType/MicrosoftCAB.cs +++ b/BurnOutSharp/FileType/MicrosoftCAB.cs @@ -1,16 +1,14 @@ using System; using System.Collections.Concurrent; -using System.Collections.Generic; using System.IO; -using System.IO.Compression; -using System.Linq; -using System.Text; using BurnOutSharp.Interfaces; using BurnOutSharp.Tools; -using ComponentAce.Compression.Libs.zlib; #if NETSTANDARD2_0 using WixToolset.Dtf.Compression; using WixToolset.Dtf.Compression.Cab; +#elif 
NET6_0_OR_GREATER +using LibMSPackSharp; +using LibMSPackSharp.CABExtract; #endif namespace BurnOutSharp.FileType @@ -19,7 +17,7 @@ namespace BurnOutSharp.FileType /// Microsoft cabinet file /// /// Specification available at - public class MicrosoftCAB : IScannable + public partial class MicrosoftCAB : IScannable { /// public ConcurrentDictionary> Scan(Scanner scanner, string file) @@ -37,7 +35,85 @@ namespace BurnOutSharp.FileType public ConcurrentDictionary> Scan(Scanner scanner, Stream stream, string file) { #if NET6_0_OR_GREATER - // WixToolset does not work with .NET 6.0 and is bound to Windows + // If the cab file itself fails + try + { + string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); + Directory.CreateDirectory(tempPath); + + // Create the decompressor + var decompressor = Library.CreateCABDecompressor(null); + decompressor.Debug = scanner.IncludeDebug; + + // Open the cab file + var cabFile = decompressor.Open(file); + if (cabFile == null) + { + if (scanner.IncludeDebug) Console.WriteLine($"Error occurred opening of '{file}': {decompressor.Error}"); + return null; + } + + // If we have a previous CAB and it exists, don't try scanning + string directory = Path.GetDirectoryName(file); + if (!string.IsNullOrWhiteSpace(cabFile.PreviousCabinetName)) + { + if (File.Exists(Path.Combine(directory, cabFile.PreviousCabinetName))) + return null; + } + + // If there are additional next CABs, add those + string fileName = Path.GetFileName(file); + CABExtract.LoadSpanningCabinets(cabFile, fileName); + + // Loop through the found internal files + var sub = cabFile.Files; + while (sub != null) + { + // If an individual entry fails + try + { + // The trim here is for some very odd and stubborn files + string tempFile = Path.Combine(tempPath, sub.Filename.TrimEnd('\0', ' ', '.')); + Error error = decompressor.Extract(sub, tempFile); + if (error != Error.MSPACK_ERR_OK) + { + if (scanner.IncludeDebug) Console.WriteLine($"Error occurred during 
extraction of '{sub.Filename}': {error}"); + } + } + catch (Exception ex) + { + if (scanner.IncludeDebug) Console.WriteLine(ex); + } + + sub = sub.Next; + } + + // Destroy the decompressor + Library.DestroyCABDecompressor(decompressor); + + // Collect and format all found protections + var protections = scanner.GetProtections(tempPath); + + // If temp directory cleanup fails + try + { + Directory.Delete(tempPath, true); + } + catch (Exception ex) + { + if (scanner.IncludeDebug) Console.WriteLine(ex); + } + + // Remove temporary path references + Utilities.StripFromKeys(protections, tempPath); + + return protections; + } + catch (Exception ex) + { + if (scanner.IncludeDebug) Console.WriteLine(ex); + } + return null; #else // If the cab file itself fails @@ -75,1595 +151,5 @@ namespace BurnOutSharp.FileType return null; #endif } - -#region LibMSPackSharp - - // TODO: Add stream opening support - /// - //public ConcurrentDictionary> Scan(Scanner scanner, Stream stream, string file) - //{ - // // If the cab file itself fails - // try - // { - // string tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); - // Directory.CreateDirectory(tempPath); - - // // Create the decompressor - // var decompressor = Library.CreateCABDecompressor(null); - // decompressor.Debug = scanner.IncludeDebug; - - // // Open the cab file - // var cabFile = decompressor.Open(file); - // if (cabFile == null) - // { - // if (scanner.IncludeDebug) Console.WriteLine($"Error occurred opening of '{file}': {decompressor.Error}"); - // return null; - // } - - // // If we have a previous CAB and it exists, don't try scanning - // string directory = Path.GetDirectoryName(file); - // if (!string.IsNullOrWhiteSpace(cabFile.PreviousCabinetName)) - // { - // if (File.Exists(Path.Combine(directory, cabFile.PreviousCabinetName))) - // return null; - // } - - // // If there are additional next CABs, add those - // string fileName = Path.GetFileName(file); - // 
CABExtract.LoadSpanningCabinets(cabFile, fileName); - - // // Loop through the found internal files - // var sub = cabFile.Files; - // while (sub != null) - // { - // // If an individual entry fails - // try - // { - // // The trim here is for some very odd and stubborn files - // string tempFile = Path.Combine(tempPath, sub.Filename.TrimEnd('\0', ' ', '.')); - // Error error = decompressor.Extract(sub, tempFile); - // if (error != Error.MSPACK_ERR_OK) - // { - // if (scanner.IncludeDebug) Console.WriteLine($"Error occurred during extraction of '{sub.Filename}': {error}"); - // } - // } - // catch (Exception ex) - // { - // if (scanner.IncludeDebug) Console.WriteLine(ex); - // } - - // sub = sub.Next; - // } - - // // Destroy the decompressor - // Library.DestroyCABDecompressor(decompressor); - - // // Collect and format all found protections - // var protections = scanner.GetProtections(tempPath); - - // // If temp directory cleanup fails - // try - // { - // Directory.Delete(tempPath, true); - // } - // catch (Exception ex) - // { - // if (scanner.IncludeDebug) Console.WriteLine(ex); - // } - - // // Remove temporary path references - // Utilities.StripFromKeys(protections, tempPath); - - // return protections; - // } - // catch (Exception ex) - // { - // if (scanner.IncludeDebug) Console.WriteLine(ex); - // } - - // return null; - //} - -#endregion - -#region TEMPORARY AREA FOR MS-CAB FORMAT - - // TODO: Add multi-cabinet reading - internal class MSCABCabinet - { -#region Constants - - /// - /// A maximum uncompressed size of an input file to store in CAB - /// - public const uint MaximumUncompressedFileSize = 0x7FFF8000; - - /// - /// A maximum file COUNT - /// - public const ushort MaximumFileCount = 0xFFFF; - - /// - /// A maximum size of a created CAB (compressed) - /// - public const uint MaximumCabSize = 0x7FFFFFFF; - - /// - /// A maximum CAB-folder COUNT - /// - public const ushort MaximumFolderCount = 0xFFFF; - - /// - /// A maximum uncompressed data 
size in a CAB-folder - /// - public const uint MaximumUncompressedFolderSize = 0x7FFF8000; - -#endregion - -#region Properties - - /// - /// Cabinet header - /// - public CFHEADER Header { get; private set; } - - /// - /// One or more CFFOLDER entries - /// - public CFFOLDER[] Folders { get; private set; } - - /// - /// A series of one or more cabinet file (CFFILE) entries - /// - public CFFILE[] Files { get; private set; } - -#endregion - -#region Serialization - - /// - /// Deserialize at into a MSCABCabinet object - /// - public static MSCABCabinet Deserialize(byte[] data, ref int dataPtr) - { - if (data == null || dataPtr < 0) - return null; - - int basePtr = dataPtr; - MSCABCabinet cabinet = new MSCABCabinet(); - - // Start with the header - cabinet.Header = CFHEADER.Deserialize(data, ref dataPtr); - if (cabinet.Header == null) - return null; - - // Then retrieve all folder headers - cabinet.Folders = new CFFOLDER[cabinet.Header.FolderCount]; - for (int i = 0; i < cabinet.Header.FolderCount; i++) - { - cabinet.Folders[i] = CFFOLDER.Deserialize(data, ref dataPtr, basePtr, cabinet.Header.FolderReservedSize, cabinet.Header.DataReservedSize); - if (cabinet.Folders[i] == null) - return null; - } - - // We need to move to where the file headers are stored - dataPtr = basePtr + (int)cabinet.Header.FilesOffset; - - // Then retrieve all file headers - cabinet.Files = new CFFILE[cabinet.Header.FileCount]; - for (int i = 0; i < cabinet.Header.FileCount; i++) - { - cabinet.Files[i] = CFFILE.Deserialize(data, ref dataPtr); - if (cabinet.Files[i] == null) - return null; - } - - return cabinet; - } - -#endregion - -#region Public Functionality - - /// - /// Find the start of an MS-CAB cabinet in a set of data, if possible - /// - public int FindCabinet(byte[] data) - { - if (data == null || data.Length < CFHEADER.SignatureBytes.Length) - return -1; - - bool found = data.FirstPosition(CFHEADER.SignatureBytes, out int index); - return found ? 
index : -1; - } - - /// - /// Extract all files from the archive to - /// - public bool ExtractAllFiles(string outputDirectory) - { - // Perform sanity checks - if (Header == null || Files == null || Files.Length == 0) - return false; - - // Loop through and extract all files - foreach (CFFILE file in Files) - { - // Create the output path - string outputPath = Path.Combine(outputDirectory, file.NameAsString); - - // Get the associated folder, if possible - CFFOLDER folder = null; - if (file.FolderIndex != FolderIndex.CONTINUED_FROM_PREV && file.FolderIndex != FolderIndex.CONTINUED_TO_NEXT && file.FolderIndex != FolderIndex.CONTINUED_PREV_AND_NEXT) - folder = Folders[(int)file.FolderIndex]; - - // If we don't have a folder, we can't continue - if (folder == null) - return false; - - // TODO: We don't keep the stream open or accessible here to seek - // TODO: We don't check for other cabinets here yet - // TODO: Read and decompress data blocks - } - - return true; - } - - /// - /// Extract a single file from the archive to - /// - public bool ExtractFile(string filePath, string outputDirectory, bool exact = false) - { - // Perform sanity checks - if (Header == null || Files == null || Files.Length == 0) - return false; - - // Check the file exists - int fileIndex = -1; - for (int i = 0; i < Files.Length; i++) - { - CFFILE tempFile = Files[i]; - if (tempFile == null) - continue; - - // Check for a match - if (exact ? 
tempFile.NameAsString == filePath : tempFile.NameAsString.EndsWith(filePath, StringComparison.OrdinalIgnoreCase)) - { - fileIndex = i; - break; - } - } - - // -1 is an invalid file index - if (fileIndex == -1) - return false; - - // Get the file to extract - CFFILE file = Files[fileIndex]; - - // Create the output path - string outputPath = Path.Combine(outputDirectory, file.NameAsString); - - // Get the associated folder, if possible - CFFOLDER folder = null; - if (file.FolderIndex != FolderIndex.CONTINUED_FROM_PREV && file.FolderIndex != FolderIndex.CONTINUED_TO_NEXT && file.FolderIndex != FolderIndex.CONTINUED_PREV_AND_NEXT) - folder = Folders[(int)file.FolderIndex]; - - // If we don't have a folder, we can't continue - if (folder == null) - return false; - - // TODO: We don't keep the stream open or accessible here to seek - // TODO: We don't check for other cabinets here yet - // TODO: Read and decompress data blocks - - return true; - } - - /// - /// Print all info about the cabinet file - /// - public void PrintInfo() - { -#region CFHEADER - - if (Header == null) - { - Console.WriteLine("There is no header associated with this cabinet."); - return; - } - - Header.PrintInfo(); - -#endregion - -#region CFFOLDER - - if (Folders == null || Folders.Length == 0) - { - Console.WriteLine("There are no folders associated with this cabinet."); - return; - } - - Console.WriteLine("CFFOLDER INFORMATION:"); - Console.WriteLine("--------------------------------------------"); - for (int i = 0; i < Folders.Length; i++) - { - CFFOLDER folder = Folders[i]; - Console.WriteLine($" CFFOLDER {i:X4}:"); - - if (folder == null) - { - Console.WriteLine($" Not found or null"); - Console.WriteLine(); - continue; - } - - folder.PrintInfo(); - } - - Console.WriteLine(); - -#endregion - -#region CFFILE - - if (Files == null || Files.Length == 0) - { - Console.WriteLine("There are no files associated with this cabinet."); - return; - } - - Console.WriteLine("CFFILE INFORMATION:"); - 
Console.WriteLine("--------------------------------------------"); - for (int i = 0; i < Files.Length; i++) - { - CFFILE file = Files[i]; - Console.WriteLine($" CFFILE {i:X4}:"); - - if (file == null) - { - Console.WriteLine($" Not found or null"); - Console.WriteLine(); - continue; - } - - file.PrintInfo(); - } - - Console.WriteLine(); - -#endregion - } - -#endregion - -#region Internal Functionality - - /// - /// Get a null-terminated string as a byte array from input data - /// - internal static byte[] GetNullTerminatedString(byte[] data, ref int dataPtr) - { - int nullIndex = Array.IndexOf(data, 0x00, dataPtr, 0xFF); - int stringSize = nullIndex - dataPtr; - if (stringSize < 0 || stringSize > 256) - return null; - - byte[] str = new byte[stringSize]; - Array.Copy(data, dataPtr, str, 0, stringSize); - dataPtr += stringSize + 1; - return str; - } - -#endregion - } - - /// - /// The CFHEADER structure shown in the following packet diagram provides information about this - /// cabinet (.cab) file. - /// - internal class CFHEADER - { -#region Constants - - /// - /// Human-readable signature - /// - public static readonly string SignatureString = "MSCF"; - - /// - /// Signature as an unsigned Int32 value - /// - public const uint SignatureValue = 0x4643534D; - - /// - /// Signature as a byte array - /// - public static readonly byte[] SignatureBytes = new byte[] { 0x4D, 0x53, 0x43, 0x46 }; - -#endregion - -#region Properties - - /// - /// Contains the characters "M", "S", "C", and "F" (bytes 0x4D, 0x53, 0x43, - /// 0x46). This field is used to ensure that the file is a cabinet(.cab) file. - /// - public uint Signature { get; private set; } - - /// - /// Reserved field; MUST be set to 0 (zero). - /// - public uint Reserved1 { get; private set; } - - /// - /// Specifies the total size of the cabinet file, in bytes. - /// - public uint CabinetSize { get; private set; } - - /// - /// Reserved field; MUST be set to 0 (zero). 
- /// - public uint Reserved2 { get; private set; } - - /// - /// Specifies the absolute file offset, in bytes, of the first CFFILE field entry. - /// - public uint FilesOffset { get; private set; } - - /// - /// Reserved field; MUST be set to 0 (zero). - /// - public uint Reserved3 { get; private set; } - - /// - /// Specifies the minor cabinet file format version. This value MUST be set to 3 (three). - /// - public byte VersionMinor { get; private set; } - - /// - /// Specifies the major cabinet file format version. This value MUST be set to 1 (one). - /// - public byte VersionMajor { get; private set; } - - /// - /// Specifies the number of CFFOLDER field entries in this cabinet file. - /// - public ushort FolderCount { get; private set; } - - /// - /// Specifies the number of CFFILE field entries in this cabinet file. - /// - public ushort FileCount { get; private set; } - - /// - /// Specifies bit-mapped values that indicate the presence of optional data. - /// - public HeaderFlags Flags { get; private set; } - - /// - /// Specifies an arbitrarily derived (random) value that binds a collection of linked cabinet files - /// together.All cabinet files in a set will contain the same setID field value.This field is used by - /// cabinet file extractors to ensure that cabinet files are not inadvertently mixed.This value has no - /// meaning in a cabinet file that is not in a set. - /// - public ushort SetID { get; private set; } - - /// - /// Specifies the sequential number of this cabinet in a multicabinet set. The first cabinet has - /// iCabinet=0. This field, along with the setID field, is used by cabinet file extractors to ensure that - /// this cabinet is the correct continuation cabinet when spanning cabinet files. 
- /// - public ushort CabinetIndex { get; private set; } - - /// - /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not - /// present, and the value of cbCFHeader field MUST be zero.Indicates the size, in bytes, of the - /// abReserve field in this CFHEADER structure.Values for cbCFHeader field MUST be between 0- - /// 60,000. - /// - public ushort HeaderReservedSize { get; private set; } - - /// - /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not - /// present, and the value of cbCFFolder field MUST be zero.Indicates the size, in bytes, of the - /// abReserve field in each CFFOLDER field entry.Values for fhe cbCFFolder field MUST be between - /// 0-255. - /// - public byte FolderReservedSize { get; private set; } - - /// - /// If the flags.cfhdrRESERVE_PRESENT field is not set, this field is not - /// present, and the value for the cbCFDATA field MUST be zero.The cbCFDATA field indicates the - /// size, in bytes, of the abReserve field in each CFDATA field entry. Values for the cbCFDATA field - /// MUST be between 0 - 255. - /// - public byte DataReservedSize { get; private set; } - - /// - /// If the flags.cfhdrRESERVE_PRESENT field is set and the - /// cbCFHeader field is non-zero, this field contains per-cabinet-file application information. This field - /// is defined by the application, and is used for application-defined purposes. - /// - public byte[] ReservedData { get; private set; } - - /// - /// If the flags.cfhdrPREV_CABINET field is not set, this - /// field is not present.This is a null-terminated ASCII string that contains the file name of the - /// logically previous cabinet file. The string can contain up to 255 bytes, plus the null byte. 
Note that - /// this gives the name of the most recently preceding cabinet file that contains the initial instance of a - /// file entry.This might not be the immediately previous cabinet file, when the most recent file spans - /// multiple cabinet files.If searching in reverse for a specific file entry, or trying to extract a file that is - /// reported to begin in the "previous cabinet," the szCabinetPrev field would indicate the name of the - /// cabinet to examine. - /// - public byte[] CabinetPrev { get; private set; } - - /// - /// If the flags.cfhdrPREV_CABINET field is not set, then this - /// field is not present.This is a null-terminated ASCII string that contains a descriptive name for the - /// media that contains the file named in the szCabinetPrev field, such as the text on the disk label. - /// This string can be used when prompting the user to insert a disk. The string can contain up to 255 - /// bytes, plus the null byte. - /// - public byte[] DiskPrev { get; private set; } - - /// - /// If the flags.cfhdrNEXT_CABINET field is not set, this - /// field is not present.This is a null-terminated ASCII string that contains the file name of the next - /// cabinet file in a set. The string can contain up to 255 bytes, plus the null byte. Files that extend - /// beyond the end of the current cabinet file are continued in the named cabinet file. - /// - public byte[] CabinetNext { get; private set; } - - /// - /// If the flags.cfhdrNEXT_CABINET field is not set, this field is - /// not present.This is a null-terminated ASCII string that contains a descriptive name for the media - /// that contains the file named in the szCabinetNext field, such as the text on the disk label. The - /// string can contain up to 255 bytes, plus the null byte. This string can be used when prompting the - /// user to insert a disk. 
- /// - public byte[] DiskNext { get; private set; } - -#endregion - -#region Serialization - - /// - /// Deserialize at into a CFHEADER object - /// - public static CFHEADER Deserialize(byte[] data, ref int dataPtr) - { - if (data == null || dataPtr < 0) - return null; - - CFHEADER header = new CFHEADER(); - - header.Signature = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - if (header.Signature != SignatureValue) - return null; - - header.Reserved1 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - if (header.Reserved1 != 0x00000000) - return null; - - header.CabinetSize = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - if (header.CabinetSize > MSCABCabinet.MaximumCabSize) - return null; - - header.Reserved2 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - if (header.Reserved2 != 0x00000000) - return null; - - header.FilesOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - - header.Reserved3 = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - if (header.Reserved3 != 0x00000000) - return null; - - header.VersionMinor = data[dataPtr++]; - header.VersionMajor = data[dataPtr++]; - if (header.VersionMajor != 0x00000001 || header.VersionMinor != 0x00000003) - return null; - - header.FolderCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - if (header.FolderCount > MSCABCabinet.MaximumFolderCount) - return null; - - header.FileCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - if (header.FileCount > MSCABCabinet.MaximumFileCount) - return null; - - header.Flags = (HeaderFlags)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - header.SetID = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - header.CabinetIndex = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - - if (header.Flags.HasFlag(HeaderFlags.RESERVE_PRESENT)) - { - header.HeaderReservedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - if (header.HeaderReservedSize > 60_000) - return null; - - header.FolderReservedSize = 
data[dataPtr++]; - header.DataReservedSize = data[dataPtr++]; - - if (header.HeaderReservedSize > 0) - { - header.ReservedData = new byte[header.HeaderReservedSize]; - Array.Copy(data, dataPtr, header.ReservedData, 0, header.HeaderReservedSize); - dataPtr += header.HeaderReservedSize; - } - } - - // TODO: Make string-finding block a helper method - if (header.Flags.HasFlag(HeaderFlags.PREV_CABINET)) - { - byte[] cabPrev = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr); - if (cabPrev == null) - return null; - - header.CabinetPrev = cabPrev; - - byte[] diskPrev = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr); - if (diskPrev == null) - return null; - - header.DiskPrev = diskPrev; - } - - if (header.Flags.HasFlag(HeaderFlags.NEXT_CABINET)) - { - byte[] cabNext = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr); - if (cabNext == null) - return null; - - header.CabinetNext = cabNext; - - byte[] diskNext = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr); - if (diskNext == null) - return null; - - header.DiskNext = diskNext; - } - - return header; - } - -#endregion - -#region Public Functionality - - /// - /// Print all info about the cabinet file - /// - public void PrintInfo() - { - Console.WriteLine("CFHEADER INFORMATION:"); - Console.WriteLine("--------------------------------------------"); - Console.WriteLine($" Signature: {Encoding.ASCII.GetString(BitConverter.GetBytes(Signature))} (0x{Signature:X8})"); - Console.WriteLine($" Reserved1: {Reserved1} (0x{Reserved1:X8})"); - Console.WriteLine($" CabinetSize: {CabinetSize} (0x{CabinetSize:X8})"); - Console.WriteLine($" Reserved2: {Reserved2} (0x{Reserved2:X8})"); - Console.WriteLine($" FilesOffset: {FilesOffset} (0x{FilesOffset:X8})"); - Console.WriteLine($" Reserved3: {Reserved3} (0x{Reserved3:X8})"); - Console.WriteLine($" Version: {VersionMajor}.{VersionMinor}"); - Console.WriteLine($" FolderCount: {FolderCount} (0x{FolderCount:X4})"); - Console.WriteLine($" FileCount: {FileCount} 
(0x{FileCount:X4})"); - Console.WriteLine($" Flags: {Flags} (0x{(ushort)Flags:X4})"); - Console.WriteLine($" SetID: {SetID} (0x{SetID:X4})"); - Console.WriteLine($" CabinetIndex: {CabinetIndex} (0x{CabinetIndex:X4})"); - - if (Flags.HasFlag(HeaderFlags.RESERVE_PRESENT)) - { - Console.WriteLine($" HeaderReservedSize: {HeaderReservedSize} (0x{HeaderReservedSize:X4})"); - Console.WriteLine($" FolderReservedSize: {FolderReservedSize} (0x{FolderReservedSize:X2})"); - Console.WriteLine($" DataReservedSize: {DataReservedSize} (0x{DataReservedSize:X2})"); - // TODO: Output reserved data - } - - if (Flags.HasFlag(HeaderFlags.PREV_CABINET)) - { - Console.WriteLine($" CabinetPrev: {Encoding.ASCII.GetString(CabinetPrev).TrimEnd('\0')}"); - Console.WriteLine($" DiskPrev: {Encoding.ASCII.GetString(DiskPrev).TrimEnd('\0')}"); - } - - if (Flags.HasFlag(HeaderFlags.NEXT_CABINET)) - { - Console.WriteLine($" CabinetNext: {Encoding.ASCII.GetString(CabinetNext).TrimEnd('\0')}"); - Console.WriteLine($" DiskNext: {Encoding.ASCII.GetString(DiskNext).TrimEnd('\0')}"); - } - - Console.WriteLine(); - } - -#endregion - } - - [Flags] - internal enum HeaderFlags : ushort - { - /// - /// The flag is set if this cabinet file is not the first in a set of cabinet files. - /// When this bit is set, the szCabinetPrev and szDiskPrev fields are present in this CFHEADER - /// structure. The value is 0x0001. - /// - PREV_CABINET = 0x0001, - - /// - /// The flag is set if this cabinet file is not the last in a set of cabinet files. - /// When this bit is set, the szCabinetNext and szDiskNext fields are present in this CFHEADER - /// structure. The value is 0x0002. - /// - NEXT_CABINET = 0x0002, - - /// - /// The flag is set if if this cabinet file contains any reserved fields. When - /// this bit is set, the cbCFHeader, cbCFFolder, and cbCFData fields are present in this CFHEADER - /// structure. The value is 0x0004. 
- /// - RESERVE_PRESENT = 0x0004, - } - - /// - /// Each CFFOLDER structure contains information about one of the folders or partial folders stored in - /// this cabinet file, as shown in the following packet diagram.The first CFFOLDER structure entry - /// immediately follows the CFHEADER structure entry. The CFHEADER.cFolders field indicates how - /// many CFFOLDER structure entries are present. - /// - /// Folders can start in one cabinet, and continue on to one or more succeeding cabinets. When the - /// cabinet file creator detects that a folder has been continued into another cabinet, it will complete - /// that folder as soon as the current file has been completely compressed.Any additional files will be - /// placed in the next folder.Generally, this means that a folder would span at most two cabinets, but it - /// could span more than two cabinets if the file is large enough. - /// - /// CFFOLDER structure entries actually refer to folder fragments, not necessarily complete folders. A - /// CFFOLDER structure is the beginning of a folder if the iFolder field value in the first file that - /// references the folder does not indicate that the folder is continued from the previous cabinet file. - /// - /// The typeCompress field can vary from one folder to the next, unless the folder is continued from a - /// previous cabinet file. - /// - internal class CFFOLDER - { -#region Properties - - /// - /// Specifies the absolute file offset of the first CFDATA field block for the folder. - /// - public uint CabStartOffset { get; private set; } - - /// - /// Specifies the number of CFDATA structures for this folder that are actually in this cabinet. - /// A folder can continue into another cabinet and have more CFDATA structure blocks in that cabinet - /// file.A folder can start in a previous cabinet.This number represents only the CFDATA structures for - /// this folder that are at least partially recorded in this cabinet. 
- /// - public ushort DataCount { get; private set; } - - /// - /// Indicates the compression method used for all CFDATA structure entries in this - /// folder. - /// - public CompressionType CompressionType { get; private set; } - - /// - /// If the CFHEADER.flags.cfhdrRESERVE_PRESENT field is set - /// and the cbCFFolder field is non-zero, then this field contains per-folder application information. - /// This field is defined by the application, and is used for application-defined purposes. - /// - public byte[] ReservedData { get; private set; } - - /// - /// Data blocks associated with this folder - /// - public Dictionary DataBlocks { get; private set; } = new Dictionary(); - -#endregion - -#region Generated Properties - - /// - /// Get the uncompressed data associated with this folder, if possible - /// - public byte[] UncompressedData - { - get - { - if (DataBlocks == null || DataBlocks.Count == 0) - return null; - - // Store the last decompressed block for MS-ZIP - byte[] lastDecompressed = null; - - List data = new List(); - foreach (CFDATA dataBlock in DataBlocks.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Value)) - { - byte[] decompressed = null; - switch (CompressionType) - { - case CompressionType.TYPE_NONE: - decompressed = dataBlock.CompressedData; - break; - case CompressionType.TYPE_MSZIP: - decompressed = MSZIPBlock.Deserialize(dataBlock.CompressedData).DecompressBlock(dataBlock.UncompressedSize, lastDecompressed); - break; - case CompressionType.TYPE_QUANTUM: - // TODO: UNIMPLEMENTED - break; - case CompressionType.TYPE_LZX: - // TODO: UNIMPLEMENTED - break; - default: - return null; - } - - lastDecompressed = decompressed; - if (decompressed != null) - data.AddRange(decompressed); - } - - return data.ToArray(); - } - } - -#endregion - -#region Serialization - - /// - /// Deserialize at into a CFFOLDER object - /// - public static CFFOLDER Deserialize(byte[] data, ref int dataPtr, int basePtr, byte folderReservedSize, byte dataReservedSize) - { - 
if (data == null || dataPtr < 0) - return null; - - CFFOLDER folder = new CFFOLDER(); - - folder.CabStartOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - folder.DataCount = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - folder.CompressionType = (CompressionType)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - - if (folderReservedSize > 0) - { - folder.ReservedData = new byte[folderReservedSize]; - Array.Copy(data, dataPtr, folder.ReservedData, 0, folderReservedSize); - dataPtr += folderReservedSize; - } - - if (folder.CabStartOffset > 0) - { - int blockPtr = basePtr + (int)folder.CabStartOffset; - for (int i = 0; i < folder.DataCount; i++) - { - int offset = blockPtr; - CFDATA dataBlock = CFDATA.Deserialize(data, ref blockPtr, dataReservedSize); - folder.DataBlocks[offset] = dataBlock; - } - } - - return folder; - } - -#endregion - -#region Public Functionality - - /// - /// Print all info about the cabinet file - /// - public void PrintInfo() - { - Console.WriteLine($" CabStartOffset: {CabStartOffset} (0x{CabStartOffset:X8})"); - Console.WriteLine($" DataCount: {DataCount} (0x{DataCount:X4})"); - Console.WriteLine($" CompressionType: {CompressionType} (0x{(ushort)CompressionType:X4})"); - // TODO: Output reserved data - - Console.WriteLine(); - } - -#endregion - } - - internal enum CompressionType : ushort - { - /// - /// Mask for compression type. - /// - MASK_TYPE = 0x000F, - - /// - /// No compression. - /// - TYPE_NONE = 0x0000, - - /// - /// MSZIP compression. - /// - TYPE_MSZIP = 0x0001, - - /// - /// Quantum compression. - /// - TYPE_QUANTUM = 0x0002, - - /// - /// LZX compression. - /// - TYPE_LZX = 0x0003, - } - - /// - /// Each CFFILE structure contains information about one of the files stored (or at least partially - /// stored) in this cabinet, as shown in the following packet diagram.The first CFFILE structure entry in - /// each cabinet is found at the absolute offset CFHEADER.coffFiles field. 
CFHEADER.cFiles field - /// indicates how many of these entries are in the cabinet. The CFFILE structure entries in a cabinet - /// are ordered by iFolder field value, and then by the uoffFolderStart field value.Entries for files - /// continued from the previous cabinet will be first, and entries for files continued to the next cabinet - /// will be last. - /// - internal class CFFILE - { -#region Properties - - /// - /// Specifies the uncompressed size of this file, in bytes. - /// - public uint FileSize { get; private set; } - - /// - /// Specifies the uncompressed offset, in bytes, of the start of this file's data. For the - /// first file in each folder, this value will usually be zero. Subsequent files in the folder will have offsets - /// that are typically the running sum of the cbFile field values. - /// - public uint FolderStartOffset { get; private set; } - - /// - /// Index of the folder that contains this file's data. - /// - public FolderIndex FolderIndex { get; private set; } - - /// - /// Date of this file, in the format ((year–1980) << 9)+(month << 5)+(day), where - /// month={1..12} and day = { 1..31 }. This "date" is typically considered the "last modified" date in local - /// time, but the actual definition is application-defined. - /// - public ushort Date { get; private set; } - - /// - /// Time of this file, in the format (hour << 11)+(minute << 5)+(seconds/2), where - /// hour={0..23}. This "time" is typically considered the "last modified" time in local time, but the - /// actual definition is application-defined. - /// - public ushort Time { get; private set; } - - /// - /// Attributes of this file; can be used in any combination. - /// - public FileAttributes Attributes { get; private set; } - - /// - /// The null-terminated name of this file. Note that this string can include path - /// separator characters.The string can contain up to 256 bytes, plus the null byte. 
When the - /// _A_NAME_IS_UTF attribute is set, this string can be converted directly to Unicode, avoiding - /// locale-specific dependencies. When the _A_NAME_IS_UTF attribute is not set, this string is subject - /// to interpretation depending on locale. When a string that contains Unicode characters larger than - /// 0x007F is encoded in the szName field, the _A_NAME_IS_UTF attribute SHOULD be included in - /// the file's attributes. When no characters larger than 0x007F are in the name, the - /// _A_NAME_IS_UTF attribute SHOULD NOT be set. If byte values larger than 0x7F are found in - /// CFFILE.szName field, but the _A_NAME_IS_UTF attribute is not set, the characters SHOULD be - /// interpreted according to the current location. - /// - public byte[] Name { get; private set; } - -#endregion - -#region Generated Properties - - /// - /// Name value as a string (not null-terminated) - /// - public string NameAsString - { - get - { - // Perform sanity checks - if (Name == null || Name.Length == 0) - return null; - - // Attempt to respect the attribute flag for UTF-8 - if (Attributes.HasFlag(FileAttributes.NAME_IS_UTF)) - { - try - { - return Encoding.UTF8.GetString(Name).TrimEnd('\0'); - } - catch { } - } - - // Default case uses local encoding - return Encoding.Default.GetString(Name).TrimEnd('\0'); - } - } - - /// - /// Convert the internal values into a DateTime object, if possible - /// - public DateTime DateAndTimeAsDateTime - { - get - { - // Date property - int year = (Date >> 9) + 1980; - int month = (Date >> 5) & 0x0F; - int day = Date & 0x1F; - - // Time property - int hour = Time >> 11; - int minute = (Time >> 5) & 0x3F; - int second = (Time << 1) & 0x3E; - - return new DateTime(year, month, day, hour, minute, second); - } - set - { - Date = (ushort)(((value.Year - 1980) << 9) + (value.Month << 5) + (value.Day)); - Time = (ushort)((value.Hour << 11) + (value.Minute << 5) + (value.Second / 2)); - } - } - -#endregion - -#region Serialization - - /// - 
/// Deserialize at into a CFFILE object - /// - public static CFFILE Deserialize(byte[] data, ref int dataPtr) - { - if (data == null || dataPtr < 0) - return null; - - CFFILE file = new CFFILE(); - - file.FileSize = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - file.FolderStartOffset = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - file.FolderIndex = (FolderIndex)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - file.Date = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - file.Time = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - file.Attributes = (FileAttributes)BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - - byte[] name = MSCABCabinet.GetNullTerminatedString(data, ref dataPtr); - if (name == null) - return null; - - file.Name = name; - - return file; - } - -#endregion - -#region Public Functionality - - /// - /// Print all info about the cabinet file - /// - public void PrintInfo() - { - Console.WriteLine($" FileSize: {FileSize} (0x{FileSize:X8})"); - Console.WriteLine($" FolderStartOffset: {FolderStartOffset} (0x{FolderStartOffset:X4})"); - Console.WriteLine($" FolderIndex: {FolderIndex} (0x{(ushort)FolderIndex:X4})"); - Console.WriteLine($" DateTime: {DateAndTimeAsDateTime} (0x{Date:X4} 0x{Time:X4})"); - Console.WriteLine($" Attributes: {Attributes} (0x{(ushort)Attributes:X4})"); - Console.WriteLine($" Name: {NameAsString}"); - - Console.WriteLine(); - } - -#endregion - } - - internal enum FolderIndex : ushort - { - /// - /// A value of zero indicates that this is the - /// first folder in this cabinet file. - /// - FIRST_FOLDER = 0x0000, - - /// - /// Indicates that the folder index is actually zero, but that - /// extraction of this file would have to begin with the cabinet named in the - /// CFHEADER.szCabinetPrev field. 
- /// - CONTINUED_FROM_PREV = 0xFFFD, - - /// - /// Indicates that the folder index - /// is actually one less than THE CFHEADER.cFolders field value, and that extraction of this file will - /// require continuation to the cabinet named in the CFHEADER.szCabinetNext field. - /// - CONTINUED_TO_NEXT = 0xFFFE, - - /// - /// - CONTINUED_PREV_AND_NEXT = 0xFFFF, - } - - [Flags] - internal enum FileAttributes : ushort - { - /// - /// File is read-only. - /// - RDONLY = 0x0001, - - /// - /// File is hidden. - /// - HIDDEN = 0x0002, - - /// - /// File is a system file. - /// - SYSTEM = 0x0004, - - /// - /// File has been modified since last backup. - /// - ARCH = 0x0040, - - /// - /// File will be run after extraction. - /// - EXEC = 0x0080, - - /// - /// The szName field contains UTF. - /// - NAME_IS_UTF = 0x0100, - } - - /// - /// Each CFDATA structure describes some amount of compressed data, as shown in the following - /// packet diagram. The first CFDATA structure entry for each folder is located by using the - /// field. Subsequent CFDATA structure records for this folder are - /// contiguous. - /// - internal class CFDATA - { -#region Properties - - /// - /// Checksum of this CFDATA structure, from the through the - /// fields. It can be set to 0 (zero) if the checksum is not supplied. - /// - public uint Checksum { get; private set; } - - /// - /// Number of bytes of compressed data in this CFDATA structure record. When the - /// field is zero, this field indicates only the number of bytes that fit into this cabinet file. - /// - public ushort CompressedSize { get; private set; } - - /// - /// The uncompressed size of the data in this CFDATA structure entry in bytes. When this - /// CFDATA structure entry is continued in the next cabinet file, the field will be zero, and - /// the field in the first CFDATA structure entry in the next cabinet file will report the total - /// uncompressed size of the data from both CFDATA structure blocks. 
- /// - public ushort UncompressedSize { get; private set; } - - /// - /// If the flag is set - /// and the field value is non-zero, this field contains per-datablock application information. - /// This field is defined by the application, and it is used for application-defined purposes. - /// - public byte[] ReservedData { get; private set; } - - /// - /// The compressed data bytes, compressed by using the - /// method. When the field value is zero, these data bytes MUST be combined with the data - /// bytes from the next cabinet's first CFDATA structure entry before decompression. When the - /// field indicates that the data is not compressed, this field contains the - /// uncompressed data bytes. In this case, the and field values will be equal unless - /// this CFDATA structure entry crosses a cabinet file boundary. - /// - public byte[] CompressedData { get; private set; } - -#endregion - -#region Serialization - - /// - /// Deserialize at into a CFDATA object - /// - public static CFDATA Deserialize(byte[] data, ref int dataPtr, byte dataReservedSize = 0) - { - if (data == null || dataPtr < 0) - return null; - - CFDATA dataBlock = new CFDATA(); - - dataBlock.Checksum = BitConverter.ToUInt32(data, dataPtr); dataPtr += 4; - dataBlock.CompressedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - dataBlock.UncompressedSize = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - - if (dataBlock.UncompressedSize != 0 && dataBlock.CompressedSize > dataBlock.UncompressedSize) - return null; - - if (dataReservedSize > 0) - { - dataBlock.ReservedData = new byte[dataReservedSize]; - Array.Copy(data, dataPtr, dataBlock.ReservedData, 0, dataReservedSize); - dataPtr += dataReservedSize; - } - - if (dataBlock.CompressedSize > 0) - { - dataBlock.CompressedData = new byte[dataBlock.CompressedSize]; - Array.Copy(data, dataPtr, dataBlock.CompressedData, 0, dataBlock.CompressedSize); - dataPtr += dataBlock.CompressedSize; - } - - return dataBlock; - } - -#endregion - } 
- - /// - /// The computation and verification of checksums found in CFDATA structure entries cabinet files is - /// done by using a function described by the following mathematical notation. When checksums are - /// not supplied by the cabinet file creating application, the checksum field is set to 0 (zero). Cabinet - /// extracting applications do not compute or verify the checksum if the field is set to 0 (zero). - /// - internal static class Checksum - { - // TODO: Implement from `[MS-CAB].pdf` - //public static uint ChecksumData(byte[] data) - //{ - - //} - } - -#endregion - -#region TEMPORARY AREA FOR MS-ZIP COMPRESSION FORMAT - - /// - /// Each MSZIP block MUST consist of a 2-byte MSZIP signature and one or more RFC 1951 blocks. The - /// 2-byte MSZIP signature MUST consist of the bytes 0x43 and 0x4B. The MSZIP signature MUST be - /// the first 2 bytes in the MSZIP block.The MSZIP signature is shown in the following packet diagram. - /// - internal class MSZIPBlock - { -#region Constants - - /// - /// Human-readable signature - /// - public static readonly string SignatureString = "CK"; - - /// - /// Signature as an unsigned Int16 value - /// - public const ushort SignatureValue = 0x4B43; - - /// - /// Signature as a byte array - /// - public static readonly byte[] SignatureBytes = new byte[] { 0x43, 0x4B }; - -#endregion - -#region Properties - - /// - /// 'CB' - /// - public ushort Signature { get; private set; } - - /// - /// Each MSZIP block is the result of a single deflate compression operation, as defined in [RFC1951]. - /// The compressor that performs the compression operation MUST generate one or more RFC 1951 - /// blocks, as defined in [RFC1951]. The number, deflation mode, and type of RFC 1951 blocks in each - /// MSZIP block is determined by the compressor, as defined in [RFC1951]. The last RFC 1951 block in - /// each MSZIP block MUST be marked as the "end" of the stream(1), as defined by[RFC1951] - /// section 3.2.3. 
Decoding trees MUST be discarded after each RFC 1951 block, but the history buffer - /// MUST be maintained.Each MSZIP block MUST represent no more than 32 KB of uncompressed data. - /// - /// The maximum compressed size of each MSZIP block is 32 KB + 12 bytes.This enables the MSZIP - /// block to contain 32 KB of data split between two noncompressed RFC 1951 blocks, each of which - /// has a value of BTYPE = 00. - /// - public byte[] Data { get; private set; } - -#endregion - -#region Static Properties - - public static ZStream DecompressionStream { get; set; } = new ZStream(); - -#endregion - -#region Serialization - - public static MSZIPBlock Deserialize(byte[] data) - { - if (data == null) - return null; - - MSZIPBlock block = new MSZIPBlock(); - int dataPtr = 0; - - block.Signature = BitConverter.ToUInt16(data, dataPtr); dataPtr += 2; - if (block.Signature != SignatureValue) - return null; - - block.Data = new byte[data.Length - 2]; - Array.Copy(data, dataPtr, block.Data, 0, data.Length - 2); - dataPtr += data.Length - 2; - - return block; - } - -#endregion - -#region Public Functionality - - /// - /// Decompress a single block of MS-ZIP data - /// - public byte[] DecompressBlock(int decompressedSize, byte[] previousBytes = null) - { - if (Data == null || Data.Length == 0) - return null; - - try - { - // The first block can use DeflateStream since it has no history - if (previousBytes == null) - { - // Setup the input - DecompressionStream = new ZStream(); - int initErr = DecompressionStream.inflateInit(); - if (initErr != zlibConst.Z_OK) - return null; - } - - // All n+1 blocks require the previous uncompressed data as a dictionary - else - { - // TODO: We need to force a dictionary setting - at this point, mode is 8 not 6 - - // Setup the dictionary - int dictErr = DecompressionStream.inflateSetDictionary(previousBytes, previousBytes.Length); - if (dictErr != zlibConst.Z_OK) - return null; - } - - // Setup the output - byte[] output = new 
byte[decompressedSize]; - DecompressionStream.next_out = output; - DecompressionStream.avail_out = decompressedSize; - - // Inflate the data -- 0x78, 0x9C is needed to trick zlib - DecompressionStream.next_in = new byte[] { 0x78, 0x9C }.Concat(Data).ToArray(); - DecompressionStream.next_in_index = 0; - DecompressionStream.avail_in = Data.Length + 2; - - int err = DecompressionStream.inflate(zlibConst.Z_FULL_FLUSH); - if (err != zlibConst.Z_OK) - return null; - - return output; - } - catch - { - return null; - } - } - -#endregion - } - -#endregion - -#region TEMPORARY AREA FOR QUANTUM COMPRESSION FORMAT - - // See http://www.russotto.net/quantumcomp.html for details about implementation - - internal enum SelectorModel - { - /// - /// Literal model, 64 entries, start at symbol 0 - /// - SELECTOR_0 = 0, - - /// - /// Literal model, 64 entries, start at symbol 64 - /// - SELECTOR_1 = 1, - - /// - /// Literal model, 64 entries, start at symbol 128 - /// - SELECTOR_2 = 2, - - /// - /// Literal model, 64 entries, start at symbol 192 - /// - SELECTOR_3 = 3, - - /// - /// LZ model, 3 character matches, max 24 entries, start at symbol 0 - /// - SELECTOR_4 = 4, - - /// - /// LZ model, 4 character matches, max 36 entries, start at symbol 0 - /// - SELECTOR_5 = 5, - - /// - /// LZ model, 5+ character matches, max 42 entries, start at symbol 0 - /// - SELECTOR_6_POSITION = 6, - - /// - /// LZ model, 5+ character matches, max 27 entries, start at symbol 0 - /// - SELECTOR_6_LENGTH = 7, - } - -#region LZ Compression Tables - - internal static readonly uint[] PositionBaseTable = new uint[] - { - 0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00006, 0x00008, 0x0000c, - 0x00010, 0x00018, 0x00020, 0x00030, 0x00040, 0x00060, 0x00080, 0x000c0, - 0x00100, 0x00180, 0x00200, 0x00300, 0x00400, 0x00600, 0x00800, 0x00c00, - 0x01000, 0x01800, 0x02000, 0x03000, 0x04000, 0x06000, 0x08000, 0x0c000, - 0x10000, 0x18000, 0x20000, 0x30000, 0x40000, 0x60000, 0x80000, 0xc0000, - 0x100000, 0x180000, - 
}; - - internal static readonly int[] PositionExtraBitsTable = new int[] - { - 0, 0, 0, 0, 1, 1, 2, 2, - 3, 3, 4, 4, 5, 5, 6, 6, - 7, 7, 8, 8, 9, 9, 10, 10, - 11, 11, 12, 12, 13, 13, 14, 14, - 15, 15, 16, 16, 17, 17, 18, 18, - 19, 19, - }; - - internal static readonly byte[] LengthBaseTable = new byte[] - { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x0a, 0x0c, 0x0e, 0x12, 0x16, 0x1a, 0x1e, 0x26, - 0x2e, 0x36, 0x3e, 0x4e, 0x5e, 0x6e, 0x7e, 0x9e, - 0xbe, 0xde, 0xfe - }; - - internal static readonly int[] LengthExtraBitsTable = new int[] - { - 0, 0, 0, 0, 0, 0, 1, 1, - 1, 1, 2, 2, 2, 2, 3, 3, - 3, 3, 4, 4, 4, 4, 5, 5, - 5, 5, 0, - }; - -#endregion - - /// - /// Number of position slots for (tsize - 10) - /// - internal static readonly int[] NumberOfPositionSlots = new int[] - { - 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, - }; - - internal static class QuantumCompressor - { - // TODO: Determine how these values are set - private static uint CS_C = 0; - private static uint CS_H = 0; - private static uint CS_L = 0; - - /// - /// Get frequency from code - /// - public static ushort GetFrequency(ushort totfreq) - { - uint range = ((CS_H - CS_L) & 0xFFFF) + 1; - uint freq = ((CS_C - CS_L + 1) * totfreq - 1) / range; - return (ushort)(freq & 0xFFFF); - } - - /// - /// The decoder renormalization loop - /// - public static int GetCode(int cumfreqm1, int cumfreq, int totfreq) - { - uint range = (CS_H - CS_L) + 1; - CS_H = CS_L + (uint)((cumfreqm1 * range) / totfreq) - 1; - CS_L = CS_L + (uint)((cumfreq * range) / totfreq); - - while (true) - { - if ((CS_L & 0x8000) != (CS_H & 0x8000)) - { - if ((CS_L & 0x4000) != 0 && (CS_H & 0x4000) == 0) - { - // Underflow case - CS_C ^= 0x4000; - CS_L &= 0x3FFF; - CS_H |= 0x4000; - } - else - { - break; - } - } - - CS_L <<= 1; - CS_H = (CS_H << 1) | 1; - CS_C = (CS_C << 1) | 0; // TODO: Figure out what `getbit()` is and replace the placeholder `0` - } - - // TODO: Figure out what is supposed to return here - return 0; - } - 
- public static int GetSymbol(Model model) - { - int freq = GetFrequency(model.Symbols[0].CumulativeFrequency); - - int i = 1; - for (; i < model.Entries; i++) - { - if (model.Symbols[i].CumulativeFrequency <= freq) - break; - } - - int sym = model.Symbols[i - 1].Symbol; - - GetCode(model.Symbols[i - 1].CumulativeFrequency, model.Symbols[i].CumulativeFrequency, model.Symbols[0].CumulativeFrequency); - - // TODO: Figure out what `update_model` does - //update_model(model, i); - - return sym; - } - } - - internal class ModelSymbol - { - public ushort Symbol { get; private set; } - - public ushort CumulativeFrequency { get; private set; } - } - - internal class Model - { - public int Entries { get; private set; } - - public ModelSymbol[] Symbols { get; private set; } - } - -#endregion - -#region TEMPORARY AREA FOR LZX COMPRESSION FORMAT - - // See the following for details about implementation (there is no open spec): - // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzx.h - // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxc.c - // https://github.com/kyz/libmspack/blob/master/libmspack/mspack/lzxd.c - -#endregion } }