Simplify and fix MS-OLE

This commit is contained in:
Matt Nadareski
2022-06-18 21:16:11 -07:00
parent d46b0768a0
commit dc7f8da52f
3 changed files with 141 additions and 75 deletions

View File

@@ -109,6 +109,9 @@ namespace LibGSF
#region Properties
/// <summary>
/// MS-OLE header signature
/// </summary>
/// <remarks>0x00</remarks>
public byte[] SIGNATURE { get; set; }
@@ -136,9 +139,15 @@ namespace LibGSF
/// <remarks>0x1C</remarks>
public ushort BYTE_ORDER { get; set; }
/// <summary>
/// Number of bits to shift to get a big block size
/// </summary>
/// <remarks>0x1E</remarks>
public ushort BB_SHIFT { get; set; }
/// <summary>
/// Number of bits to shift to get a small block size
/// </summary>
/// <remarks>0x20</remarks>
public ushort SB_SHIFT { get; set; }
@@ -150,10 +159,16 @@ namespace LibGSF
/// <remarks>0x28</remarks>
public uint CSECTDIR { get; set; }
/// <summary>
/// Reported number of big block BATs in the file
/// </summary>
/// <remarks>0x2C</remarks>
public uint NUM_BAT { get; set; }
/// <summary>
/// Directory entry start offset
/// </summary>
/// <remarks>0x30</remarks>
public uint DIRENT_START { get; set; }
@@ -164,23 +179,38 @@ namespace LibGSF
public uint TRANSACTING_SIGNATURE { get; set; }
/// <summary>
/// Transition between small and big blocks
/// File size transition between small and big blocks
/// </summary>
/// <remarks>0x38</remarks>
public uint THRESHOLD { get; set; }
/// <summary>
/// Offset where small block BATs start
/// </summary>
/// <remarks>0x3C</remarks>
public uint SBAT_START { get; set; }
/// <summary>
/// Reported number of small block BATs in the file
/// </summary>
/// <remarks>0x40</remarks>
public uint NUM_SBAT { get; set; }
/// <summary>
/// Block ID of the first MetaBAT block
/// </summary>
/// <remarks>0x44</remarks>
public uint METABAT_BLOCK { get; set; }
/// <summary>
/// Number of MetaBAT blocks in the file
/// </summary>
/// <remarks>0x48</remarks>
public uint NUM_METABAT { get; set; }
/// <summary>
/// Block ID of the first BAT block
/// </summary>
/// <remarks>0x4C</remarks>
public uint START_BAT { get; set; }
@@ -193,10 +223,16 @@ namespace LibGSF
/// </summary>
public bool LITTLE_ENDIAN => BYTE_ORDER == 0xFFFE;
/// <summary>
/// Size of a big block, in bytes
/// </summary>
public int BB_SIZE => 1 << BB_SHIFT;
public int BB_FILTER => BB_SIZE << 1;
/// <summary>
/// Size of a small block, in bytes
/// </summary>
public int SB_SIZE => 1 << SB_SHIFT;
public int SB_FILTER => SB_SIZE << 1;
@@ -340,12 +376,24 @@ namespace LibGSF
{
#region Constants
/// <summary>
/// Maximum length of the entry name including the null terminator, in bytes
/// </summary>
public const int DIRENT_MAX_NAME_SIZE = 0x40;
/// <summary>
/// Size of the non-name parts of directory entry header, in bytes
/// </summary>
public const int DIRENT_DETAILS_SIZE = 0x40;
/// <summary>
/// Total size of the directory entry header
/// </summary>
public const int DIRENT_SIZE = (DIRENT_MAX_NAME_SIZE + DIRENT_DETAILS_SIZE);
/// <summary>
/// Magic number indicating the termination of a node
/// </summary>
public const uint DIRENT_MAGIC_END = 0xffffffff;
#region Offsets
@@ -384,27 +432,42 @@ namespace LibGSF
#region Properties
/// <summary>
/// Name of the entry as a byte array
/// </summary>
/// <remarks>0x00</remarks>
public byte[] NAME { get; set; }
/// <summary>
/// Length in bytes incl 0 terminator
/// Length of the entry name including the null terminator, in bytes
/// </summary>
/// <remarks>0x40</remarks>
public ushort NAME_LEN { get; set; }
/// <summary>
/// Indicates the type of directory entry
/// </summary>
/// <remarks>0x42</remarks>
public DIRENT_TYPE TYPE_FLAG { get; set; }
/// <remarks>0x43</remarks>
public byte COLOR { get; set; }
/// <summary>
/// Index of the previous item in the linked list, DIRENT_MAGIC_END if none
/// </summary>
/// <remarks>0x44</remarks>
public uint PREV { get; set; }
/// <summary>
/// Index of the next item in the linked list, DIRENT_MAGIC_END if none
/// </summary>
/// <remarks>0x48</remarks>
public uint NEXT { get; set; }
/// <summary>
/// Index of the first child directory entry, DIRENT_MAGIC_END if none
/// </summary>
/// <remarks>0x4C</remarks>
public uint CHILD { get; set; }
@@ -421,20 +484,26 @@ namespace LibGSF
public uint USERFLAGS { get; set; }
/// <summary>
/// For files
/// Entry creation time; for files
/// </summary>
/// <remarks>0x64</remarks>
public ulong CREATE_TIME { get; set; }
/// <summary>
/// For files
/// Entry modification time; for files
/// </summary>
/// <remarks>0x6C</remarks>
public ulong MODIFY_TIME { get; set; }
/// <summary>
/// Index of the first block that contains this directory entry
/// </summary>
/// <remarks>0x74</remarks>
public uint FIRSTBLOCK { get; set; }
/// <summary>
/// Size of the file contained within this directory entry
/// </summary>
/// <remarks>0x78</remarks>
public uint FILE_SIZE { get; set; }

View File

@@ -32,8 +32,7 @@ using static LibGSF.GsfUtils;
namespace LibGSF.Input
{
// TODO: Can this be made internal?
public class MSOleBAT
internal class MSOleBAT
{
#region Properties
@@ -65,9 +64,8 @@ namespace LibGSF.Input
/// <param name="block">The first block in the list.</param>
/// <param name="res">Where to store the result.</param>
/// <returns>True on error.</returns>
public static bool Create(MSOleBAT metabat, uint block, out MSOleBAT res)
public static bool Create(in MSOleBAT metabat, uint block, out MSOleBAT res)
{
// NOTE : Only use size as a suggestion, sometimes it is wrong
List<uint> bat = new List<uint>();
byte[] used = new byte[1 + metabat.NumBlocks / 8];
@@ -98,11 +96,15 @@ namespace LibGSF.Input
#endregion
}
// TODO: Can this be made internal?
public class MSOleDirent
internal class MSOleDirent
{
#region Properties
/// <summary>
/// Internal representation of the MS-OLE directory entry header
/// </summary>
public MSOleDirectoryEntry Header { get; set; }
public GsfMSOleSortingKey Key { get; set; }
public uint Index { get; set; }
@@ -111,11 +113,6 @@ namespace LibGSF.Input
public List<MSOleDirent> Children { get; set; } = new List<MSOleDirent>();
/// <summary>
/// Internal representation of the MS-OLE directory entry header
/// </summary>
internal MSOleDirectoryEntry Header { get; set; }
#endregion
#region Functions
@@ -136,15 +133,24 @@ namespace LibGSF.Input
#endregion
}
// TODO: Can this be made internal?
public class MSOleInfo
internal class MSOleInfo
{
#region Properties
/// <summary>
/// Internal representation of the MS-OLE header
/// </summary>
public MSOleHeader Header { get; set; }
public MSOleBAT BigBlockBat { get; set; }
public MSOleBAT SmallBlockBat { get; set; }
/// <summary>
/// MetaBAT for all BATs in the file
/// </summary>
public uint[] MetaBAT { get; set; }
/// <summary>
/// Maximum number of blocks derived from total input length and block size
/// </summary>
@@ -156,11 +162,6 @@ namespace LibGSF.Input
public int RefCount { get; set; }
/// <summary>
/// Internal representation of the MS-OLE header
/// </summary>
internal MSOleHeader Header { get; set; }
#endregion
#region Functions
@@ -170,6 +171,8 @@ namespace LibGSF.Input
if (RefCount-- != 1)
return;
Header = null;
if (RootDir != null)
{
RootDir.Free();
@@ -178,8 +181,6 @@ namespace LibGSF.Input
if (SmallBlockFile != null)
SmallBlockFile = null;
Header = null;
}
public MSOleInfo Ref()
@@ -195,18 +196,18 @@ namespace LibGSF.Input
{
#region Properties
public GsfInput Input { get; private set; } = null;
internal GsfInput Input { get; private set; } = null;
public MSOleInfo Info { get; private set; } = null;
internal MSOleInfo Info { get; private set; } = null;
public MSOleDirent DirectoryEntry { get; private set; }
internal MSOleDirent DirectoryEntry { get; private set; }
public MSOleBAT Bat { get; private set; }
internal MSOleBAT Bat { get; private set; }
public long CurBlock { get; private set; } = BAT_MAGIC_UNUSED;
internal long CurBlock { get; private set; } = BAT_MAGIC_UNUSED;
/// <remarks>Actually `{ byte[] Buf, long BufSize }`</remarks>
public byte[] Stream { get; private set; }
internal byte[] Stream { get; private set; }
#endregion
@@ -251,8 +252,7 @@ namespace LibGSF.Input
/// </summary>
~GsfInfileMSOle()
{
if (Input != null)
Input = null;
Input = null;
if (Info != null && Info.SmallBlockFile != this)
{
@@ -320,14 +320,11 @@ namespace LibGSF.Input
optional_buffer_ptr = 0;
}
int ptr = optional_buffer_ptr; // optional_buffer[0]
int ptr = optional_buffer_ptr; // optional_buffer[optional_buffer_ptr]
int count;
for (i = first_block; i <= last_block; i++, ptr += count, num_bytes -= count)
{
count = (int)(Info.Header.BB_SIZE - offset);
if (count > num_bytes)
count = num_bytes;
count = (int)Math.Min(Info.Header.BB_SIZE - offset, num_bytes);
if (!SeekBlock(Bat.Blocks[i], offset))
return null;
@@ -450,9 +447,9 @@ namespace LibGSF.Input
/// either from the OLE header, or a meta-bat block.
/// </summary>
/// <returns>A pointer to the element after the last position filled</returns>
private int? ReadMetabat(uint[] bats, int batsPtr, uint max_bat, uint[] metabat, uint metabat_end)
private int? ReadMetabat(uint[] bats, int batsPtr, uint max_bat, in uint[] metabat, int metabatPtr, in uint metabat_end)
{
for (int metabatPtr = 0; metabatPtr < metabat_end; metabatPtr++)
for (; metabatPtr < metabat_end; metabatPtr++)
{
if (metabat[metabatPtr] != BAT_MAGIC_UNUSED)
{
@@ -477,6 +474,7 @@ namespace LibGSF.Input
// 'unused' entries in the metabat. Let's assume that
// corresponds to lots of unused blocks
// http://bugzilla.gnome.org/show_bug.cgi?id=336858
uint i = (uint)(Info.Header.BB_SIZE / BAT_INDEX_SIZE);
while (i-- > 0)
{
@@ -491,7 +489,7 @@ namespace LibGSF.Input
/// <summary>
/// Copy some raw data into an array of uint.
/// </summary>
private static void GetUnsignedInts(uint[] dst, ref int dstPtr, byte[] src, int srcPtr, int num_bytes)
private static void GetUnsignedInts(uint[] dst, int dstPtr, in byte[] src, int srcPtr, int num_bytes)
{
for (; (num_bytes -= BAT_INDEX_SIZE) >= 0; srcPtr += BAT_INDEX_SIZE)
{
@@ -510,8 +508,7 @@ namespace LibGSF.Input
return null;
// Avoid creating a circular reference
if (Info.SmallBlockFile is GsfInfileMSOle)
(Info.SmallBlockFile as GsfInfileMSOle).Info.Unref();
(Info.SmallBlockFile as GsfInfileMSOle)?.Info?.Unref();
if (Info.SmallBlockBat.Blocks != null)
return null;
@@ -520,15 +517,13 @@ namespace LibGSF.Input
return null;
Info.SmallBlockBat.Blocks = new uint[meta_sbat.NumBlocks * (Info.Header.BB_SIZE / BAT_INDEX_SIZE)];
ReadMetabat(Info.SmallBlockBat.Blocks, 0, Info.SmallBlockBat.NumBlocks, meta_sbat.Blocks, meta_sbat.NumBlocks);
ReadMetabat(Info.SmallBlockBat.Blocks, 0, Info.SmallBlockBat.NumBlocks, meta_sbat.Blocks, 0, meta_sbat.NumBlocks);
return Info.SmallBlockFile;
}
private static int DirectoryEntryCompare(MSOleDirent a, MSOleDirent b) => SortingKeyCompare(a.Key, b.Key);
private static DateTime? DateTimeFromFileTime(ulong ft) => ft == 0 ? (DateTime?)null : DateTime.FromFileTime((long)ft);
/// <summary>
/// Parse dirent number <paramref name="entry"/> and recursively handle its siblings and children.
/// parent is optional.
@@ -540,7 +535,7 @@ namespace LibGSF.Input
if (entry > uint.MaxValue / MSOleDirectoryEntry.DIRENT_SIZE)
return null;
uint block = ((entry * MSOleDirectoryEntry.DIRENT_SIZE) >> Info.Header.BB_SHIFT);
uint block = (entry * MSOleDirectoryEntry.DIRENT_SIZE) >> Info.Header.BB_SHIFT;
if (block >= Bat.NumBlocks)
return null;
@@ -577,14 +572,13 @@ namespace LibGSF.Input
MSOleDirent dirent = new MSOleDirent
{
Header = directoryEntry,
Key = GsfMSOleSortingKey.Create(directoryEntry.NAME_STRING),
Index = entry,
// Root dir is always big block
UseSmallBlock = parent != null && (directoryEntry.FILE_SIZE < Info.Header.THRESHOLD),
Children = new List<MSOleDirent>(),
Header = directoryEntry,
};
if (parent != null)
@@ -622,7 +616,7 @@ namespace LibGSF.Input
{
Input = input,
Info = Info.Ref(),
Stream = new byte[0],
Stream = null,
};
return dst;
@@ -665,18 +659,17 @@ namespace LibGSF.Input
Info = new MSOleInfo
{
Header = headerImpl,
BigBlockBat = new MSOleBAT(),
SmallBlockBat = new MSOleBAT(),
MetaBAT = null,
MaxBlock = (Input.Size - MSOleHeader.OLE_HEADER_SIZE + headerImpl.BB_SIZE - 1) / headerImpl.BB_SIZE,
RootDir = null,
SmallBlockFile = null,
RefCount = 1,
Header = headerImpl,
};
uint[] metabat = null;
int metabatPtr = 0; // metabat[0]
int metabatPtr = 0; // MetaBAT[0]
uint last;
int? ptr = null;
@@ -686,14 +679,13 @@ namespace LibGSF.Input
{
Info.BigBlockBat.Blocks = new uint[num_bat * (headerImpl.BB_SIZE / BAT_INDEX_SIZE)];
metabat = new uint[Math.Max(headerImpl.BB_SIZE, MSOleHeader.OLE_HEADER_SIZE)];
Info.MetaBAT = new uint[Math.Max(headerImpl.BB_SIZE, MSOleHeader.OLE_HEADER_SIZE)];
// Reading the elements invalidates this memory, make copy
GetUnsignedInts(metabat, ref metabatPtr, header, OLE_HEADER_START_BAT, MSOleHeader.OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
GetUnsignedInts(Info.MetaBAT, metabatPtr, header, OLE_HEADER_START_BAT, MSOleHeader.OLE_HEADER_SIZE - OLE_HEADER_START_BAT);
last = Math.Min(num_bat, OLE_HEADER_METABAT_SIZE);
ptr = ReadMetabat(Info.BigBlockBat.Blocks, 0, Info.BigBlockBat.NumBlocks, metabat, last); // TODO: Does this need to be offset by metabatPtr?
ptr = ReadMetabat(Info.BigBlockBat.Blocks, 0, Info.BigBlockBat.NumBlocks, Info.MetaBAT, metabatPtr, (uint)(metabatPtr + last));
num_bat -= last;
}
@@ -703,7 +695,7 @@ namespace LibGSF.Input
last = (uint)((Info.Header.BB_SIZE - BAT_INDEX_SIZE) / BAT_INDEX_SIZE);
while (ptr != null && num_metabat-- > 0)
{
byte[] tmp = GetBlock(metabat_block - 1, null);
byte[] tmp = GetBlock(metabat_block, null);
if (tmp == null)
{
ptr = null;
@@ -711,7 +703,7 @@ namespace LibGSF.Input
}
// Reading the elements invalidates this memory, make copy
GetUnsignedInts(metabat, ref metabatPtr, tmp, 0, Info.Header.BB_SIZE);
GetUnsignedInts(Info.MetaBAT, metabatPtr, tmp, 0, Info.Header.BB_SIZE);
if (num_metabat == 0)
{
@@ -726,7 +718,7 @@ namespace LibGSF.Input
}
else if (num_metabat > 0)
{
metabat_block = metabat[last];
metabat_block = Info.MetaBAT[last];
if (num_bat < last)
{
// ::num_bat and ::num_metabat are
@@ -739,7 +731,7 @@ namespace LibGSF.Input
num_bat -= last;
}
ptr = ReadMetabat(Info.BigBlockBat.Blocks, ptr.Value, Info.BigBlockBat.NumBlocks, metabat, last);
ptr = ReadMetabat(Info.BigBlockBat.Blocks, ptr.Value, Info.BigBlockBat.NumBlocks, Info.MetaBAT, metabatPtr, (uint)(metabatPtr + last));
}
bool fail = (ptr == null);
@@ -843,13 +835,19 @@ namespace LibGSF.Input
if (sb_file == null)
return null;
uint remaining = dirent.Header.FILE_SIZE;
int remaining = (int)dirent.Header.FILE_SIZE;
child.Stream = new byte[remaining];
for (uint i = 0; remaining > 0 && i < child.Bat.NumBlocks; i++, remaining -= (uint)Info.Header.SB_SIZE)
for (uint i = 0; remaining > 0 && i < child.Bat.NumBlocks; i++, remaining -= Info.Header.SB_SIZE)
{
if (sb_file.Seek(child.Bat.Blocks[i] << Info.Header.SB_SHIFT, SeekOrigin.Begin)
|| sb_file.Read((int)Math.Min(remaining, Info.Header.SB_SIZE), child.Stream, (int)(i << Info.Header.SB_SHIFT)) == null)
if (sb_file.Seek(child.Bat.Blocks[i] << Info.Header.SB_SHIFT, SeekOrigin.Begin))
{
Console.Error.WriteLine($"Failure seeking to block {i} for '{dirent.Header.NAME_STRING}'");
err = new Exception("Failure seeking block");
return null;
}
if (sb_file.Read((int)Math.Min(remaining, Info.Header.SB_SIZE), child.Stream, (int)(i << Info.Header.SB_SHIFT)) == null)
{
Console.Error.WriteLine($"Failure reading block {i} for '{dirent.Header.NAME_STRING}'");
err = new Exception("Failure reading block");
@@ -857,13 +855,12 @@ namespace LibGSF.Input
}
}
// TODO: Debug as to why this block would be hit
//if (remaining > 0)
//{
// err = new Exception("Insufficient blocks");
// Console.Error.WriteLine($"Small-block file '{dirent.Header.NAME_STRING}' has insufficient blocks ({child.Bat.NumBlocks}) for the stated size ({dirent.Header.FILE_SIZE})");
// return null;
//}
if (remaining > 0)
{
err = new Exception("Insufficient blocks");
Console.Error.WriteLine($"Small-block file '{dirent.Header.NAME_STRING}' has insufficient blocks ({child.Bat.NumBlocks}) for the stated size ({dirent.Header.FILE_SIZE})");
return null;
}
}
return child;

View File

@@ -1956,8 +1956,8 @@ namespace LibMSI
{
Exception err = null;
GsfInput input = Infile.ChildByIndex(i, ref err);
string name = input.Name;
byte[] name8 = Encoding.ASCII.GetBytes(name);
string name = input?.Name;
byte[] name8 = Encoding.ASCII.GetBytes(name ?? string.Empty);
if (name == null)
{