Reading the MS Office Forms binary file format (excerpt)

Posted on

Problem

As part of my VBA Sync Tool project, I need to partially decode files describing Microsoft Office Forms (file format description on MSDN). The file format has lots of structures following a template similar to this one:

FormControl structure

Where

  • cbForm specifies the combinaed size of PropMask, DataBlock, and ExtraDataBlock
  • PropMask is a bitfield specifying what properties are stored in the rest of the structure
  • DataBlock contains aligned properties of 4 bytes or less
  • ExtraDataBlock contains aligned strings and properties of more than 4 bytes
  • StreamData contains unaligned data of various size

To help with decoding this sort of structure, I built an abstract class FrxCommon which all like structures should inherit from:

abstract class FrxCommon {
    ushort _dBytes;
    bool _inD;
    bool _inX;
    ushort _xBytes;

    protected ushort DataBlockBytes => _dBytes;
    protected ushort ExtraDataBlockBytes => _xBytes;

    protected void BeginDataBlock() {
        _dBytes = 0;
        _inD = true;
    }

    protected void BeginExtraDataBlock() {
        _xBytes = 0;
        _inX = true;
    }

    protected void EndDataBlock(BinaryReader r) {
        AlignTo(4, r.BaseStream);
        _inD = false;
    }

    protected void EndExtraDataBlock(BinaryReader r) {
        AlignTo(4, r.BaseStream);
        _inX = false;
    }

    protected byte ReadByteIf(bool b, BinaryReader r, byte ifNot = 0) {
        if (!b) return ifNot;
        AboutToRead(1);
        return r.ReadByte();
    }

    protected void Ignore2AlignedBytesIf(bool b, BinaryReader r) {
        if (!b) return;
        AlignTo(2, r.BaseStream);
        IgnoreNext(2, r.BaseStream);
    }

    protected short ReadAlignedInt16If(bool b, BinaryReader r, short ifNot = 0) {
        if (!b) return ifNot;
        AlignTo(2, r.BaseStream);
        AboutToRead(2);
        return r.ReadInt16();
    }

    protected ushort ReadAlignedUInt16If(bool b, BinaryReader r, ushort ifNot = 0) {
        if (!b) return ifNot;
        AlignTo(2, r.BaseStream);
        AboutToRead(2);
        return r.ReadUInt16();
    }

    protected int ReadAlignedInt32If(bool b, BinaryReader r, int ifNot = 0) {
        if (!b) return ifNot;
        AlignTo(4, r.BaseStream);
        AboutToRead(4);
        return r.ReadInt32();
    }

    protected uint ReadAlignedUInt32If(bool b, BinaryReader r, uint ifNot = 0) {
        if (!b) return ifNot;
        AlignTo(4, r.BaseStream);
        AboutToRead(4);
        return r.ReadUInt32();
    }

    protected Tuple<int, int> ReadAlignedCoordsIf(bool b, BinaryReader r) {
        if (!b) return Tuple.Create(0, 0);
        AlignTo(4, r.BaseStream);
        AboutToRead(8);
        return Tuple.Create(r.ReadInt32(), r.ReadInt32());
    }

    protected Tuple<int, bool> ReadAlignedCcbIf(bool b, BinaryReader r) {
        if (!b) return Tuple.Create(0, false);
        AlignTo(4, r.BaseStream);
        AboutToRead(4);
        var i = r.ReadInt32();
        return i < 0 ? Tuple.Create(unchecked((int)(i ^ 0x80000000)), true) : Tuple.Create(i, false);
    }

    protected OleColor ReadAlignedOleColorIf(bool b, BinaryReader r) {
        if (!b) return null;
        AlignTo(4, r.BaseStream);
        AboutToRead(4);
        return new OleColor(r.ReadBytes(4));
    }

    protected string ReadAlignedWCharIf(bool b, BinaryReader r) {
        if (!b) return "";
        AlignTo(2, r.BaseStream);
        AboutToRead(2);
        return Encoding.Unicode.GetString(r.ReadBytes(2));
    }

    protected string ReadStringFromCcb(Tuple<int, bool> ccb, BinaryReader r) {
        if (ccb.Item1 == 0) return "";
        AboutToRead((ushort)ccb.Item1);
        var s = (ccb.Item2 ? Encoding.UTF8 : Encoding.Unicode).GetString(r.ReadBytes(ccb.Item1));
        AlignTo(4, r.BaseStream);
        return s;
    }

    void AboutToRead(ushort numBytes) {
        if (_inD) {
            _dBytes += numBytes;
        } else if (_inX) {
            _xBytes += numBytes;
        }
    }

    void AlignTo(ushort alignment, Stream st) {
        if (_inD) {
            if (_dBytes%alignment == 0) return;
            st.Seek(alignment - _dBytes%alignment, SeekOrigin.Current);
            _dBytes += (ushort)(alignment - _dBytes%alignment);
        } else if (_inX) {
            if (_xBytes%alignment == 0) return;
            st.Seek(alignment - _xBytes%alignment, SeekOrigin.Current);
            _xBytes += (ushort)(alignment - _xBytes%alignment);
        }
    }

    void IgnoreNext(ushort bytes, Stream st) {
        st.Seek(bytes, SeekOrigin.Current);
        if (_inD) {
            _dBytes += bytes;
        } else if (_inX) {
            _xBytes += bytes;
        }
    }
}

An example class inheriting from this one:

class CommandButtonControl : FrxCommon {
    public byte MinorVersion { get; }
    public byte MajorVersion { get; }
    public CommandButtonPropMask PropMask { get; }
    public OleColor ForeColor { get; }
    public OleColor BackColor { get; }
    public uint VariousPropertyBits { get; }
    public string Caption { get; }
    public PicturePosition PicturePosition { get; }
    public MousePointer MousePointer { get; }
    public string Accelerator { get; }
    public Tuple<int, int> Size { get; }
    public byte[] Picture { get; } = new byte[0];
    public byte[] MouseIcon { get; } = new byte[0];
    public TextProps TextProps { get; }

    public CommandButtonControl(byte[] b) {
        using (var st = new MemoryStream(b))
        using (var r = new BinaryReader(st)) {
            MinorVersion = r.ReadByte();
            MajorVersion = r.ReadByte();

            var cbCommandButton = r.ReadUInt16();
            PropMask = new CommandButtonPropMask(r.ReadUInt32());

            BeginDataBlock();
            ForeColor = ReadAlignedOleColorIf(PropMask.HasForeColor, r);
            BackColor = ReadAlignedOleColorIf(PropMask.HasBackColor, r);
            VariousPropertyBits = ReadAlignedUInt32If(PropMask.HasVariousPropertyBits, r);
            var captionCcb = ReadAlignedCcbIf(PropMask.HasCaption, r);
            PicturePosition = (PicturePosition)ReadAlignedUInt32If(PropMask.HasPicturePosition, r);
            MousePointer = (MousePointer)ReadByteIf(PropMask.HasMousePointer, r);
            Ignore2AlignedBytesIf(PropMask.HasPicture, r);
            Accelerator = ReadAlignedWCharIf(PropMask.HasAccelerator, r);
            Ignore2AlignedBytesIf(PropMask.HasMouseIcon, r);
            EndDataBlock(r);

            BeginExtraDataBlock();
            Caption = ReadStringFromCcb(captionCcb, r);
            Size = ReadAlignedCoordsIf(PropMask.HasSize, r);
            EndExtraDataBlock(r);

            if (cbCommandButton != 4 + DataBlockBytes + ExtraDataBlockBytes)
                throw new ApplicationException("Error reading 'o' stream in .frx data: expected cbCommandButton size "
                                               + $"{4 + DataBlockBytes + ExtraDataBlockBytes}, but actual size was {cbCommandButton}.");

            // StreamData
            if (PropMask.HasPicture) {
                Picture = ReadGuidAndPicture(r);
            }
            if (PropMask.HasMouseIcon) {
                MouseIcon = ReadGuidAndPicture(r);
            }

            TextProps = ReadTextProps(r);
        }
    }

    public override bool Equals(object obj) {
        if (ReferenceEquals(null, obj)) {
            return false;
        }
        if (ReferenceEquals(this, obj)) {
            return true;
        }
        if (obj.GetType() != this.GetType()) {
            return false;
        }
        return Equals((CommandButtonControl)obj);
    }

    protected bool Equals(CommandButtonControl other) {
        return Picture.SequenceEqual(other.Picture) && MouseIcon.SequenceEqual(other.MouseIcon) && MinorVersion == other.MinorVersion &&
               MajorVersion == other.MajorVersion && Equals(PropMask, other.PropMask) && Equals(ForeColor, other.ForeColor) &&
               Equals(BackColor, other.BackColor) && VariousPropertyBits == other.VariousPropertyBits && string.Equals(Caption, other.Caption) &&
               PicturePosition == other.PicturePosition && MousePointer == other.MousePointer && string.Equals(Accelerator, other.Accelerator) &&
               Equals(Size, other.Size) && Equals(TextProps, other.TextProps);
    }

    public override int GetHashCode() {
        unchecked {
            var hashCode = Picture?.Length.GetHashCode() ?? 0;
            hashCode = (hashCode * 397) ^ (MouseIcon?.Length.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (TextProps?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ MinorVersion.GetHashCode();
            hashCode = (hashCode * 397) ^ MajorVersion.GetHashCode();
            hashCode = (hashCode * 397) ^ (PropMask?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (ForeColor?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (BackColor?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (int)VariousPropertyBits;
            hashCode = (hashCode * 397) ^ (Caption?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (int)PicturePosition;
            hashCode = (hashCode * 397) ^ (int)MousePointer;
            hashCode = (hashCode * 397) ^ (Accelerator?.GetHashCode() ?? 0);
            hashCode = (hashCode * 397) ^ (Size?.GetHashCode() ?? 0);
            return hashCode;
        }
    }
}

class CommandButtonPropMask {
    public bool HasForeColor { get; }
    public bool HasBackColor { get; }
    public bool HasVariousPropertyBits { get; }
    public bool HasCaption { get; }
    public bool HasPicturePosition { get; }
    public bool HasSize { get; }
    public bool HasMousePointer { get; }
    public bool HasPicture { get; }
    public bool HasAccelerator { get; }
    public bool TakeFocusOnClick { get; }
    public bool HasMouseIcon { get; }

    public CommandButtonPropMask(uint i) {
        Func<int, bool> bit = j => (i & ((uint)1 << j)) != 0;
        HasForeColor = bit(0);
        HasBackColor = bit(1);
        HasVariousPropertyBits = bit(2);
        HasCaption = bit(3);
        HasPicturePosition = bit(4);
        HasSize = bit(5);
        HasMousePointer = bit(6);
        HasPicture = bit(7);
        HasAccelerator = bit(8);
        TakeFocusOnClick = !bit(9);
        HasMouseIcon = bit(10);
    }

    public override bool Equals(object obj) {
        if (ReferenceEquals(null, obj)) {
            return false;
        }
        if (ReferenceEquals(this, obj)) {
            return true;
        }
        if (obj.GetType() != this.GetType()) {
            return false;
        }
        return Equals((CommandButtonPropMask)obj);
    }

    protected bool Equals(CommandButtonPropMask other) {
        return HasForeColor == other.HasForeColor && HasBackColor == other.HasBackColor && HasVariousPropertyBits == other.HasVariousPropertyBits &&
               HasCaption == other.HasCaption && HasPicturePosition == other.HasPicturePosition && HasSize == other.HasSize &&
               HasMousePointer == other.HasMousePointer && HasPicture == other.HasPicture && HasAccelerator == other.HasAccelerator &&
               TakeFocusOnClick == other.TakeFocusOnClick && HasMouseIcon == other.HasMouseIcon;
    }

    public override int GetHashCode() {
        unchecked {
            var hashCode = HasForeColor.GetHashCode();
            hashCode = (hashCode*397) ^ HasBackColor.GetHashCode();
            hashCode = (hashCode*397) ^ HasVariousPropertyBits.GetHashCode();
            hashCode = (hashCode*397) ^ HasCaption.GetHashCode();
            hashCode = (hashCode*397) ^ HasPicturePosition.GetHashCode();
            hashCode = (hashCode*397) ^ HasSize.GetHashCode();
            hashCode = (hashCode*397) ^ HasMousePointer.GetHashCode();
            hashCode = (hashCode*397) ^ HasPicture.GetHashCode();
            hashCode = (hashCode*397) ^ HasAccelerator.GetHashCode();
            hashCode = (hashCode*397) ^ TakeFocusOnClick.GetHashCode();
            hashCode = (hashCode*397) ^ HasMouseIcon.GetHashCode();
            return hashCode;
        }
    }
}

Note that the goal here is not to completely decode the file, but only to separate the meaningful data from the padding, so that I can compare two files. So some bitfields (like VariousPropertyBits) are stored as numeric types and not broken down further.

Does this all jive with standard practice about reading variable-length binary structures in C#? Anything you would have done differently?

Solution

  • You are using Java-style braces. In C# you should put opening { on new line.
  • You are using confusing field names. What is _dBytes? What is _inD? No way to tell without digging into your code. You should use descriptive names, which would explain the purpose of your fields.
  • Your hashcode function looks overly complex. Just pick one or two fields an generate hashcode based on that. The whole point of hashcode is that it should be easier (i.e. faster) to compare two hashcodes, than it is to compare two actual objects.
  • You should use aggregation instead of inheritance. I would recommend creating a separate class similar to BinaryReader:

    //1) Remove "...If" methods for the sake of simplicity. 
    //   Regular "if" operator is good enough.
    //2) Remove "...Aligned..." word. Whether the value is aligned 
    //   or not should be an implementation detail.
    //   As a user of this class I should not care how or whether 
    //   the value is aligned, I just want to read it.
    interface IMsFormReader
    {
        byte ReadByte(); 
        int ReadInt16();
        uint ReadUInt32();
        OleColor ReadOleColor();
        ...
    }
    
    class MsFormReader : IMsFormReader { ... }
    

    And then use it for parsing:

    //split the document into smaller logical blocks
    class FormHeader
    {
        public byte MinorVersion { get; }
        public byte MajorVersion { get; }
        public uint DataSize { get; }
    
        public void Read(IMsFormReader reader)
        {
            MinorVersion = reader.ReadByte();
            MajorVersion = reader.ReadByte();
            DataSize = reader.ReadUInt32()();
        }
    
        ...
    }
    
    public class ButtonDataBlock
    {
       ...
    }
    
    class CommandButtonControl 
    {
         public FormHeader Header { get; } = new FormHeader();
         public ButtonDataBlock DataBlock { get; } = new ButtonDataBlock();
    
         ...
    
         public void Read(Stream input)
         {
             Header.Read(new MsFormReader(input));
             //just recreate the reader instead of calling EndDataBlock and managing bool flags internally.
             DataBlock.Read(new MsFormReader(input));
    
             ...
         }
    }
    

Leave a Reply

Your email address will not be published. Required fields are marked *