Problem
As part of my VBA Sync Tool project, I need to partially decode files describing Microsoft Office Forms (file format description on MSDN). The file format has lots of structures following a template similar to this one:
Where
cbForm
specifies the combinaed size ofPropMask
,DataBlock
, andExtraDataBlock
PropMask
is a bitfield specifying what properties are stored in the rest of the structureDataBlock
contains aligned properties of 4 bytes or lessExtraDataBlock
contains aligned strings and properties of more than 4 bytesStreamData
contains unaligned data of various size
To help with decoding this sort of structure, I built an abstract class FrxCommon
which all like structures should inherit from:
abstract class FrxCommon {
ushort _dBytes;
bool _inD;
bool _inX;
ushort _xBytes;
protected ushort DataBlockBytes => _dBytes;
protected ushort ExtraDataBlockBytes => _xBytes;
protected void BeginDataBlock() {
_dBytes = 0;
_inD = true;
}
protected void BeginExtraDataBlock() {
_xBytes = 0;
_inX = true;
}
protected void EndDataBlock(BinaryReader r) {
AlignTo(4, r.BaseStream);
_inD = false;
}
protected void EndExtraDataBlock(BinaryReader r) {
AlignTo(4, r.BaseStream);
_inX = false;
}
protected byte ReadByteIf(bool b, BinaryReader r, byte ifNot = 0) {
if (!b) return ifNot;
AboutToRead(1);
return r.ReadByte();
}
protected void Ignore2AlignedBytesIf(bool b, BinaryReader r) {
if (!b) return;
AlignTo(2, r.BaseStream);
IgnoreNext(2, r.BaseStream);
}
protected short ReadAlignedInt16If(bool b, BinaryReader r, short ifNot = 0) {
if (!b) return ifNot;
AlignTo(2, r.BaseStream);
AboutToRead(2);
return r.ReadInt16();
}
protected ushort ReadAlignedUInt16If(bool b, BinaryReader r, ushort ifNot = 0) {
if (!b) return ifNot;
AlignTo(2, r.BaseStream);
AboutToRead(2);
return r.ReadUInt16();
}
protected int ReadAlignedInt32If(bool b, BinaryReader r, int ifNot = 0) {
if (!b) return ifNot;
AlignTo(4, r.BaseStream);
AboutToRead(4);
return r.ReadInt32();
}
protected uint ReadAlignedUInt32If(bool b, BinaryReader r, uint ifNot = 0) {
if (!b) return ifNot;
AlignTo(4, r.BaseStream);
AboutToRead(4);
return r.ReadUInt32();
}
protected Tuple<int, int> ReadAlignedCoordsIf(bool b, BinaryReader r) {
if (!b) return Tuple.Create(0, 0);
AlignTo(4, r.BaseStream);
AboutToRead(8);
return Tuple.Create(r.ReadInt32(), r.ReadInt32());
}
protected Tuple<int, bool> ReadAlignedCcbIf(bool b, BinaryReader r) {
if (!b) return Tuple.Create(0, false);
AlignTo(4, r.BaseStream);
AboutToRead(4);
var i = r.ReadInt32();
return i < 0 ? Tuple.Create(unchecked((int)(i ^ 0x80000000)), true) : Tuple.Create(i, false);
}
protected OleColor ReadAlignedOleColorIf(bool b, BinaryReader r) {
if (!b) return null;
AlignTo(4, r.BaseStream);
AboutToRead(4);
return new OleColor(r.ReadBytes(4));
}
protected string ReadAlignedWCharIf(bool b, BinaryReader r) {
if (!b) return "";
AlignTo(2, r.BaseStream);
AboutToRead(2);
return Encoding.Unicode.GetString(r.ReadBytes(2));
}
protected string ReadStringFromCcb(Tuple<int, bool> ccb, BinaryReader r) {
if (ccb.Item1 == 0) return "";
AboutToRead((ushort)ccb.Item1);
var s = (ccb.Item2 ? Encoding.UTF8 : Encoding.Unicode).GetString(r.ReadBytes(ccb.Item1));
AlignTo(4, r.BaseStream);
return s;
}
void AboutToRead(ushort numBytes) {
if (_inD) {
_dBytes += numBytes;
} else if (_inX) {
_xBytes += numBytes;
}
}
void AlignTo(ushort alignment, Stream st) {
if (_inD) {
if (_dBytes%alignment == 0) return;
st.Seek(alignment - _dBytes%alignment, SeekOrigin.Current);
_dBytes += (ushort)(alignment - _dBytes%alignment);
} else if (_inX) {
if (_xBytes%alignment == 0) return;
st.Seek(alignment - _xBytes%alignment, SeekOrigin.Current);
_xBytes += (ushort)(alignment - _xBytes%alignment);
}
}
void IgnoreNext(ushort bytes, Stream st) {
st.Seek(bytes, SeekOrigin.Current);
if (_inD) {
_dBytes += bytes;
} else if (_inX) {
_xBytes += bytes;
}
}
}
An example class inheriting from this one:
class CommandButtonControl : FrxCommon {
public byte MinorVersion { get; }
public byte MajorVersion { get; }
public CommandButtonPropMask PropMask { get; }
public OleColor ForeColor { get; }
public OleColor BackColor { get; }
public uint VariousPropertyBits { get; }
public string Caption { get; }
public PicturePosition PicturePosition { get; }
public MousePointer MousePointer { get; }
public string Accelerator { get; }
public Tuple<int, int> Size { get; }
public byte[] Picture { get; } = new byte[0];
public byte[] MouseIcon { get; } = new byte[0];
public TextProps TextProps { get; }
public CommandButtonControl(byte[] b) {
using (var st = new MemoryStream(b))
using (var r = new BinaryReader(st)) {
MinorVersion = r.ReadByte();
MajorVersion = r.ReadByte();
var cbCommandButton = r.ReadUInt16();
PropMask = new CommandButtonPropMask(r.ReadUInt32());
BeginDataBlock();
ForeColor = ReadAlignedOleColorIf(PropMask.HasForeColor, r);
BackColor = ReadAlignedOleColorIf(PropMask.HasBackColor, r);
VariousPropertyBits = ReadAlignedUInt32If(PropMask.HasVariousPropertyBits, r);
var captionCcb = ReadAlignedCcbIf(PropMask.HasCaption, r);
PicturePosition = (PicturePosition)ReadAlignedUInt32If(PropMask.HasPicturePosition, r);
MousePointer = (MousePointer)ReadByteIf(PropMask.HasMousePointer, r);
Ignore2AlignedBytesIf(PropMask.HasPicture, r);
Accelerator = ReadAlignedWCharIf(PropMask.HasAccelerator, r);
Ignore2AlignedBytesIf(PropMask.HasMouseIcon, r);
EndDataBlock(r);
BeginExtraDataBlock();
Caption = ReadStringFromCcb(captionCcb, r);
Size = ReadAlignedCoordsIf(PropMask.HasSize, r);
EndExtraDataBlock(r);
if (cbCommandButton != 4 + DataBlockBytes + ExtraDataBlockBytes)
throw new ApplicationException("Error reading 'o' stream in .frx data: expected cbCommandButton size "
+ $"{4 + DataBlockBytes + ExtraDataBlockBytes}, but actual size was {cbCommandButton}.");
// StreamData
if (PropMask.HasPicture) {
Picture = ReadGuidAndPicture(r);
}
if (PropMask.HasMouseIcon) {
MouseIcon = ReadGuidAndPicture(r);
}
TextProps = ReadTextProps(r);
}
}
public override bool Equals(object obj) {
if (ReferenceEquals(null, obj)) {
return false;
}
if (ReferenceEquals(this, obj)) {
return true;
}
if (obj.GetType() != this.GetType()) {
return false;
}
return Equals((CommandButtonControl)obj);
}
protected bool Equals(CommandButtonControl other) {
return Picture.SequenceEqual(other.Picture) && MouseIcon.SequenceEqual(other.MouseIcon) && MinorVersion == other.MinorVersion &&
MajorVersion == other.MajorVersion && Equals(PropMask, other.PropMask) && Equals(ForeColor, other.ForeColor) &&
Equals(BackColor, other.BackColor) && VariousPropertyBits == other.VariousPropertyBits && string.Equals(Caption, other.Caption) &&
PicturePosition == other.PicturePosition && MousePointer == other.MousePointer && string.Equals(Accelerator, other.Accelerator) &&
Equals(Size, other.Size) && Equals(TextProps, other.TextProps);
}
public override int GetHashCode() {
unchecked {
var hashCode = Picture?.Length.GetHashCode() ?? 0;
hashCode = (hashCode * 397) ^ (MouseIcon?.Length.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (TextProps?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ MinorVersion.GetHashCode();
hashCode = (hashCode * 397) ^ MajorVersion.GetHashCode();
hashCode = (hashCode * 397) ^ (PropMask?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (ForeColor?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (BackColor?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (int)VariousPropertyBits;
hashCode = (hashCode * 397) ^ (Caption?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (int)PicturePosition;
hashCode = (hashCode * 397) ^ (int)MousePointer;
hashCode = (hashCode * 397) ^ (Accelerator?.GetHashCode() ?? 0);
hashCode = (hashCode * 397) ^ (Size?.GetHashCode() ?? 0);
return hashCode;
}
}
}
class CommandButtonPropMask {
public bool HasForeColor { get; }
public bool HasBackColor { get; }
public bool HasVariousPropertyBits { get; }
public bool HasCaption { get; }
public bool HasPicturePosition { get; }
public bool HasSize { get; }
public bool HasMousePointer { get; }
public bool HasPicture { get; }
public bool HasAccelerator { get; }
public bool TakeFocusOnClick { get; }
public bool HasMouseIcon { get; }
public CommandButtonPropMask(uint i) {
Func<int, bool> bit = j => (i & ((uint)1 << j)) != 0;
HasForeColor = bit(0);
HasBackColor = bit(1);
HasVariousPropertyBits = bit(2);
HasCaption = bit(3);
HasPicturePosition = bit(4);
HasSize = bit(5);
HasMousePointer = bit(6);
HasPicture = bit(7);
HasAccelerator = bit(8);
TakeFocusOnClick = !bit(9);
HasMouseIcon = bit(10);
}
public override bool Equals(object obj) {
if (ReferenceEquals(null, obj)) {
return false;
}
if (ReferenceEquals(this, obj)) {
return true;
}
if (obj.GetType() != this.GetType()) {
return false;
}
return Equals((CommandButtonPropMask)obj);
}
protected bool Equals(CommandButtonPropMask other) {
return HasForeColor == other.HasForeColor && HasBackColor == other.HasBackColor && HasVariousPropertyBits == other.HasVariousPropertyBits &&
HasCaption == other.HasCaption && HasPicturePosition == other.HasPicturePosition && HasSize == other.HasSize &&
HasMousePointer == other.HasMousePointer && HasPicture == other.HasPicture && HasAccelerator == other.HasAccelerator &&
TakeFocusOnClick == other.TakeFocusOnClick && HasMouseIcon == other.HasMouseIcon;
}
public override int GetHashCode() {
unchecked {
var hashCode = HasForeColor.GetHashCode();
hashCode = (hashCode*397) ^ HasBackColor.GetHashCode();
hashCode = (hashCode*397) ^ HasVariousPropertyBits.GetHashCode();
hashCode = (hashCode*397) ^ HasCaption.GetHashCode();
hashCode = (hashCode*397) ^ HasPicturePosition.GetHashCode();
hashCode = (hashCode*397) ^ HasSize.GetHashCode();
hashCode = (hashCode*397) ^ HasMousePointer.GetHashCode();
hashCode = (hashCode*397) ^ HasPicture.GetHashCode();
hashCode = (hashCode*397) ^ HasAccelerator.GetHashCode();
hashCode = (hashCode*397) ^ TakeFocusOnClick.GetHashCode();
hashCode = (hashCode*397) ^ HasMouseIcon.GetHashCode();
return hashCode;
}
}
}
Note that the goal here is not to completely decode the file, but only to separate the meaningful data from the padding, so that I can compare two files. So some bitfields (like VariousPropertyBits
) are stored as numeric types and not broken down further.
Does this all jive with standard practice about reading variable-length binary structures in C#? Anything you would have done differently?
Solution
- You are using Java-style braces. In C# you should put opening
{
on new line. - You are using confusing field names. What is
_dBytes
? What is_inD
? No way to tell without digging into your code. You should use descriptive names, which would explain the purpose of your fields. - Your hashcode function looks overly complex. Just pick one or two fields an generate hashcode based on that. The whole point of hashcode is that it should be easier (i.e. faster) to compare two hashcodes, than it is to compare two actual objects.
-
You should use aggregation instead of inheritance. I would recommend creating a separate class similar to
BinaryReader
://1) Remove "...If" methods for the sake of simplicity. // Regular "if" operator is good enough. //2) Remove "...Aligned..." word. Whether the value is aligned // or not should be an implementation detail. // As a user of this class I should not care how or whether // the value is aligned, I just want to read it. interface IMsFormReader { byte ReadByte(); int ReadInt16(); uint ReadUInt32(); OleColor ReadOleColor(); ... } class MsFormReader : IMsFormReader { ... }
And then use it for parsing:
//split the document into smaller logical blocks class FormHeader { public byte MinorVersion { get; } public byte MajorVersion { get; } public uint DataSize { get; } public void Read(IMsFormReader reader) { MinorVersion = reader.ReadByte(); MajorVersion = reader.ReadByte(); DataSize = reader.ReadUInt32()(); } ... } public class ButtonDataBlock { ... } class CommandButtonControl { public FormHeader Header { get; } = new FormHeader(); public ButtonDataBlock DataBlock { get; } = new ButtonDataBlock(); ... public void Read(Stream input) { Header.Read(new MsFormReader(input)); //just recreate the reader instead of calling EndDataBlock and managing bool flags internally. DataBlock.Read(new MsFormReader(input)); ... } }