Legacy Event File Format (MWK)

Description

MWorks’ legacy event file format uses the file extension .mwk.

An MWK file is a simple stream of MWorks events. Each event is encoded as a list in the LDO format defined by the Scarab library. The list contains the code, time, and data associated with the event. (The only exception is the RESERVED_TERMINATION_CODE event that ends the event file, which has no associated data and is encoded as a two-element list.)

The first time an MWK file is opened using MWorks’ standard data analysis tools, it is converted into a directory containing both the original, unmodified MWK file and an index file. For example:

$ # Before opening
$ ls -l data.mwk
-rw-r--r--  1 cstawarz  staff  7953812 Aug  9  2018 data.mwk

$ # After opening
$ ls -l data.mwk/
total 16400
-rw-r--r--  1 cstawarz  staff  7953812 Aug  9  2018 data.mwk
-rw-r--r--  1 cstawarz  staff      686 Aug 10  2018 data.mwk.idx

If the .mwk.idx file is deleted, the data tools will recreate it the next time the event file is opened.

Example Code

The following Python code demonstrates the MWK format in detail by implementing a reader for .mwk files:

from __future__ import division, print_function, unicode_literals
import os
import struct

# Compatibility shim: make the lazy ``xrange`` name available on every
# interpreter.  Python 2 already provides it as a builtin; Python 3 renamed
# it to ``range``.
try:
    xrange = xrange
except NameError:
    xrange = range


class LDOError(Exception):
    """Raised when data cannot be parsed as a valid LDO stream.

    Examples are a stream that does not start with the expected magic
    bytes, or a value introduced by an unknown type code.
    """


class LDOReader(object):
    """Decode a stream of values stored in Scarab's LDO binary format.

    The stream starts with a fixed magic header, followed by any number of
    encoded values.  Every value is a one-byte type code followed by a
    type-specific payload.  Sizes and integer magnitudes are stored as
    variable-length integers: seven bits per byte, most significant group
    first, with the high bit set on every byte except the last.

    Call :meth:`read` repeatedly to decode successive values.

    Args:
        file: A binary file-like object positioned at the start of the
            stream.  Its ``read()`` method must return ``bytes``.
        string_encoding: Encoding used to decode NUL-terminated opaque
            values into text.

    Raises:
        TypeError: If ``file.read()`` does not return binary data.
        LDOError: If the stream does not start with the magic header.
    """

    MAGIC = b'\x89CBF\x01\x00\x00'

    # One-byte type codes that introduce each encoded value
    INTEGER_N =  0x02  # Negative integer (magnitude is stored)
    INTEGER_P =  0x03  # Non-negative integer
    OPAQUE =     0x0A  # Byte string, or text if NUL-terminated
    NULL =       0x0B  # None
    LIST =       0x0C  # Element count followed by the elements
    DICTIONARY = 0x0D  # Pair count followed by key/value pairs
    FLOAT =      0x11  # Little-endian IEEE 754 double

    # The only payload size accepted for FLOAT values
    _FLOAT_SIZE = 8

    def __init__(self, file, string_encoding='utf-8'):
        magic = file.read(len(self.MAGIC))
        if not isinstance(magic, bytes):
            raise TypeError('file.read() must return binary data')
        if magic != self.MAGIC:
            raise LDOError('invalid magic')

        self._file = file

        # Dispatch table from type code to the method decoding its payload
        self._readers = {
            self.INTEGER_N: self._read_integer_n,
            self.INTEGER_P: self._read_integer_p,
            self.OPAQUE: self._read_opaque,
            self.NULL: self._read_null,
            self.LIST: self._read_list,
            self.DICTIONARY: self._read_dict,
            self.FLOAT: self._read_float,
            }

        self.string_encoding = string_encoding

    def _read(self, size):
        """Return exactly ``size`` bytes from the underlying file.

        A request for zero bytes returns ``b''``.  ``EOFError`` is raised
        if the file ends before ``size`` bytes are available, so a
        truncated value is never returned as if it were complete.
        """
        data = self._file.read(size)
        if len(data) == size:
            # Fast path; also covers size == 0
            return data
        if not data:
            raise EOFError

        # Short read: keep reading until the request is satisfied or the
        # file is exhausted
        chunks = [data]
        remaining = size - len(data)
        while remaining:
            chunk = self._file.read(remaining)
            if not chunk:
                raise EOFError
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def _read_ord(self):
        """Read one byte and return it as an integer."""
        return ord(self._read(1))

    def _read_ber(self):
        """Read a variable-length unsigned integer."""
        # Adapted from https://stackoverflow.com/questions/6776553/
        value = 0
        while True:
            tmp = self._read_ord()
            value = (value << 7) | (tmp & 0x7f)
            if tmp & 0x80 == 0:
                # High bit clear marks the final byte
                return value

    def _read_integer_n(self):
        """Read a negative integer (stored as its magnitude)."""
        return -(self._read_ber())

    def _read_integer_p(self):
        """Read a non-negative integer."""
        return self._read_ber()

    def _read_opaque(self):
        """Read an opaque value and return it as ``bytes`` or text.

        A zero-length payload is returned as ``b''``.
        """
        size = self._read_ber()
        value = self._read(size)

        # If value contains exactly one NUL, and that NUL is the last
        # byte in the array, then value is an encoded string.  Strip
        # the NUL and decode with the specified encoding.  (An empty
        # value contains no NUL, so it stays binary.)
        if value and value.find(b'\0') == len(value) - 1:
            value = value[:-1].decode(self.string_encoding)

        return value

    def _read_null(self):
        """Return ``None``; NULL values have no payload."""
        return None

    def _read_list(self):
        """Read an element count followed by that many values."""
        size = self._read_ber()
        return [self.read() for _ in xrange(size)]

    def _read_dict(self):
        """Read a pair count followed by that many key/value pairs."""
        size = self._read_ber()
        # Build from tuples rather than a dict comprehension: tuple
        # elements are always evaluated left to right, so the key is
        # read from the stream before its value on every Python version.
        return dict((self.read(), self.read()) for _ in xrange(size))

    def _read_float(self):
        """Read a little-endian double.

        Raises:
            LDOError: If the stored payload size is not 8 bytes.
        """
        size = self._read_ber()
        if size != self._FLOAT_SIZE:
            raise LDOError('invalid float size (%d)' % size)
        return struct.unpack(b'<d', self._read(size))[0]

    def read(self):
        """Decode and return the next value in the stream.

        Raises:
            EOFError: If the stream ends before a complete value is read.
            LDOError: If the value has an unknown type code or is
                otherwise malformed.
        """
        typecode = self._read_ord()
        # Only the table lookup is guarded, so an exception raised while
        # decoding the payload is never mistaken for a bad type code
        try:
            reader = self._readers[typecode]
        except KeyError:
            raise LDOError('invalid type code (0x%0.2X)' % typecode)
        return reader()


class MWKReader(object):
    """Iterate over the events stored in a legacy MWK event file.

    Instances are context managers; leaving the ``with`` block closes the
    underlying file.  Iterating yields one three-element list
    ``[code, time, data]`` per event, always starting from the beginning
    of the file.

    Args:
        filename: Path to the MWK file.  If the path is a directory (the
            layout created when MWorks' data tools index an MWK file),
            the file of the same name inside that directory is opened.
    """

    def __init__(self, filename):
        if os.path.isdir(filename):
            # An indexed event file is a directory holding the original
            # file under the same name; normpath drops any trailing
            # separator so basename is never empty
            filename = os.path.join(
                filename,
                os.path.basename(os.path.normpath(filename)),
                )
        self._fp = open(filename, 'rb')

    def close(self):
        """Close the underlying file."""
        self._fp.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def __iter__(self):
        """Yield each event in the file as ``[code, time, data]``.

        Events stored without data (two-element lists) are padded with
        ``None`` so that every yielded event has three elements.

        Raises:
            LDOError: If the file is not a valid LDO stream, or a decoded
                value is not a two- or three-element list.
        """
        self._fp.seek(0)
        reader = LDOReader(self._fp)
        while True:
            # Guard only the read, so the EOFError handler never covers
            # the validation or the yield
            try:
                event = reader.read()
            except EOFError:
                break

            # Raise explicitly instead of asserting: assertions are
            # stripped when Python runs with -O
            if not isinstance(event, list) or len(event) not in (2, 3):
                raise LDOError('invalid event: %r' % (event,))

            if len(event) == 2:
                event.append(None)
            yield event

The MWKReader class defined above can be used as follows:

# Each event is yielded as a three-element list [code, time, data]; data is
# None for events stored without data, such as the termination event that
# ends the file.
with MWKReader('my_data.mwk') as event_file:
    for code, time, data in event_file:
        # Process the current event
        ...