pypush-plus-plus/emulated/mparser.py

"""
Hacked up version of 'macholibre', by Aaron Stephens <aaronjst93@gmail.com>
Licensed under Apache Version 2.0
"""


import hashlib

from collections import Counter
from datetime import datetime
from json import dump
from math import exp, log
from os import SEEK_END
from re import split
from struct import unpack
from uuid import UUID

#from asn1crypto.cms import ContentInfo
#from asn1crypto.x509 import DirectoryString
from plistlib import loads

#import mdictionary as mdictionary

from io import BytesIO


class Parser():
    """Main object containing all the necessary functions to parse
    a mach-o binary.
    """

    def __init__(self, file):
        """Set up parser state around an in-memory copy of ``file``.

        ``file`` is passed straight to ``BytesIO``, so it must be a
        bytes-like object holding the raw binary contents.
        """

        stream = BytesIO(file)

        self.__file = stream
        self.file = stream  # public alias of the very same stream object

        # Place-holder defaults for word size and byte order.
        self.__is_64_bit = True
        self.__is_little_endian = True

        self.__extract_certs = False
        self.__macho = {}
        self.__output = {'name': 'IMDAppleServices'}

    def add_abnormality(self, abnormality):
        """Append ``abnormality`` to the output's 'abnormalities' list,
        creating that list the first time it is needed."""

        self.__output.setdefault('abnormalities', []).append(abnormality)

    def calc_entropy(self, b):
        """Return the entropy of the values in ``b`` using base-256 logs.

        Every distinct value contributes ``-p * log(p, 256)`` where ``p`` is
        its relative frequency. An empty input yields ``0``.
        """

        frequencies = Counter(b)
        total = float(sum(frequencies.values()))

        entropy = 0

        for occurrences in frequencies.values():
            probability = float(occurrences) / total
            entropy -= probability * log(probability, 256)

        return entropy

    def get_string(self):
        """Read a null-terminated string from the current file position.

        Bytes are consumed one at a time up to and including the first NUL
        byte. Reading also stops at end of file, in which case whatever was
        collected so far is returned.

        Returns:
            str: the collected bytes decoded as UTF-8, with undecodable
            sequences replaced rather than raising.
        """

        string = bytearray()

        c = self.__file.read(1)

        # A binary stream signals EOF with b'' (never the str ''); testing
        # for the str sentinel made this loop spin forever at end of file.
        while c not in (b'\x00', b''):
            string += c
            c = self.__file.read(1)

        return string.decode('utf-8', errors='replace')

    def get_int(self, ignore_endian=False):
        """Read 4 bytes and return them as an unsigned integer.

        The bytes are interpreted little-endian when the parser is in
        little-endian mode, unless ``ignore_endian`` is true, in which case
        (as in big-endian mode) they are interpreted big-endian.
        """

        raw = self.__file.read(4)
        use_little = self.__is_little_endian and not ignore_endian

        return int.from_bytes(raw, byteorder='little' if use_little else 'big')

    def get_ll(self):
        """Read 8 bytes and return them as an unsigned integer, honouring
        the parser's current byte order."""

        raw = self.__file.read(8)
        order = 'little' if self.__is_little_endian else 'big'

        return int.from_bytes(raw, byteorder=order)

    def make_version(self, version):
        """Format a packed version integer as 'X.Y.Z'.

        X is everything above the low 16 bits, Y is bits 8-15 and Z is the
        low 8 bits of ``version``.
        """

        major = version >> 16
        minor = (version >> 8) & 0xff
        patch = version & 0xff

        return '.'.join(str(part) for part in (major, minor, patch))

    def identify_file(self):
        """Read the 4-byte magic (always big-endian) and return the entry
        that ``mdictionary.machos`` holds for it.

        Raises:
            ValueError: if the magic is not a key of ``mdictionary.machos``.
        """

        magic = self.get_int(ignore_endian=True)

        if magic not in mdictionary.machos:
            raise ValueError('Provided file has unrecognized magic: {}'.format(
                magic))

        return mdictionary.machos[magic]

    def parse_macho_flags(self, flags):
        """Translate the low 28 bits of ``flags`` into readable names.

        Each set bit is looked up by its value in ``mdictionary.flags``;
        bits without an entry are reported as abnormalities instead.
        """

        output = []

        for bit in range(28):
            if not (flags >> bit) & 0x1:
                continue

            value = 1 << bit

            if value in mdictionary.flags:
                output.append(mdictionary.flags[value])
            else:
                self.add_abnormality('Unknown mach-o flag "{}".'.format(
                    value))

        return output

    def get_segment_entropy(self, m_offset, offset, size):
        """Return the entropy of ``size`` bytes starting at
        ``m_offset + offset``, leaving the file position where it was."""

        saved_position = self.__file.tell()

        self.__file.seek(m_offset + offset)
        segment_bytes = self.__file.read(size)
        entropy = self.calc_entropy(segment_bytes)

        self.__file.seek(saved_position)

        return entropy

    def parse_section_attrs(self, attrs):
        """Return the ``mdictionary.section_attrs`` entries whose key bits
        are all set in ``attrs``."""

        known = mdictionary.section_attrs

        return [known[mask] for mask in known if attrs & mask == mask]

    def parse_section_flags(self, output, flags):
        """Store the section type and attribute list for ``flags`` in
        ``output`` (modified in place).

        The low byte selects the type via ``mdictionary.section_types``; the
        remaining 24 bits are decoded by ``parse_section_attrs``.
        """

        section_type = flags & 0xff
        output['type'] = mdictionary.section_types[section_type]

        output['attrs'] = self.parse_section_attrs(flags & 0xffffff00)

    def parse_section(self):
        """Parse one section header at the current file position.

        Reads the 16-byte section and segment names, the address and size
        (8 bytes each for 64-bit, 4 bytes otherwise), six 4-byte fields and
        the reserved words, then decodes the flags via
        ``parse_section_flags``.

        Returns:
            dict: the raw header fields plus the 'type' and 'attrs' entries
            added by ``parse_section_flags``. 'r3' is ``None`` for 32-bit
            files, whose section header has no third reserved word.
        """

        read_word = self.get_ll if self.__is_64_bit else self.get_int

        # Names are fixed-width and NUL padded; replace undecodable bytes
        # (as get_string does) instead of aborting the whole parse.
        name = self.__file.read(16).decode(
            'utf-8', errors='replace').rstrip('\u0000')
        segname = self.__file.read(16).decode(
            'utf-8', errors='replace').rstrip('\u0000')
        addr = read_word()
        size = read_word()
        offset = self.get_int()
        align = self.get_int()
        reloff = self.get_int()
        nreloc = self.get_int()
        flags = self.get_int()
        r1 = self.get_int()
        r2 = self.get_int()
        # Only the 64-bit header (80 bytes) carries reserved3. Reading it
        # for a 32-bit header (68 bytes) swallowed the first 4 bytes of
        # whatever follows and misaligned every later read.
        r3 = self.get_int() if self.__is_64_bit else None

        output = {
            'name': name,
            'segname': segname,
            'addr': addr,
            'offset': offset,
            'align': align,
            'reloff': reloff,
            'nreloc': nreloc,
            'size': size,
            'r1': r1,
            'r2': r2,
            'r3': r3
        }

        self.parse_section_flags(output, flags)

        return output

    def parse_segment_flags(self, flags):
        """Return the ``mdictionary.segment_flags`` entries for whichever
        of the bits 0x1, 0x2, 0x4 and 0x8 are set in ``flags``."""

        return [mdictionary.segment_flags[bit]
                for bit in (1, 2, 4, 8)
                if flags & bit == bit]

    def parse_segment(self, m_offset, m_size, cmd, cmd_size):
        """Parse a segment load command and the section headers after it.

        ``m_offset`` and ``m_size`` give the position and size of the
        mach-o inside the file; they are used for the segment entropy and to
        stop reading section headers that would extend past the mach-o.

        Returns:
            dict: the segment fields, its entropy, the parsed sections under
            'sects' and the decoded flags under 'flags'.
        """

        read_word = self.get_ll if self.__is_64_bit else self.get_int

        name = self.__file.read(16).decode().rstrip('\u0000')
        vmaddr = read_word()
        vmsize = read_word()
        offset = read_word()
        segsize = read_word()
        raw_maxprot = self.get_int()
        raw_initprot = self.get_int()
        nsects = self.get_int()
        flags = self.get_int()

        protections = mdictionary.protections

        # Values are evaluated top to bottom, so the protection lookups
        # still happen before the entropy calculation.
        segment = {
            'm_offset': m_offset,
            'cmd': cmd,
            'size': cmd_size,
            'name': name,
            'vmaddr': vmaddr,
            'vmsize': vmsize,
            'offset': offset,
            'segsize': segsize,
            'maxprot': protections[raw_maxprot & 0b111],
            'initprot': protections[raw_initprot & 0b111],
            'nsects': nsects,
            'entropy': self.get_segment_entropy(m_offset, offset, segsize),
            'sects': []
        }

        sect_size = 80 if self.__is_64_bit else 68
        macho_end = m_offset + m_size

        for _ in range(nsects):
            position = self.__file.tell()

            if position + sect_size > macho_end:
                self.add_abnormality('Section at offset "{}" with size "{}" '
                                     'greater than mach-o size.'.format(
                                         position, sect_size))

                break

            segment['sects'].append(self.parse_section())

        segment['flags'] = self.parse_segment_flags(flags)

        return segment

    def parse_symtab(self, cmd, cmd_size):
        """Parse a symbol table load command: four consecutive 4-byte
        fields (symoff, nsyms, stroff, strsize)."""

        output = {'cmd': cmd, 'cmd_size': cmd_size}

        # Fields are read in the order they are listed here.
        for field in ('symoff', 'nsyms', 'stroff', 'strsize'):
            output[field] = self.get_int()

        return output

    def parse_symseg(self, cmd, cmd_size):
        """Parse the (obsolete) link-edit gdb symbol table info command:
        a 4-byte offset followed by a 4-byte size."""

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            # Dict values are evaluated in order: offset first, then size.
            'offset': self.get_int(),
            'size': self.get_int()
        }

    def parse_thread(self, cmd, cmd_size):
        """Parse a thread load command.

        Reads the 4-byte state flavor and 4-byte count, then skips the rest
        of the command (the thread state itself is not decoded). A flavor
        found in ``mdictionary.thread_states`` is replaced by its name;
        otherwise the raw value is kept and an abnormality is recorded.

        Returns:
            dict: 'cmd', 'cmd_size', 'state' and 'count'.
        """

        # Remember where the flavor lives *before* skipping the state
        # bytes, so the abnormality message points at the right place.
        flavor_offset = self.__file.tell()

        state = self.get_int()
        count = self.get_int()

        # 16 = cmd + cmdsize + flavor + count. Guard against undersized
        # commands: read() with a negative size consumes the rest of the
        # file.
        remaining = cmd_size - 16

        if remaining > 0:
            self.__file.read(remaining)  # skip thread_state objects.
        # TODO: parse them, definitions in <machine/thread_status.h>

        if state in mdictionary.thread_states:
            state = mdictionary.thread_states[state]
        else:
            self.add_abnormality('Invalid THREAD STATE FLAVOR "{}" at offset '
                                 '"{}".'.format(state, flavor_offset))

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'state': state,
            'count': count
        }

        return output

    def parse_fvmlib(self, cmd, cmd_size):
        """Parse an fvmlib load command.

        Expects the file position to be just past the 8-byte cmd/cmdsize
        header. Skips the 4-byte name offset, reads the minor version and
        header address, then the NUL-terminated name that follows.

        Returns:
            dict: 'cmd', 'cmd_size', 'name', 'minor_version' (formatted by
            ``make_version``) and 'header_addr'.
        """

        offset = self.__file.tell() - 8  # start of the load command

        self.__file.read(4)  # skip name offset

        minor_version = self.get_int()
        header_addr = self.get_int()
        name = self.get_string()

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': name,
            'minor_version': self.make_version(minor_version),
            'header_addr': header_addr
        }

        # Jump to the end of the command. Computing a padding length and
        # read()-ing it went wrong when the name overran cmd_size: the
        # length became negative and read() consumed the rest of the file.
        self.__file.seek(offset + cmd_size)

        return output

    def parse_ident(self, cmd, cmd_size):
        """Parse object identification info (obsolete).

        Collects every non-empty NUL-terminated string between the current
        position and the end of the command. Expects the file position to
        be just past the 8-byte cmd/cmdsize header.

        Returns:
            dict: 'cmd', 'cmd_size' and the list of 'strings'.
        """

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'strings': []
        }

        position = self.__file.tell()

        self.__file.seek(0, SEEK_END)
        file_size = self.__file.tell()
        self.__file.seek(position)

        # Never scan past the real end of the file: once the stream is
        # exhausted the position stops advancing, so a cmd_size larger than
        # the remaining data would otherwise loop forever.
        end = min(position - 8 + cmd_size, file_size)

        while self.__file.tell() < end:
            string = self.get_string()

            if string != '':
                output['strings'].append(string)

        return output

    def parse_fvmfile(self, cmd, cmd_size):
        """Parse fixed VM file inclusion (internal use).

        The command body is laid out as a 4-byte name offset, the 4-byte
        header address and then the path string (``fvmfile_command`` in
        <mach-o/loader.h>). Expects the file position to be just past the
        8-byte cmd/cmdsize header.

        Returns:
            dict: 'cmd', 'cmd_size', 'name' and 'header_addr'.
        """

        offset = self.__file.tell() - 8  # start of the load command

        # The fixed-size fields come first. Reading the string first
        # decoded the offset bytes as the name and took header_addr from
        # the middle of the path.
        self.__file.read(4)  # skip name offset

        header_addr = self.get_int()
        name = self.get_string()

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': name,
            'header_addr': header_addr
        }

        # skip padding so the next load command starts aligned
        self.__file.seek(offset + cmd_size)

        return output

    def parse_prepage(self, cmd, cmd_size):
        """Skip over a prepage command (internal use).

        No structure is decoded: the ``cmd_size - 8`` bytes following the
        command header are read and discarded.
        """

        payload_size = cmd_size - 8
        self.__file.read(payload_size)

        return {'cmd': cmd, 'cmd_size': cmd_size}

    def parse_dysymtab(self, cmd, cmd_size):
        """Parse dynamic link-edit symbol table info.

        The command body is read as eighteen consecutive 4-byte integers,
        stored under the field names listed below in that order.
        """

        field_names = (
            'ilocalsym',       # index to local symbols
            'nlocalsym',       # number of local symbols
            'iextdefsym',      # index to externally defined symbols
            'nextdefsym',      # number of externally defined symbols
            'iundefsym',       # index to undefined symbols
            'nundefsym',       # number of undefined symbols
            'tocoff',          # file offset to table of contents
            'ntoc',            # number of table of contents entries
            'modtaboff',       # file offset to module table
            'nmodtab',         # number of module table entries
            'extrefsymoff',    # offset to referenced symbol table
            'nextrefsyms',     # number of referenced symbol table entries
            'indirectsymoff',  # file offset to the indirect symbol table
            'nindirectsyms',   # number of indirect symbol table entries
            'extreloff',       # offset to external relocation entries
            'nextrel',         # number of external relocation entries
            'locreloff',       # offset to local relocation entries
            'nlocrel',         # number of local relocation entries
        )

        output = {'cmd': cmd, 'cmd_size': cmd_size}
        output.update((field, self.get_int()) for field in field_names)

        return output

    def parse_load_dylib(self, cmd, cmd_size):
        """Parse a dylib load command.

        Expects the file position to be just past the 8-byte cmd/cmdsize
        header. Skips the 4-byte name offset, reads the timestamp and the
        two packed versions, then the NUL-terminated name that follows.

        Returns:
            dict: 'cmd', 'cmd_size', 'name', 'timestamp' (formatted with
            ``datetime.fromtimestamp``, i.e. in local time),
            'current_version' and 'compatability_version' (sic - the key
            spelling is kept for existing consumers).
        """

        offset = self.__file.tell() - 8  # start of the load command

        self.__file.read(4)  # skip name offset

        timestamp = self.get_int()
        current_version = self.get_int()
        compatibility_version = self.get_int()
        name = self.get_string()

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': name,
            'timestamp': datetime.fromtimestamp(timestamp).strftime(
                '%Y-%m-%d %H:%M:%S'),
            'current_version': self.make_version(current_version),
            'compatability_version': self.make_version(compatibility_version)
        }

        # Skip padding by jumping to the end of the command. A computed
        # read() length turned negative when the name overran cmd_size,
        # which made read() consume the rest of the file.
        self.__file.seek(offset + cmd_size)

        return output

    def parse_load_dylinker(self, cmd, cmd_size):
        """Parse a dylinker load command.

        Expects the file position to be just past the 8-byte cmd/cmdsize
        header. Skips the 4-byte name offset and reads the NUL-terminated
        name that follows.

        Returns:
            dict: 'cmd', 'cmd_size' and 'name'.
        """

        offset = self.__file.tell() - 8  # start of the load command

        self.__file.read(4)  # skip name offset

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': self.get_string()
        }

        # Skip padding by jumping to the end of the command; a negative
        # read() length (name longer than cmd_size allows) would consume
        # the rest of the file instead.
        self.__file.seek(offset + cmd_size)

        return output

    def parse_prebound_dylib(self, cmd, cmd_size):
        """Parse prebound dylib load command.  An executable that is prebound to
        its dynamic libraries will have one of these for each library that the
        static linker used in prebinding.

        The command body is laid out as a 4-byte name offset, the 4-byte
        module count and a 4-byte offset to the linked-modules bit vector
        (``prebound_dylib_command`` in <mach-o/loader.h>); both offsets are
        relative to the start of the load command. Expects the file
        position to be just past the 8-byte cmd/cmdsize header.

        Returns:
            dict: 'cmd', 'cmd_size', 'name', 'nmodules' and
            'linked_modules' (the bit vector - one bit per module, rounded
            up to whole bytes - rendered as a hex string).
        """

        start = self.__file.tell() - 8  # start of the load command

        # Fixed-size fields come first. Reading a string straight away
        # decoded the offset bytes as the name and misread nmodules.
        name_offset = self.get_int()
        nmodules = self.get_int()
        modules_offset = self.get_int()

        self.__file.seek(start + name_offset)
        name = self.get_string()

        # The bit vector is raw binary and may contain zero bytes, so it
        # cannot be read as a NUL-terminated string.
        self.__file.seek(start + modules_offset)
        linked_modules = self.__file.read((nmodules + 7) // 8).hex()

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': name,
            'nmodules': nmodules,
            'linked_modules': linked_modules
        }

        # leave the stream at the start of the next load command
        self.__file.seek(start + cmd_size)

        return output

    def parse_routines(self, cmd, cmd_size):
        """Parse a routines load command.

        Reads the initialization routine address and the module index (8
        bytes each for 64-bit, otherwise 4), then discards the trailing
        48 (64-bit) or 24 (32-bit) bytes of the command.
        """

        read_word = self.get_ll if self.__is_64_bit else self.get_int

        init_address = read_word()
        init_module = read_word()

        trailing_size = 48 if self.__is_64_bit else 24
        self.__file.read(trailing_size)

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'init_address': init_address,
            'init_module': init_module
        }

    def parse_sub_stuff(self, cmd, cmd_size):
        """Parse sub_* load command.

        The command body is a 4-byte string offset followed by the
        NUL-terminated name (the sub_* command structs in
        <mach-o/loader.h>). Expects the file position to be just past the
        8-byte cmd/cmdsize header.

        Returns:
            dict: 'cmd', 'cmd_size' and 'name'.
        """

        offset = self.__file.tell() - 8  # start of the load command

        # Skip the string offset, as the other name-carrying parsers do.
        # Without this the offset bytes were decoded as the name and the
        # real name was left unread in the stream.
        self.__file.read(4)

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'name': self.get_string()
        }

        # skip padding so the next load command starts aligned
        self.__file.seek(offset + cmd_size)

        return output

    def parse_twolevel_hints(self, cmd, cmd_size):
        """Parse a two-level hints load command: a 4-byte offset followed
        by a 4-byte hint count."""

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            # Dict values are evaluated in order: offset, then nhints.
            'offset': self.get_int(),
            'nhints': self.get_int()
        }

    def parse_prebind_cksum(self, cmd, cmd_size):
        """Parse a prebind checksum load command (one 4-byte checksum)."""

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'cksum': self.get_int()
        }

    def parse_uuid(self, cmd, cmd_size):
        """Parse a UUID load command and return the 16 UUID bytes as a
        32-character hex string under 'uuid'."""

        raw = self.__file.read(16)

        if self.__is_little_endian:
            # '16s' yields the bytes in file order.
            (raw,) = unpack('<16s', raw)

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'uuid': UUID(bytes=raw).hex
        }

    def parse_linkedit_data(self, cmd, cmd_size):
        """Parse a link-edit data load command: the 4-byte file offset and
        4-byte file size of the data in the __LINKEDIT segment."""

        output = {'cmd': cmd, 'cmd_size': cmd_size}

        for field in ('dataoff', 'datasize'):
            output[field] = self.get_int()

        return output

    def parse_encryption_info(self, cmd, cmd_size):
        """Parse an encryption info load command.

        Reads three 4-byte fields (cryptoff, cryptsize, cryptid). When the
        command name ends in '64', four further padding bytes are consumed.
        """

        crypt_fields = {field: self.get_int()
                        for field in ('cryptoff', 'cryptsize', 'cryptid')}

        if cmd.endswith('64'):
            self.__file.read(4)  # skip padding

        output = {'cmd': cmd, 'cmd_size': cmd_size}
        output.update(crypt_fields)

        return output

    def parse_dyld_info(self, cmd, cmd_size):
        """Parse a dyld info load command.

        The body is read as ten consecutive 4-byte integers: a file offset
        and size pair for each of the rebase, bind, weak bind, lazy bind and
        export information.
        """

        field_names = (
            'rebase_off',      # file offset to rebase info
            'rebase_size',     # size of rebase info
            'bind_off',        # file offset to binding info
            'bind_size',       # size of binding info
            'weak_bind_off',   # file offset to weak binding info
            'weak_bind_size',  # size of weak binding info
            'lazy_bind_off',   # file offset to lazy binding info
            'lazy_bind_size',  # size of lazy binding info
            'export_off',      # file offset to export info
            'export_size',     # size of export info
        )

        output = {'cmd': cmd, 'cmd_size': cmd_size}
        output.update((field, self.get_int()) for field in field_names)

        return output

    def parse_version_min_os(self, cmd, cmd_size):
        """Parse a minimum OS version load command: two packed 4-byte
        versions (minimum OS, then SDK), each formatted by
        ``make_version``."""

        raw_version = self.get_int()
        raw_sdk = self.get_int()

        version, sdk = (self.make_version(raw)
                        for raw in (raw_version, raw_sdk))

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'version': version,
            'sdk': sdk
        }

    def parse_source_version(self, cmd, cmd_size):
        """Parse a source version load command.

        The 8-byte value packs A.B.C.D.E as a24.b10.c10.d10.e10: A is
        everything above bit 40 and B-E are successive 10-bit fields.
        """

        packed = self.get_ll()

        components = [packed >> 40]

        # B, C, D and E, from the most significant field downwards.
        for shift in (30, 20, 10, 0):
            components.append((packed >> shift) & 0b1111111111)

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'version': '.'.join(str(part) for part in components)
        }

    def parse_linker_option(self, cmd, cmd_size):
        """Parse linker options load command.

        Reads the 4-byte string count followed by that many NUL-terminated
        strings, never reading past the end of the command. Expects the
        file position to be just past the 8-byte cmd/cmdsize header.

        Returns:
            dict: 'cmd', 'cmd_size', 'count' (as declared in the command)
            and 'linker_options' (the strings actually found inside the
            command, which may be fewer than 'count' for malformed input).
        """

        start = self.__file.tell() - 8  # start of the load command
        end = start + cmd_size

        count = self.get_int()

        linker_options = []

        for _ in range(count):
            # An overstated count must not pull strings out of the next
            # load command.
            if self.__file.tell() >= end:
                break

            linker_options.append(self.get_string())

        # Skip padding by jumping to the end of the command; a negative
        # read() length would consume the rest of the file instead.
        self.__file.seek(end)

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'count': count,
            'linker_options': linker_options
        }

        return output

    def parse_rpath(self, cmd, cmd_size):
        """Parse rpath load command.

        Expects the file position to be just past the 8-byte cmd/cmdsize
        header. Skips the 4-byte path offset and reads the NUL-terminated
        path that follows.

        Returns:
            dict: 'cmd', 'cmd_size' and 'path'.
        """

        offset = self.__file.tell() - 8  # start of the load command

        self.__file.read(4)  # skip path offset

        path = self.get_string()

        output = {
            'cmd': cmd,
            'cmd_size': cmd_size,
            'path': path
        }

        # Skip padding by jumping to the end of the command; a negative
        # read() length (path longer than cmd_size allows) would consume
        # the rest of the file instead.
        self.__file.seek(offset + cmd_size)

        return output

    def parse_main(self, cmd, cmd_size):
        """Parse a main load command: two 8-byte values, the file (__TEXT)
        offset of main() and the initial stack size (used if not zero)."""

        return {
            'cmd': cmd,
            'cmd_size': cmd_size,
            # Dict values are evaluated in order: entryoff, then stacksize.
            'entryoff': self.get_ll(),
            'stacksize': self.get_ll()
        }

    def parse_lcs(self, offset, size, nlcs, slcs):
        """Determine which load commands are present and parse each one
        accordingly.

        Parsed commands are appended to ``self.__macho['lcs']``. Segments
        are additionally collected in ``self.segments``; the symbol table
        and dyld info commands are also stored as ``self.symtab`` and
        ``self.dyld_info``. ``offset`` and ``size`` are forwarded to
        ``parse_segment``; ``slcs`` is not used here.

        Load command structures found in '/usr/include/mach-o/loader.h'.

        Raises:
            ValueError: if a load command size is not a multiple of 8
                (64-bit) or 4 (32-bit).
        """

        self.__macho['lcs'] = []
        self.segments = []

        # Map every handled command name to the parser for its body.
        handlers = {}

        for names, handler in (
            (('SYMTAB',), self.parse_symtab),
            (('SYMSEG',), self.parse_symseg),
            (('THREAD', 'UNIXTHREAD'), self.parse_thread),
            (('LOADFVMLIB', 'IDFVMLIB'), self.parse_fvmlib),
            (('IDENT',), self.parse_ident),
            (('FVMFILE',), self.parse_fvmfile),
            (('PREPAGE',), self.parse_prepage),
            (('DYSYMTAB',), self.parse_dysymtab),
            (('LOAD_DYLIB', 'ID_DYLIB', 'LAZY_LOAD_DYLIB',
              'LOAD_WEAK_DYLIB', 'REEXPORT_DYLIB',
              'LOAD_UPWARD_DYLIB'), self.parse_load_dylib),
            (('LOAD_DYLINKER', 'ID_DYLINKER', 'DYLD_ENVIRONMENT'),
             self.parse_load_dylinker),
            (('PREBOUND_DYLIB',), self.parse_prebound_dylib),
            (('ROUTINES', 'ROUTINES_64'), self.parse_routines),
            (('SUB_FRAMEWORK', 'SUB_UMBRELLA', 'SUB_CLIENT',
              'SUB_LIBRARY'), self.parse_sub_stuff),
            (('TWOLEVEL_HINTS',), self.parse_twolevel_hints),
            (('PREBIND_CKSUM',), self.parse_prebind_cksum),
            (('UUID',), self.parse_uuid),
            (('CODE_SIGNATURE', 'SEGMENT_SPLIT_INFO',
              'FUNCTION_STARTS', 'DATA_IN_CODE',
              'DYLIB_CODE_SIGN_DRS', 'LINKER_OPTIMIZATION_HINT'),
             self.parse_linkedit_data),
            (('ENCRYPTION_INFO', 'ENCRYPTION_INFO_64'),
             self.parse_encryption_info),
            (('DYLD_INFO', 'DYLD_INFO_ONLY'), self.parse_dyld_info),
            (('VERSION_MIN_MACOSX', 'VERSION_MIN_IPHONEOS',
              'VERSION_MIN_WATCHOS', 'VERSION_MIN_TVOS'),
             self.parse_version_min_os),
            (('SOURCE_VERSION',), self.parse_source_version),
            (('LINKER_OPTION',), self.parse_linker_option),
            (('RPATH',), self.parse_rpath),
            (('MAIN',), self.parse_main),
        ):
            for lc_name in names:
                handlers[lc_name] = handler

        for _ in range(nlcs):
            cmd_start = self.__file.tell()

            cmd = self.get_int()       # Load command type
            cmd_size = self.get_int()  # Size of load command

            if self.__is_64_bit and cmd_size % 8 != 0:
                raise ValueError('Load command size "{}" for 64-bit mach-o at '
                                 'offset "{}" is not divisible by 8.'.format(
                                    cmd_size, self.__file.tell() - 4))
            elif cmd_size % 4 != 0:
                raise ValueError('Load command size "{}" for 32-bit mach-o at '
                                 'offset "{}" is not divisible by 4.'.format(
                                    cmd_size, self.__file.tell() - 4))

            if cmd in mdictionary.loadcommands:
                cmd = mdictionary.loadcommands[cmd]
            else:
                self.add_abnormality('Unknown load command "{}" at offset '
                                     '"{}".'.format(
                                         cmd, self.__file.tell() - 8))

            parsed = None

            if cmd == 'SEGMENT' or cmd == 'SEGMENT_64':
                parsed = self.parse_segment(offset, size, cmd, cmd_size)
                self.segments.append(parsed)
            elif cmd in handlers:
                parsed = handlers[cmd](cmd, cmd_size)

                if cmd == 'SYMTAB':
                    self.symtab = parsed
                elif cmd in ('DYLD_INFO', 'DYLD_INFO_ONLY'):
                    self.dyld_info = parsed

            if parsed is not None:
                self.__macho['lcs'].append(parsed)

            # The next command always starts cmd_size bytes after this one,
            # so re-align here. This skips unknown commands, skips named
            # commands that have no parser above (previously left unread,
            # corrupting every later command), and recovers from a parser
            # that consumed too few or too many bytes. max() guarantees
            # forward progress past the 8-byte header on a bogus cmd_size.
            self.__file.seek(cmd_start + max(cmd_size, 8))

    def parse_syms(self, offset, size, lc_symtab):
        """Parse the symbol table into ``self.__macho['symtab']``.

        ``offset`` and ``size`` locate the mach-o inside the file and
        ``lc_symtab`` is the parsed symbol table load command (its 'symoff'
        and 'nsyms' entries are used). Nothing is stored when the table
        starts out of bounds or its entropy is 0.8 or higher; in both cases
        an abnormality is recorded instead.

        Symbol table format found in:
        /usr/include/mach-o/nlist.h
        /usr/include/mach-o/stab.h
        """

        # Check if symbol table offset is within mach-o
        if lc_symtab['symoff'] > size:
            self.add_abnormality('Symbol table at offset "{}" out of '
                                 'bounds.'.format(
                                     offset + lc_symtab['symoff']))

            return

        true_offset = offset + lc_symtab['symoff']  # beginning of symbol table

        symbol_size = 16 if self.__is_64_bit else 12

        self.__file.seek(true_offset)

        entropy = self.calc_entropy(self.__file.read(
            lc_symtab['nsyms'] * symbol_size))

        if entropy >= 0.8:
            self.add_abnormality('Symbol table with entropy of "{}" is '
                                 'probably packed. Not attempting to '
                                 'parse.'.format(entropy))

            return

        if lc_symtab['symoff'] + lc_symtab['nsyms'] * symbol_size > size:
            self.add_abnormality('Symbol table at offset "{}" partially out '
                                 'of bounds. Attempting to parse as many '
                                 'symbols as possible.'.format(true_offset))

        self.__file.seek(true_offset)  # jump to beginning of symbol table

        self.__macho['symtab'] = []

        byteorder = 'little' if self.__is_little_endian else 'big'

        for _ in range(lc_symtab['nsyms']):
            if self.__file.tell() + symbol_size > offset + size:
                break

            n_strx = self.get_int()
            # int.from_bytes tolerates a short read at EOF (yields 0),
            # where int('', 16) raised ValueError.
            n_type = int.from_bytes(self.__file.read(1), byteorder)
            n_sect = int.from_bytes(self.__file.read(1), byteorder)
            # n_desc is a 16-bit field stored in the file's byte order; it
            # used to be read big-endian unconditionally.
            n_desc = int.from_bytes(self.__file.read(2), byteorder)

            n_value = self.get_ll() if self.__is_64_bit else self.get_int()

            symbol = {
                'n_strx': n_strx,
                'n_sect': n_sect,
                'n_desc': n_desc,
                'n_value': n_value
            }

            if n_type >= 32:
                if n_type in mdictionary.stabs:
                    symbol['stab'] = mdictionary.stabs[n_type]
                else:
                    self.add_abnormality(
                        'Unknown stab type "{}" at offset "{}".'.format(
                            n_type, self.__file.tell() - symbol_size + 4))
            else:
                n_pext = n_type & 0x10  # private external symbol flag
                n_ext = n_type & 0x01   # external symbol flag
                n_type = n_type & 0x0e  # symbol type

                if n_type in mdictionary.n_types:
                    n_type = mdictionary.n_types[n_type]
                else:
                    self.add_abnormality(
                        'Unknown N_TYPE "{}" at offset "{}".'.format(
                            n_type, self.__file.tell() - symbol_size + 4))

                # With n_desc in the right byte order the library ordinal
                # is its high byte and the reference type its low bits,
                # for either endianness. The old little-endian branch
                # masked the ordinal with 0x0f, truncating ordinals above
                # 15 (so 254 and 255 could never be seen).
                dylib = (n_desc >> 8) & 0xff
                ref = n_desc & 0x0f

                symbol['pext'] = n_pext
                symbol['n_type'] = n_type
                symbol['ext'] = n_ext
                symbol['dylib'] = dylib
                symbol['ref'] = ref

            self.__macho['symtab'].append(symbol)

    def parse_strings(self, offset, size, lc_symtab):
        """Parse the string table into ``self.__macho['strtab']``.

        ``offset`` and ``size`` locate the mach-o inside the file and
        ``lc_symtab`` is the parsed symbol table load command (its 'stroff'
        and 'strsize' entries are used). Nothing is stored when the table
        starts out of bounds or its entropy is 0.8 or higher; in both cases
        an abnormality is recorded instead.

        Only bytes inside the table are considered: a final string without
        a terminator ends at the table (or file) boundary, and empty
        strings are dropped.
        """

        # Check if string table offset is within mach-o
        if lc_symtab['stroff'] > size:
            self.add_abnormality(
                'String table at offset "{}" greater than mach-o size.'.format(
                    offset + lc_symtab['stroff']))

            return

        true_offset = offset + lc_symtab['stroff']

        # Read the table once. read() simply returns fewer bytes when the
        # table runs past the end of the file, whereas scanning it string
        # by string never terminated in that case.
        self.__file.seek(true_offset)
        table = self.__file.read(lc_symtab['strsize'])

        entropy = self.calc_entropy(table)

        if entropy >= 0.8:
            self.add_abnormality('String table with entropy of "{}" is '
                                 'probably packed. Not attempting to '
                                 'parse.'.format(entropy))

            return

        if true_offset + lc_symtab['strsize'] > offset + size:
            self.add_abnormality('String Table at offset "{}" partially out '
                                 'of bounds. Attempting to parse as many '
                                 'strings as possible.'.format(true_offset))

        self.__macho['strtab'] = [
            chunk.decode('utf-8', errors='replace')
            for chunk in table.split(b'\x00')
            if chunk
        ]

    def parse_imports(self, offset, size, lc_symtab, lc_dysymtab=None,
                      lc_dylibs=None):
        """Parse undefined external symbols (imports) out of the symbol and
        string tables.
        """

        self.__macho['imports'] = []

        true_offset = offset + lc_symtab['stroff']

        undef_syms = None

        if lc_dysymtab is not None:  # Use symtab layout info from DYSYMTAB
            i_undef = lc_dysymtab['nlocalsym'] + lc_dysymtab['nextdefsym'] - 1
            j_undef = i_undef + lc_dysymtab['nundefsym']

            undef_syms = self.__macho['symtab'][i_undef:j_undef]
        else:  # Find undefined symbols manually by checking n_type
            undef_syms = filter(lambda sym: sym['n_type'] in ('UNDF', 'PBUD'),
                                self.__macho['symtab'])

        for sym in undef_syms:
            self.__file.seek(true_offset + sym['n_strx'])

            value = self.get_string()

            if lc_dylibs is not None:  # If created with two-level namespace
                dylib = sym['dylib']

                if dylib == 0:
                    dylib = 'SELF_LIBRARY'
                elif dylib == 254:
                    dylib = 'DYNAMIC_LOOKUP'
                elif dylib == 255:
                    dylib = 'EXECUTABLE'
                elif dylib > len(lc_dylibs):
                    dylib = f'{dylib} (OUT_OF_RANGE)'
                else:
                    dylib = lc_dylibs[dylib - 1]['name']

                self.__macho['imports'].append((value, dylib))
            else:
                self.__macho['imports'].append(value)

    def parse_certs(self, sig_offset, index_offset):
        """Parse X509 certificates out of code signature.

        Reads the blob wrapper at ``sig_offset + index_offset``, loads the
        CMS structure it contains and stores one summary dict per
        certificate (``subject``, ``issuer``, ``serial``, ``is_ca``) in
        ``self.__macho['code_signature']['certs']``. When certificate
        extraction is enabled, each certificate's dumped bytes are also
        written to a file named after their MD5 hex digest.

        The file position held on entry is restored on every exit path, so
        the caller can keep reading its index table after a rejected blob.

        NOTE: ``ContentInfo`` and ``DirectoryString`` come from the
        asn1crypto imports that are commented out at the top of this file;
        they have to be re-enabled before this method can run.

        Returns:
            An empty list when the blob is rejected (bad magic or
            non-positive size), otherwise ``None``.
        """

        # Relative distinguished name types stored under a short key.
        short_keys = {
            'Country': 'country',
            'Organization': 'org',
            'Organizational Unit': 'org_unit',
            'Common Name': 'common_name',
        }

        def name_to_dict(x509_name):
            # Flatten a certificate name (subject or issuer) into a dict,
            # using the first attribute of every RDN.
            fields = {}

            for rdn in x509_name.chosen:
                name = rdn[0]['type'].human_friendly
                value = rdn[0]['value']

                if name in short_keys:
                    fields[short_keys[name]] = str(value.chosen)
                elif isinstance(value, DirectoryString):
                    fields[name] = str(value.chosen)
                else:
                    fields[name] = str(value.parsed)

            return fields

        prev = self.__file.tell()

        try:
            true_offset = sig_offset + index_offset

            self.__file.seek(true_offset)

            magic = self.get_int(ignore_endian=True)

            if magic != mdictionary.signatures['BLOBWRAPPER']:
                self.add_abnormality('Bad magic "{}" for certificate blob '
                                     'wrapper at offset "{}".'.format(
                                         magic, true_offset))

                return []

            # subtract 8 to ignore magic and size fields
            size = self.get_int(ignore_endian=True) - 8

            if size <= 0:
                self.add_abnormality('Non-positive CMS size "{}" at offset '
                                     '"{}".'.format(
                                         size, self.__file.tell() - 4))

                return []

            signed_data = ContentInfo.load(self.__file.read(size))['content']

            self.__macho['code_signature']['certs'] = []

            for cert in signed_data['certificates']:
                cert = cert.chosen

                if self.__extract_certs:
                    c_bytes = cert.dump()

                    # Close the output file deterministically.
                    with open(hashlib.md5(c_bytes).hexdigest(), 'wb') as fh:
                        fh.write(c_bytes)

                self.__macho['code_signature']['certs'].append({
                    'subject': name_to_dict(cert.subject),
                    'issuer': name_to_dict(cert.issuer),
                    'serial': cert.serial_number,
                    'is_ca': cert.ca
                })
        finally:
            self.__file.seek(prev)

    def parse_codedirectory(self, sig_offset, index_offset):
        """Parse code directory from code signature.

        Reads the code directory blob at ``sig_offset + index_offset`` and
        stores its header fields, identity string and all slot hashes
        (special slots followed by code slots, hex encoded) in
        ``self.__macho['code_signature']['codedirectory']``. Version
        0x20100 and later add ``scatter_offset``; version 0x20200 and later
        add ``platform``, ``team_id_offset`` and ``team_id``.

        An unknown hash type is recorded as an abnormality and kept as its
        raw number. The file position held on entry is restored on every
        exit path, including the bad-magic return.
        """

        def read_byte():
            # Single unsigned byte at the current position.
            return int(self.__file.read(1).hex(), 16)

        prev = self.__file.tell()

        try:
            true_offset = sig_offset + index_offset

            self.__file.seek(true_offset)

            magic = self.get_int(ignore_endian=True)

            if magic != mdictionary.signatures['CODEDIRECTORY']:
                self.add_abnormality('Bad magic "{}" for code directory at '
                                     'offset "{}".'.format(
                                         magic, self.__file.tell() - 4))

                return

            size = self.get_int(ignore_endian=True)
            version = self.get_int(ignore_endian=True)
            # TODO: not sure how to parse flags yet...
            flags = self.get_int(ignore_endian=True)
            hash_offset = self.get_int(ignore_endian=True)
            ident_offset = self.get_int(ignore_endian=True)
            n_special_slots = self.get_int(ignore_endian=True)
            n_code_slots = self.get_int(ignore_endian=True)
            code_limit = self.get_int(ignore_endian=True)
            hash_size = read_byte()

            hash_type = read_byte()

            if hash_type in mdictionary.hashes:
                hash_type = mdictionary.hashes[hash_type]
            else:
                self.add_abnormality('Unknown hash type "{}" for code '
                                     'directory at offset "{}".'.format(
                                         hash_type, self.__file.tell() - 1))

            platform = None

            if version >= 0x20200:
                platform = read_byte()
            else:
                self.__file.read(1)  # skip spare1

            # The stored byte is log2 of the page size; a shift is exact
            # where exp()/log() rounding is not for large exponents.
            page_size = 1 << read_byte()

            self.__file.read(4)  # skip spare2

            scatter_offset = None
            team_id_offset = None
            team_id = None

            if version >= 0x20100:
                scatter_offset = self.get_int(ignore_endian=True)
            if version >= 0x20200:
                team_id_offset = self.get_int(ignore_endian=True)
                self.__file.seek(true_offset + team_id_offset)
                team_id = self.get_string()

            self.__file.seek(true_offset + ident_offset)

            identity = self.get_string()

            directory = {
                'size': size,
                'version': version,
                'flags': flags,
                'hash_offset': hash_offset,
                'n_special_slots': n_special_slots,
                'n_code_slots': n_code_slots,
                'code_limit': code_limit,
                'hash_size': hash_size,
                'hash_type': hash_type,
                'page_size': page_size,
                'identity': identity,
                'hashes': []
            }

            if version >= 0x20100:
                directory['scatter_offset'] = scatter_offset
            if version >= 0x20200:
                directory['platform'] = platform
                directory['team_id_offset'] = team_id_offset
                directory['team_id'] = team_id

            self.__macho['code_signature']['codedirectory'] = directory

            # Special slots are stored immediately before hash_offset.
            self.__file.seek(
                true_offset + hash_offset - n_special_slots * hash_size)

            for _ in range(n_special_slots + n_code_slots):
                directory['hashes'].append(self.__file.read(hash_size).hex())
        finally:
            self.__file.seek(prev)

    def get_oid(self, db, p):
        """Decode one base-128 OID component starting at index ``p``.

        OID parser implementation from:

        http://opensource.apple.com/source/Security/Security-57337.20.44/
        OSX/libsecurity_cdsa_utilities/lib/cssmdata.cpp

        Each byte contributes its low seven bits; a set high bit means
        another byte follows. Decoding stops at the first byte without the
        high bit, or at the end of ``db`` when the component is truncated
        (the bounds check now happens before the byte is read, so a
        truncated component no longer raises ``IndexError``).

        Args:
            db: Indexable sequence of byte values (ints).
            p: Index of the first byte of the component.

        Returns:
            A ``(value, next_index)`` tuple.
        """

        q = 0

        while p < len(db):
            byte = db[p]
            p += 1

            q = q * 128 + (byte & ~0x80)

            if not byte & 0x80:
                break

        return q, p

    def to_oid(self, length):
        """Convert bytes to correct OID."""

        if length == 0:
            return ''

        data_bytes = [
            int(self.__file.read(1).hex(), 16) for i in range(length)
        ]

        p = 0

        # first byte is composite (q1, q2)
        oid1, p = self.get_oid(data_bytes, p)

        q1 = min(oid1 / 40, 2)

        data = str(q1) + '.' + str(oid1 - q1 * 40)

        while p < len(data_bytes):
            d, p = self.get_oid(data_bytes, p)
            data += '.' + str(d)

        self.__file.read(-length & 3)

        return data

    def parse_entitlement(self, sig_offset, index_offset):
        """Parse entitlement from code signature.

        Reads the entitlement blob at ``sig_offset + index_offset``, parses
        its payload as a property list and appends ``{'size', 'plist'}`` to
        ``self.__macho['code_signature']['entitlements']``. A payload that
        cannot be parsed is recorded as an abnormality and stored with an
        empty plist.

        A blob with bad magic, or whose size field is smaller than its own
        8-byte header, is recorded as an abnormality and skipped. The file
        position held on entry is restored on every exit path.
        """

        prev = self.__file.tell()

        try:
            self.__file.seek(sig_offset + index_offset)

            magic = self.get_int(ignore_endian=True)

            if magic != mdictionary.signatures['ENTITLEMENT']:
                self.add_abnormality('Bad magic "{}" for entitlement at '
                                     'offset "{}".'.format(
                                         magic, self.__file.tell() - 4))

                return

            # size of plist minus magic and size values
            size = self.get_int(ignore_endian=True) - 8

            if size < 0:
                # read() with a negative count would consume the rest of
                # the file, so reject the blob instead.
                self.add_abnormality('Negative entitlement plist size "{}" '
                                     'at offset "{}".'.format(
                                         size, self.__file.tell() - 4))

                return

            try:
                plist = loads(self.__file.read(size))
            except Exception as exc:
                plist = {}
                self.add_abnormality('Unable to parse plist at offset "{}". '
                                     '{}.'.format(
                                         self.__file.tell() - size, exc))

            entitlements = self.__macho['code_signature'].setdefault(
                'entitlements', [])

            entitlements.append({
                'size': size,
                'plist': plist
            })
        finally:
            self.__file.seek(prev)

    def parse_data(self):
        """Parse data for requirement expression."""

        length = self.get_int(ignore_endian=True)

        data = self.__file.read(length)

        self.__file.read(-length & 3)  # skip padding

        return data

    def parse_match(self):
        """Render the match suffix of a requirement expression.

        Reads the match type and, for every type except ``matchExists``,
        the data field that follows it. Text comparisons embed the decoded
        data; ordering comparisons embed the data parsed as a base-16
        integer.

        Returns:
            The match rendered as text, starting with a space. Unknown
            match types yield an ``UNKNOWN MATCH TYPE`` marker.
        """

        # Match types whose operand is rendered as text.
        text_forms = {
            'matchEqual': ' = "{}"',
            'matchContains': ' ~ "{}"',
            'matchBeginsWith': ' = "{}*"',
            'matchEndsWith': ' = "*{}"',
        }

        # Match types whose operand is rendered as a number.
        numeric_forms = {
            'matchLessThan': ' < {}',
            'matchGreaterThan': ' > {}',
            'matchLessEqual': ' <= {}',
            'matchGreaterEqual': ' >= {}',
        }

        code = self.get_int(ignore_endian=True)
        match_type = mdictionary.matches.get(code, code)

        if match_type == 'matchExists':
            return ' /* exists */'

        if match_type in text_forms:
            return text_forms[match_type].format(self.parse_data().decode())

        if match_type in numeric_forms:
            return numeric_forms[match_type].format(
                int(self.parse_data(), 16))

        return ' UNKNOWN MATCH TYPE "{}"'.format(match_type)

    def parse_expression(self, in_or=False):
        """Render one requirement expression as text, recursing as needed.

        Reads an operator from the file, then whatever operands that
        operator takes (data fields, certificate slots, match suffixes or
        nested expressions), in the order they are stored.

        Args:
            in_or: True when this expression is an operand of an ``or``;
                a nested ``and``/``or`` is then wrapped in parentheses.

        Returns:
            The expression as text, or an empty string for an operator
            that has no rendering here.
        """

        def read_text():
            # One data field, decoded with the bytes.decode() default.
            return self.parse_data().decode()

        def read_slot():
            # Certificate slot number, replaced by its name when known.
            slot = self.get_int(ignore_endian=True)
            return mdictionary.cert_slots.get(slot, slot)

        # Only the low 12 bits select the operator; the rest is dropped
        # (TODO: Look into flags field)
        opcode = self.get_int(ignore_endian=True) & 0xfff
        operator = mdictionary.operators[opcode]

        # Operators without operands.
        constants = {
            'False': 'never',
            'True': 'always',
            'AppleAnchor': 'anchor apple',
            'AppleGenericAnchor': 'anchor apple generic',
            'TrustedCerts': 'anchor trusted',
        }

        if operator in constants:
            return constants[operator]

        # Operators followed by a single data field.
        one_field = {
            'Ident': 'identity "{}"',
            'CDHash': 'cdhash {}',
            'NamedAnchor': 'anchor apple {}',
            'NamedCode': '({})',
        }

        if operator in one_field:
            return one_field[operator].format(read_text())

        # Operators followed by a data field and a match.
        field_and_match = {
            'InfoKeyField': 'info[{}]{}',
            'EntitlementField': 'entitlement[{}]{}',
        }

        if operator in field_and_match:
            key = read_text()
            return field_and_match[operator].format(key, self.parse_match())

        # Operators followed by a slot, a data field and a match.
        slot_field_and_match = {
            'CertField': 'certificate {}[{}]{}',
            'CertPolicy': 'certificate {}[policy.{}]{}',
        }

        if operator in slot_field_and_match:
            cert_slot = read_slot()
            key = read_text()
            return slot_field_and_match[operator].format(
                cert_slot, key, self.parse_match())

        if operator == 'AnchorHash':
            cert_slot = read_slot()
            return 'certificate {} = {}'.format(cert_slot, read_text())

        if operator == 'InfoKeyValue':
            key = read_text()
            return 'info[{}] = "{}"'.format(key, read_text())

        if operator == 'And':
            left = self.parse_expression()
            right = self.parse_expression()
            template = '({} and {})' if in_or else '{} and {}'
            return template.format(left, right)

        if operator == 'Or':
            left = self.parse_expression(in_or=True)
            right = self.parse_expression(in_or=True)
            template = '({} or {})' if in_or else '{} or {}'
            return template.format(left, right)

        if operator == 'Not':
            return '! {}'.format(self.parse_expression())

        if operator == 'CertGeneric':
            cert_slot = read_slot()
            oid_length = self.get_int(ignore_endian=True)
            oid = self.to_oid(oid_length)
            return 'certificate {}[field.{}]{}'.format(
                cert_slot, oid, self.parse_match())

        if operator == 'TrustedCert':
            return 'certificate {} trusted'.format(read_slot())

        if operator == 'Platform':
            return 'platform = {}'.format(self.get_int(ignore_endian=True))

        return ''

    def parse_requirement(self, reqs_offset, req_type, req_offset):
        """Parse single requirement from code signature.

        Reads the requirement blob at ``reqs_offset + req_offset`` and
        appends ``{'req_type', 'req_offset', 'expression'}`` to
        ``self.__macho['code_signature']['requirements']``, which must
        already exist.

        A blob with bad magic is recorded as an abnormality and skipped.
        The file position held on entry is restored on every exit path, so
        the caller's walk over the requirements index is not disturbed.
        """

        prev = self.__file.tell()

        try:
            self.__file.seek(reqs_offset + req_offset)

            magic = self.get_int(ignore_endian=True)

            if magic != mdictionary.signatures['REQUIREMENT']:
                self.add_abnormality('Bad magic "{}" for requirement at '
                                     'offset "{}".'.format(
                                         magic, self.__file.tell() - 4))

                return

            self.__file.read(8)  # skip size and kind fields
            # (TODO: look into ``kind`` field)

            self.__macho['code_signature']['requirements'].append({
                'req_type': req_type,
                'req_offset': req_offset,
                'expression': self.parse_expression()
            })
        finally:
            self.__file.seek(prev)

    def parse_requirements(self, sig_offset, index_offset):
        """Parse requirements from code signature.

        Reads the requirements blob at ``sig_offset + index_offset``,
        resets ``self.__macho['code_signature']['requirements']`` to an
        empty list and parses every ``(type, offset)`` entry of its index
        with ``parse_requirement``.

        A blob with bad magic is recorded as an abnormality and skipped. A
        requirement type missing from ``mdictionary.requirements`` is
        recorded as an abnormality and kept as its raw number. The file
        position held on entry is restored on every exit path.
        """

        prev = self.__file.tell()

        try:
            true_offset = sig_offset + index_offset

            self.__file.seek(true_offset)

            magic = self.get_int(ignore_endian=True)

            if magic != mdictionary.signatures['REQUIREMENTS']:
                self.add_abnormality('Bad magic "{}" for requirements at '
                                     'offset "{}".'.format(
                                         magic, self.__file.tell() - 4))

                return

            self.__file.read(4)  # skip size field

            count = self.get_int(ignore_endian=True)

            self.__macho['code_signature']['requirements'] = []

            for _ in range(count):
                req_type = self.get_int(ignore_endian=True)

                if req_type in mdictionary.requirements:
                    req_type = mdictionary.requirements[req_type]
                else:
                    self.add_abnormality('Unknown requirement type "{}" at '
                                         'offset "{}".'.format(
                                             req_type,
                                             self.__file.tell() - 4))

                req_offset = self.get_int(ignore_endian=True)

                self.parse_requirement(true_offset, req_type, req_offset)
        finally:
            self.__file.seek(prev)

    def parse_sig(self, offset, size, lc_codesig):
        """Parse the embedded code signature and every blob it indexes.

        Args:
            offset: Offset of the mach-o within the file.
            size: Size of the mach-o.
            lc_codesig: Parsed CODE_SIGNATURE load command; its ``dataoff``
                and ``datasize`` entries are used.

        A signature that extends past the mach-o, or whose magic is wrong,
        is recorded as an abnormality and not parsed. Otherwise
        ``self.__macho['code_signature']`` is reset and each index entry of
        a handled type is passed to its parser; unknown index types are
        recorded as abnormalities.
        """

        if lc_codesig['dataoff'] + lc_codesig['datasize'] > size:
            self.add_abnormality('CODE_SIGNATURE at offset "{}" with size '
                                 '"{}" greater than mach-o size.'.format(
                                     offset + lc_codesig['dataoff'],
                                     lc_codesig['datasize']))

            return

        true_offset = offset + lc_codesig['dataoff']

        self.__file.seek(true_offset)

        magic = self.get_int(ignore_endian=True)

        if magic != mdictionary.signatures['EMBEDDED_SIGNATURE']:
            self.add_abnormality('Bad magic "{}" for embedded signature at '
                                 'offset "{}".'.format(magic, true_offset))

            return

        self.__macho['code_signature'] = {}

        # Slot types that have a parser; other known slots are skipped.
        slot_parsers = {
            'SignatureSlot': self.parse_certs,
            'CodeDirectorySlot': self.parse_codedirectory,
            'EntitlementSlot': self.parse_entitlement,
            'RequirementsSlot': self.parse_requirements,
        }

        self.get_int(ignore_endian=True)  # size field, not used
        n_entries = self.get_int(ignore_endian=True)

        for _ in range(n_entries):
            slot = self.get_int(ignore_endian=True)

            if slot not in mdictionary.indeces:
                self.add_abnormality('Unknown code signature index type "{}" '
                                     'at offset "{}".'.format(
                                         slot, self.__file.tell() - 4))

                self.__file.read(4)  # skip offset
                continue

            slot_name = mdictionary.indeces[slot]
            index_offset = self.get_int(ignore_endian=True)

            parser = slot_parsers.get(slot_name)

            if parser is not None:
                parser(true_offset, index_offset)

    def parse_macho(self, offset, size):
        """Parse one mach-o binary, possibly a slice of a universal binary.

        Args:
            offset: Offset of the mach-o within the file.
            size: Size of the mach-o, or ``None`` to use the position of
                the end of the file.

        Returns:
            ``self.__macho``, filled with the header fields, the load
            commands and, when a SYMTAB load command is present, whatever
            the symbol and string table parsers stored. The ``imports``
            entry is always reset to ``None`` before returning.

        Raises:
            ValueError: If the CPU type is not in ``mdictionary.cputypes``.
        """

        if size is None:
            # Use the end-of-file position as the size.
            self.__file.seek(0, SEEK_END)
            size = self.__file.tell()

        # Move to the start of this mach-o.
        self.__file.seek(offset)

        identity = self.identify_file()
        self.__is_64_bit, self.__is_little_endian = identity[0], identity[1]

        # Header fields, in file order: CPU type, CPU sub-type, file type,
        # number of load commands, size of load commands, flags.
        cputype, subtype, filetype, nlcs, slcs, flags = [
            self.get_int() for _ in range(6)
        ]

        if self.__is_64_bit:
            self.__file.read(4)  # skip padding

        if cputype not in mdictionary.cputypes:
            raise ValueError('Unknown or unsupported CPUTYPE "{}" at offset '
                             '"{}".'.format(cputype, offset + 4))

        cpu_entry = mdictionary.cputypes[cputype]

        if subtype in cpu_entry:
            subtype = cpu_entry[subtype]
        else:
            self.add_abnormality('Unknown SUBTYPE "{}" for CPUTYPE "{}" '
                                 'at offset "{}".'.format(
                                     subtype, cputype, offset + 8))

        # Key -2 of a CPU entry replaces the numeric CPU type.
        cputype = cpu_entry[-2]

        if filetype not in mdictionary.filetypes:
            self.add_abnormality('Unknown FILETYPE "{}" at offset '
                                 '"{}".'.format(filetype, offset + 12))
        else:
            filetype = mdictionary.filetypes[filetype]

        flags = self.parse_macho_flags(flags)

        self.__macho.update({
            'cputype': cputype,
            'subtype': subtype,
            'filetype': filetype,
            'nlcs': nlcs,
            'slcs': slcs,
            'flags': flags,
        })

        # Parse load commands
        self.parse_lcs(offset, size, nlcs, slcs)

        commands = self.__macho['lcs']
        lcs = [command['cmd'] for command in commands]

        # Symbol and string tables are parsed only when SYMTAB is present.
        if 'SYMTAB' in lcs:
            lc_symtab = commands[lcs.index('SYMTAB')]

            self.parse_syms(offset, size, lc_symtab)
            self.parse_strings(offset, size, lc_symtab)

        # Imports need both tables to have been stored.
        if 'symtab' in self.__macho and 'strtab' in self.__macho:
            lc_dysymtab = None
            lc_dylibs = None

            # With a DYSYMTAB load command its layout information is used
            # to locate the undefined symbols; it is also kept on the
            # instance.
            if 'DYSYMTAB' in lcs:
                lc_dysymtab = commands[lcs.index('DYSYMTAB')]
                self.dysymtab = lc_dysymtab

            # With the two-level namespace flag set, hand over every load
            # command whose name ends in 'DYLIB'.
            if 'TWOLEVEL' in self.__macho['flags']:
                lc_dylibs = [command for command in commands
                             if command['cmd'].endswith('DYLIB')]

            self.parse_imports(offset, size, lc_symtab,
                               lc_dysymtab=lc_dysymtab, lc_dylibs=lc_dylibs)

        # Code signature parsing is disabled here: a CODE_SIGNATURE load
        # command is not passed on to parse_sig.

        # The imports list is discarded; symtab and strtab are kept.
        self.__macho['imports'] = None

        return self.__macho

    def parse_universal(self):
        """Parses universal binary."""

        self.__output['universal'] = {
            'machos': []
        }

        # number of mach-o's contained in this binary
        n_machos = self.get_int(ignore_endian=True)

        for i in range(n_machos):
            self.__file.read(8)  # skip cputype and subtype fields

            offset = self.get_int(ignore_endian=True)
            size = self.get_int(ignore_endian=True)

            self.__file.read(4)  # skip align field

            prev = self.__file.tell()
            self.parse_macho(offset, size)
            self.__file.seek(prev)

            self.__output['universal']['machos'].append(self.__macho.copy())
            self.__macho.clear()


    def u_get_offset(self, cpu_type = None, uni_index = None):
        """Locate one mach-o inside a universal binary.

        Walks the universal header from the start of the file and, for
        each entry, reads the CPU type from the mach-o it points to. The
        64-bit and endianness flags of the instance are updated for every
        entry inspected.

        Args:
            cpu_type: CPU type name to look for, compared against the name
                stored under key -2 of the matching ``mdictionary.cputypes``
                entry. For a CPU type missing from that table the raw
                number is compared instead.
            uni_index: Zero-based index of the entry to return.

        Returns:
            ``(offset, size)`` of the first entry whose index equals
            ``uni_index`` or whose CPU type equals ``cpu_type``; ``None``
            when no entry matches.
        """

        self.__file.seek(0)  # return to beginning of file

        if self.__file.read(4) != b'\xca\xfe\xba\xbe':
            # Throw a fit
            print("NOT A UNI MACHO???")

        n_machos = self.get_int(ignore_endian=True)

        for i in range(n_machos):
            self.__file.read(8)  # skip cputype and subtype fields

            offset = self.get_int(ignore_endian=True)
            size = self.get_int(ignore_endian=True)

            self.__file.read(4)  # skip align field

            # Read the cpu type from the mach-o itself
            old = self.__file.tell()
            self.__file.seek(offset)
            identity = self.identify_file()
            self.__is_64_bit = identity[0]
            self.__is_little_endian = identity[1]

            cputype = self.get_int()   # CPU type

            cpu_entry = mdictionary.cputypes.get(cputype)

            if cpu_entry is None:
                # Report it and keep the raw number; looking the name up
                # anyway would raise KeyError and abort the whole search.
                print("UNKNOWN CPU TYPE: " + str(cputype))
            else:
                cputype = cpu_entry[-2]

            self.__file.seek(old)

            if i == uni_index or cpu_type == cputype:
                return offset, size

        return None


    def parse_file(self):
        """Determines characteristics about the entire file and begins
        to parse.
        """

        contents = self.__file.read()

        self.__output['size'] = len(contents)

        self.__output['hashes'] = {
            'md5': hashlib.md5(contents).hexdigest(),
            'sha1': hashlib.sha1(contents).hexdigest(),
            'sha256': hashlib.sha256(contents).hexdigest()
        }

        self.__file.seek(0)  # return to beginning of file

        if self.__file.read(4) == b'\xca\xfe\xba\xbe':
            self.parse_universal()
        else:
            self.parse_macho(0, self.__output['size'])
            self.__output['macho'] = self.__macho

    def parse(self, certs: bool=False, out=None):
        """Parse Mach-O file at given path, and either return a dict
        or write output to provided file.
        """

        self.__extract_certs = certs

        self.parse_file()

        if out is None:
            return self.__output

        dump(self.__output, out)

class mdictionary:
    """Static lookup tables mapping numeric mach-o constants to names.

    The class is used purely as a namespace: every attribute is a dict
    and nothing here is meant to be instantiated.
    """

    # Certificate slot index -> label.
    cert_slots = {
        -1: 'root',
        0: 'leaf'
    }

    # Hash type code -> hash algorithm name.
    hashes = {
        0: 'No Hash',
        1: 'SHA-1',
        2: 'SHA-256'
    }

    # Segment flag bit -> flag name.
    segment_flags = {
        1: 'HIGHVM',
        2: 'FVMLIB',
        4: 'NORELOC',
        8: 'PROTECTED_VERSION_1'
    }

    # Symbol n_type value -> type name.
    n_types = {
        0x0: 'UNDF',
        0x2: 'ABS',
        0xe: 'SECT',
        0xc: 'PBUD',
        0xa: 'INDR'
    }

    # Mach-o magic number -> (is_64_bit, is_little_endian).
    machos = {
        4277009102: (False, False),  # 32 bit, big endian
        4277009103: (True, False),   # 64 bit, big endian
        3472551422: (False, True),   # 32 bit, little endian
        3489328638: (True, True)     # 64 bit, little endian
    }

    # Code-signing requirement type code -> name.
    requirements = {
        1: 'HostRequirementType',
        2: 'GuestRequirementType',
        3: 'DesignatedRequirementType',
        4: 'LibraryRequirementType',
        5: 'PluginRequirementType',
    }

    # Code-signature slot index -> slot name. The attribute name keeps
    # its historical spelling because callers reference it by name.
    indeces = {
        0: 'CodeDirectorySlot',
        1: 'InfoSlot',
        2: 'RequirementsSlot',
        3: 'ResourceDirSlot',
        4: 'ApplicationSlot',
        5: 'EntitlementSlot',
        0x10000: 'SignatureSlot'
    }

    # Requirement match operation code -> name.
    matches = {
        0: 'matchExists',
        1: 'matchEqual',
        2: 'matchContains',
        3: 'matchBeginsWith',
        4: 'matchEndsWith',
        5: 'matchLessThan',
        6: 'matchGreaterThan',
        7: 'matchLessEqual',
        8: 'matchGreaterEqual'
    }

    # Protection bit pattern -> 'rwx'-style string.
    protections = {
        0b000: '---',
        0b001: 'r--',
        0b010: '-w-',
        0b011: 'rw-',
        0b100: '--x',
        0b101: 'r-x',
        0b110: '-wx',
        0b111: 'rwx'
    }

    # Signature blob name -> magic value. Note the direction: unlike
    # the other tables this one is keyed by name.
    signatures = {
        'REQUIREMENT': 0xfade0c00,
        'REQUIREMENTS': 0xfade0c01,
        'CODEDIRECTORY': 0xfade0c02,
        'ENTITLEMENT': 0xfade7171,
        'BLOBWRAPPER': 0xfade0b01,
        'EMBEDDED_SIGNATURE': 0xfade0cc0,
        'DETACHED_SIGNATURE': 0xfade0cc1,
        'CODE_SIGN_DRS': 0xfade0c05
    }

    # Section attribute bit -> attribute name.
    section_attrs = {
        0x80000000: 'PURE_INSTRUCTIONS',
        0x40000000: 'NO_TOC',
        0x20000000: 'STRIP_STATIC_SYMS',
        0x10000000: 'NO_DEAD_STRIP',
        0x08000000: 'LIVE_SUPPORT',
        0x04000000: 'SELF_MODIFYING_CODE',
        0x02000000: 'DEBUG',
        0x00000400: 'SOME_INSTRUCTIONS',
        0x00000200: 'EXT_RELOC',
        0x00000100: 'LOC_RELOC'
    }

    # Mach-o file type code -> name.
    filetypes = {
        1: 'OBJECT',
        2: 'EXECUTE',
        3: 'FVMLIB',
        4: 'CORE',
        5: 'PRELOAD',
        6: 'DYLIB',
        7: 'DYLINKER',
        8: 'BUNDLE',
        9: 'DYLIB_STUB',
        10: 'DSYM',
        11: 'KEXT_BUNDLE'
    }

    # Section type code -> name.
    section_types = {
        0x0: 'REGULAR',
        0x1: 'ZEROFILL',
        0x2: 'CSTRING_LITERALS',
        0x3: '4BYTE_LITERALS',
        0x4: '8BYTE_LITERALS',
        0x5: 'LITERAL_POINTERS',
        0x6: 'NON_LAZY_SYMBOL_POINTERS',
        0x7: 'LAZY_SYMBOL_POINTERS',
        0x8: 'SYMBOL_STUBS',
        0x9: 'MOD_INIT_FUNC_POINTERS',
        0xa: 'MOD_TERM_FUNC_POINTERS',
        0xb: 'COALESCED',
        0xc: 'GB_ZEROFILL',
        0xd: 'INTERPOSING',
        0xe: '16BYTE_LITERALS',
        0xf: 'DTRACE_DOF',
        0x10: 'LAZY_DYLIB_SYMBOL_POINTERS',
        0x11: 'THREAD_LOCAL_REGULAR',
        0x12: 'THREAD_LOCAL_ZEROFILL',
        0x13: 'THREAD_LOCAL_VARIABLES',
        0x14: 'THREAD_LOCAL_VARIABLE_POINTERS',
        0x15: 'THREAD_LOCAL_INIT_FUNCTION_POINTERS'
    }

    # Requirement expression operator code -> name.
    operators = {
        0: 'False',
        1: 'True',
        2: 'Ident',
        3: 'AppleAnchor',
        4: 'AnchorHash',
        5: 'InfoKeyValue',
        6: 'And',
        7: 'Or',
        8: 'CDHash',
        9: 'Not',
        10: 'InfoKeyField',
        11: 'CertField',
        12: 'TrustedCert',
        13: 'TrustedCerts',
        14: 'CertGeneric',
        15: 'AppleGenericAnchor',
        16: 'EntitlementField',
        17: 'CertPolicy',
        18: 'NamedAnchor',
        19: 'NamedCode',
        20: 'Platform'
    }

    # Thread state flavor code -> name.
    thread_states = {
        1: 'x86_THREAD_STATE32',
        2: 'x86_FLOAT_STATE32',
        3: 'x86_EXCEPTION_STATE32',
        4: 'x86_THREAD_STATE64',
        5: 'x86_FLOAT_STATE64',
        6: 'x86_EXCEPTION_STATE64',
        7: 'x86_THREAD_STATE',
        8: 'x86_FLOAT_STATE',
        9: 'x86_EXCEPTION_STATE',
        10: 'x86_DEBUG_STATE32',
        11: 'x86_DEBUG_STATE64',
        12: 'x86_DEBUG_STATE',
        13: 'THREAD_STATE_NONE',
        14: 'x86_SAVED_STATE_1 (INTERNAL ONLY)',
        15: 'x86_SAVED_STATE_2 (INTERNAL ONLY)',
        16: 'x86_AVX_STATE32',
        17: 'x86_AVX_STATE64',
        18: 'x86_AVX_STATE'
    }

    # Header flag bit -> flag name (one entry per power of two).
    flags = {
        1: 'NOUNDEFS',
        2: 'INCRLINK',
        4: 'DYLDLINK',
        8: 'BINDATLOAD',
        16: 'PREBOUND',
        32: 'SPLIT_SEGS',
        64: 'LAZY_INIT',
        128: 'TWOLEVEL',
        256: 'FORCE_FLAT',
        512: 'NOMULTIDEFS',
        1024: 'NOFIXPREBINDING',
        2048: 'PREBINDABLE',
        4096: 'ALLMODSBOUND',
        8192: 'SUBSECTIONS_VIA_SYMBOLS',
        16384: 'CANONICAL',
        32768: 'WEAK_DEFINES',
        65536: 'BINDS_TO_WEAK',
        131072: 'ALLOW_STACK_EXECUTION',
        262144: 'ROOT_SAFE',
        524288: 'SETUID_SAFE',
        1048576: 'NOREEXPORTED_DYLIBS',
        2097152: 'PIE',
        4194304: 'DEAD_STRIPPABLE_DYLIB',
        8388608: 'HAS_TLV_DESCRIPTORS',
        16777216: 'NO_HEAP_EXECUTION',
        33554432: 'APP_EXTENSION_SAFE'
    }

    # Symbolic debugging (stab) entry type -> name.
    stabs = {
        0x20: 'GSYM',
        0x22: 'FNAME',
        0x24: 'FUN',
        0x26: 'STSYM',
        0x28: 'LCSYM',
        0x2a: 'MAIN',
        0x2e: 'BNSYM',
        0x30: 'PC',
        0x32: 'AST',
        0x3a: 'MAC_UNDEF',
        0x3c: 'OPT',
        0x40: 'RSYM',
        0x44: 'SLINE',
        0x46: 'DSLINE',
        0x48: 'BSLINE',
        0x4e: 'ENSYM',
        0x60: 'SSYM',
        0x64: 'SO',
        0x66: 'OSO',
        0x80: 'LSYM',
        0x82: 'BINCL',
        0x84: 'SOL',
        0x86: 'PARAMS',
        0x88: 'VERSION',
        0x8a: 'OLEVEL',
        0xa0: 'PSYM',
        0xa2: 'EINCL',
        0xa4: 'ENTRY',
        0xc0: 'LBRAC',
        0xc2: 'EXCL',
        0xe0: 'RBRAC',
        0xe2: 'BCOMM',
        0xe4: 'ECOMM',
        0xe8: 'ECOML',
        0xfe: 'LENG'
    }

    # Load command code -> name. The six large keys at the end are a
    # small command number with 0x80000000 (2147483648) added.
    loadcommands = {
        1: 'SEGMENT',
        2: 'SYMTAB',
        3: 'SYMSEG',
        4: 'THREAD',
        5: 'UNIXTHREAD',
        6: 'LOADFVMLIB',
        7: 'IDFVMLIB',
        8: 'IDENT',
        9: 'FVMFILE',
        10: 'PREPAGE',
        11: 'DYSYMTAB',
        12: 'LOAD_DYLIB',
        13: 'ID_DYLIB',
        14: 'LOAD_DYLINKER',
        15: 'ID_DYLINKER',
        16: 'PREBOUND_DYLIB',
        17: 'ROUTINES',
        18: 'SUB_FRAMEWORK',
        19: 'SUB_UMBRELLA',
        20: 'SUB_CLIENT',
        21: 'SUB_LIBRARY',
        22: 'TWOLEVEL_HINTS',
        23: 'PREBIND_CKSUM',
        25: 'SEGMENT_64',
        26: 'ROUTINES_64',
        27: 'UUID',
        29: 'CODE_SIGNATURE',
        30: 'SEGMENT_SPLIT_INFO',
        32: 'LAZY_LOAD_DYLIB',
        33: 'ENCRYPTION_INFO',
        34: 'DYLD_INFO',
        36: 'VERSION_MIN_MACOSX',
        37: 'VERSION_MIN_IPHONEOS',
        38: 'FUNCTION_STARTS',
        39: 'DYLD_ENVIRONMENT',
        41: 'DATA_IN_CODE',
        42: 'SOURCE_VERSION',
        43: 'DYLIB_CODE_SIGN_DRS',
        44: 'ENCRYPTION_INFO_64',
        45: 'LINKER_OPTION',
        46: 'LINKER_OPTIMIZATION_HINT',
        47: 'VERSION_MIN_TVOS',
        48: 'VERSION_MIN_WATCHOS',
        49: 'NOTE',
        50: 'BUILD_VERSION',
        2147483672: 'LOAD_WEAK_DYLIB',
        2147483676: 'RPATH',
        2147483679: 'REEXPORT_DYLIB',
        2147483682: 'DYLD_INFO_ONLY',
        2147483683: 'LOAD_UPWARD_DYLIB',
        2147483688: 'MAIN',
    }

    # CPU Types & Subtypes as defined in
    # http://opensource.apple.com/source/cctools/cctools-822/include/mach/machine.h
    #
    # Outer key: CPU type code. Inner key: CPU sub-type code, plus two
    # reserved inner keys present in every entry:
    #   -2 -> name of the CPU type itself (looked up by the parser)
    #   -1 -> 'MULTIPLE'
    # Inner keys written as ``2147483648 + n`` are sub-type ``n`` with
    # the 0x80000000 bit set.
    cputypes = {
        -1: {
            -2: 'ANY',
            -1: 'MULTIPLE',
            0: 'LITTLE_ENDIAN',
            1: 'BIG_ENDIAN'
        },
        1: {
            -2: 'VAX',
            -1: 'MULTIPLE',
            0: 'VAX_ALL',
            1: 'VAX780',
            2: 'VAX785',
            3: 'VAX750',
            4: 'VAX730',
            5: 'UVAXI',
            6: 'UVAXII',
            7: 'VAX8200',
            8: 'VAX8500',
            9: 'VAX8600',
            10: 'VAX8650',
            11: 'VAX8800',
            12: 'UVAXIII'
        },
        6: {
            -2: 'MC680x0',
            -1: 'MULTIPLE',
            1: 'MC680x0_ALL or MC68030',
            2: 'MC68040',
            3: 'MC68030_ONLY'
        },
        7: {
            -2: 'X86 (I386)',
            -1: 'MULTIPLE',  # was misspelled 'MULITPLE'
            0: 'INTEL_MODEL_ALL',
            3: 'X86_ALL, X86_64_ALL, I386_ALL, or 386',
            4: 'X86_ARCH1 or 486',
            5: '586 or PENT',
            8: 'X86_64_H or PENTIUM_3',
            9: 'PENTIUM_M',
            10: 'PENTIUM_4',
            11: 'ITANIUM',
            12: 'XEON',
            15: 'INTEL_FAMILY_MAX',
            22: 'PENTPRO',
            24: 'PENTIUM_3_M',
            26: 'PENTIUM_4_M',
            27: 'ITANIUM_2',
            28: 'XEON_MP',
            40: 'PENTIUM_3_XEON',
            54: 'PENTII_M3',
            86: 'PENTII_M5',
            103: 'CELERON',
            119: 'CELERON_MOBILE',
            132: '486SX'
        },
        10: {
            -2: 'MC98000',
            -1: 'MULTIPLE',
            0: 'MC98000_ALL',
            1: 'MC98601'
        },
        11: {
            -2: 'HPPA',
            -1: 'MULTIPLE',  # was misspelled 'MULITPLE'
            0: 'HPPA_ALL or HPPA_7100',
            1: 'HPPA_7100LC'
        },
        12: {
            -2: 'ARM',
            -1: 'MULTIPLE',
            0: 'ARM_ALL',
            1: 'ARM_A500_ARCH',
            2: 'ARM_A500',
            3: 'ARM_A440',
            4: 'ARM_M4',
            5: 'ARM_V4T',
            6: 'ARM_V6',
            7: 'ARM_V5TEJ',
            8: 'ARM_XSCALE',
            9: 'ARM_V7',
            10: 'ARM_V7F',
            11: 'ARM_V7S',
            12: 'ARM_V7K',
            13: 'ARM_V8',
            14: 'ARM_V6M',
            15: 'ARM_V7M',
            16: 'ARM_V7EM'
        },
        13: {
            -2: 'MC88000',
            -1: 'MULTIPLE',
            0: 'MC88000_ALL',
            1: 'MMAX_JPC or MC88100',
            2: 'MC88110'
        },
        14: {
            -2: 'SPARC',
            -1: 'MULTIPLE',
            0: 'SPARC_ALL or SUN4_ALL',
            1: 'SUN4_260',
            2: 'SUN4_110'
        },
        15: {
            -2: 'I860 (big-endian)',
            -1: 'MULTIPLE',
            0: 'I860_ALL',
            1: 'I860_860'
        },
        18: {
            -2: 'POWERPC',
            -1: 'MULTIPLE',
            0: 'POWERPC_ALL',
            1: 'POWERPC_601',
            2: 'POWERPC_602',
            3: 'POWERPC_603',
            4: 'POWERPC_603e',
            5: 'POWERPC_603ev',
            6: 'POWERPC_604',
            7: 'POWERPC_604e',
            8: 'POWERPC_620',
            9: 'POWERPC_750',
            10: 'POWERPC_7400',
            11: 'POWERPC_7450',
            100: 'POWERPC_970'
        },
        16777223: {
            -2: 'X86_64',
            -1: 'MULTIPLE',
            0: 'INTEL_MODEL_ALL',
            3: 'X86_ALL, X86_64_ALL, I386_ALL, or 386',
            4: 'X86_ARCH1 or 486',
            5: '586 or PENT',
            8: 'X86_64_H or PENTIUM_3',
            9: 'PENTIUM_M',
            10: 'PENTIUM_4',
            11: 'ITANIUM',
            12: 'XEON',
            15: 'INTEL_FAMILY_MAX',
            22: 'PENTPRO',
            24: 'PENTIUM_3_M',
            26: 'PENTIUM_4_M',
            27: 'ITANIUM_2',
            28: 'XEON_MP',
            40: 'PENTIUM_3_XEON',
            54: 'PENTII_M3',
            86: 'PENTII_M5',
            103: 'CELERON',
            119: 'CELERON_MOBILE',
            132: '486SX',
            2147483648 + 0: 'INTEL_MODEL_ALL',
            2147483648 + 3: 'X86_ALL, X86_64_ALL, I386_ALL, or 386',
            2147483648 + 4: 'X86_ARCH1 or 486',
            2147483648 + 5: '586 or PENT',
            2147483648 + 8: 'X86_64_H or PENTIUM_3',
            2147483648 + 9: 'PENTIUM_M',
            2147483648 + 10: 'PENTIUM_4',
            2147483648 + 11: 'ITANIUM',
            2147483648 + 12: 'XEON',
            2147483648 + 15: 'INTEL_FAMILY_MAX',
            2147483648 + 22: 'PENTPRO',
            2147483648 + 24: 'PENTIUM_3_M',
            2147483648 + 26: 'PENTIUM_4_M',
            2147483648 + 27: 'ITANIUM_2',
            2147483648 + 28: 'XEON_MP',
            2147483648 + 40: 'PENTIUM_3_XEON',
            2147483648 + 54: 'PENTII_M3',
            2147483648 + 86: 'PENTII_M5',
            2147483648 + 103: 'CELERON',
            2147483648 + 119: 'CELERON_MOBILE',
            2147483648 + 132: '486SX'
        },
        16777228: {
            -2: 'ARM64',
            -1: 'MULTIPLE',
            0: 'ARM64_ALL',
            1: 'ARM64_V8',
            2147483648 + 0: 'ARM64_ALL',
            2147483648 + 1: 'ARM64_V8'
        },
        16777234: {
            -2: 'POWERPC64',
            -1: 'MULTIPLE',
            0: 'POWERPC_ALL',
            1: 'POWERPC_601',
            2: 'POWERPC_602',
            3: 'POWERPC_603',
            4: 'POWERPC_603e',
            5: 'POWERPC_603ev',
            6: 'POWERPC_604',
            7: 'POWERPC_604e',
            8: 'POWERPC_620',
            9: 'POWERPC_750',
            10: 'POWERPC_7400',
            11: 'POWERPC_7450',
            100: 'POWERPC_970',
            2147483648 + 0: 'POWERPC_ALL (LIB64)',
            2147483648 + 1: 'POWERPC_601 (LIB64)',
            2147483648 + 2: 'POWERPC_602 (LIB64)',
            2147483648 + 3: 'POWERPC_603 (LIB64)',
            2147483648 + 4: 'POWERPC_603e (LIB64)',
            2147483648 + 5: 'POWERPC_603ev (LIB64)',
            2147483648 + 6: 'POWERPC_604 (LIB64)',
            2147483648 + 7: 'POWERPC_604e (LIB64)',
            2147483648 + 8: 'POWERPC_620 (LIB64)',
            2147483648 + 9: 'POWERPC_750 (LIB64)',
            2147483648 + 10: 'POWERPC_7400 (LIB64)',
            2147483648 + 11: 'POWERPC_7450 (LIB64)',
            2147483648 + 100: 'POWERPC_970 (LIB64)'
        }
    }