#
# Gordon McMillan (as inspired and influenced by Greg Stein)
#

# subclasses may not need marshal or struct, but since they're
# builtin, importing is safe.
#
# While an Archive is really an abstraction for any "filesystem
# within a file", it is tuned for use with imputil.FuncImporter.
# This assumes it contains python code objects, indexed by
# the internal name (ie, no '.py').
# See carchive.py for a more general archive (contains anything)
# that can be understood by a C program.

import marshal
import struct

class Archive:
  """A base class for a repository of python code objects.

      The get_code method is used by imputil.FuncImporter
      to get code objects by name.
      Archives are flat namespaces, so conflicts between module
      names in different packages are possible. Use a different
      Archive for each package.

      Default file layout (all positions relative to self.start):
        header  - MAGIC, python's magic, then a 4 byte int giving
                  the position of the table of contents
        entries - marshalled objects, one after another
        toc     - a marshalled dict of name -> (ispkg, pos)
  """
  MAGIC = 'PYL\0'
  HDRLEN = 12        # default is MAGIC followed by python's magic, int pos of toc
  TOCPOS = 8         # position (within the header) of the int pos of toc
  TRLLEN = 0         # default - no trailer
  TOCTMPLT = {}      # a dict means "toc is a dict"; anything else is called to create the toc
  os = None          # cache for the lazily imported os module (see add)

  def __init__(self, path=None, start=0):
    """Initialize an Archive.

       If path is omitted, it will be an empty Archive.
       start is the seek position within path where the Archive starts.
       When path is given the file is opened, validated (checkmagic)
       and its table of contents is loaded (loadtoc)."""
    self.toc = None
    self.path = path
    self.start = start
    import imp
    self.pymagic = imp.get_magic()
    if path is not None:
      self.lib = open(self.path, 'rb')
      ok = 0
      try:
        self.checkmagic()
        self.loadtoc()
        ok = 1
      finally:
        # don't leak the handle when the file turns out to be unusable
        if not ok:
          self.lib.close()

  ####### Sub-methods of __init__ - override as needed #############
  def checkmagic(self):
    """Verify version and validity of file.

        Overridable.
        Check to see if the file object self.lib actually has a file
        we understand. Raises RuntimeError if either the archive magic
        or python's magic does not match.
    """
    self.lib.seek(self.start)   #default - magic is at start of file
    if self.lib.read(len(self.MAGIC)) != self.MAGIC:
      raise RuntimeError("%s is not a valid %s archive file"
                         % (self.path, self.__class__.__name__))
    if self.lib.read(len(self.pymagic)) != self.pymagic:
      raise RuntimeError("%s has version mismatch to dll" % (self.path))

  def loadtoc(self):
    """Load the table of contents into self.toc.

        Overridable.
        Default: After magic comes an int (4 byte native) giving the
        position of the TOC within self.lib.
        Default: The TOC is a marshal-able object.
    """
    self.lib.seek(self.start + self.TOCPOS)
    (offset,) = struct.unpack('=i', self.lib.read(4))
    self.lib.seek(self.start + offset)
    self.toc = marshal.load(self.lib)

  ######## This is what is called by FuncImporter #######
  ## Since an Archive is flat, we ignore parent and modname.

  def get_code(self, parent, modname, fqname):
    """The import hook.

       Called by imputil.FuncImporter.
       Returns None if fqname is not in the Archive, (ispkg, code)
       for a plain module, and (ispkg, code, {'__path__': []}) for
       a package.
       Override extract to tune getting code from the Archive."""
    rslt = self.extract(fqname) # None if not found, (ispkg, code) otherwise
    if rslt is None:
      return None
    ispkg, code = rslt
    if ispkg:
      return ispkg, code, {'__path__': []}
    return rslt

  ####### Core method - Override as needed  #########
  def extract(self, name):
    """Get the object corresponding to name, or None.

        NAME is the name as specified in an 'import name'.
        'import a.b' will become:
        extract('a') (return None because 'a' is not a code object)
        extract('a.__init__') (return a code object)
        extract('a.b') (return a code object)
        Default implementation:
          self.toc is a dict
          self.toc[name] is (ispkg, pos)
          self.lib has the code object marshal-ed at pos
        Returns a tuple (ispkg, object) when name is found.
    """
    ispkg, pos = self.toc.get(name, (0, None))
    if pos is None:
      return None
    self.lib.seek(self.start + pos)
    return ispkg, marshal.load(self.lib)

  ########################################################################
  # Informational methods

  def contents(self):
    """Return the names of the contents.

       Default implementation assumes self.toc is a dict like object
       and returns self.toc.keys().
    """
    return self.toc.keys()

  ########################################################################
  # Building

  ####### Top level method - shouldn't need overriding #######
  def build(self, path, lTOC):
    """Create an archive file of name PATH from LTOC.

       lTOC is a 'logical TOC' - a list of (name, path, ...)
       where name is the internal (import) name,
       and path is a file to get the object from, eg './a.pyc'.
       The output file is closed before returning, also when
       building fails part way through.
    """
    self.path = path
    self.lib = open(path, 'wb')
    try:
      #reserve space for the header
      if self.HDRLEN:
        self.lib.write('\0'*self.HDRLEN)

      #create an empty toc
      if isinstance(self.TOCTMPLT, type({})):
        self.toc = {}
      else:       # assume callable
        self.toc = self.TOCTMPLT()

      for tocentry in lTOC:
        self.add(tocentry)   # the guts of the archive

      tocpos = self.lib.tell()
      self.save_toc(tocpos)
      if self.TRLLEN:
        self.save_trailer(tocpos)
      if self.HDRLEN:
        self.update_headers(tocpos)
    finally:
      self.lib.close()


  ####### manages keeping the internal TOC and the guts in sync #######
  def add(self, entry):
    """Add an entry to the archive.

       Override this to influence the mechanics of the Archive.
       Assumes entry is a seq beginning with (nm, pth, ...) where
       nm is the key by which we'll be asked for the object.
       pth is the name of where we find the object.
       The entry is flagged as a package when pth's basename
       (without extension) is '__init__'.
    """
    if self.os is None:
      import os
      self.os = os
    nm = entry[0]
    pth = entry[1]
    ispkg = self.os.path.splitext(self.os.path.basename(pth))[0] == '__init__'
    self.toc[nm] = (ispkg, self.lib.tell())
    f = open(pth, 'rb')
    try:
      f.seek(8)   #skip magic and timestamp
      self.lib.write(f.read())
    finally:
      f.close()   # one handle per entry adds up quickly otherwise

  def save_toc(self, tocpos):
    """Save the table of contents.

       Default - toc is a dict
       Gets marshaled to self.lib at the current position
       (tocpos is not used by the default implementation).
    """
    marshal.dump(self.toc, self.lib)

  def save_trailer(self, tocpos):
    """Placeholder for Archives with trailers."""
    pass

  def update_headers(self, tocpos):
    """Update any header data.

       Default header is  MAGIC + Python's magic + tocpos"""
    self.lib.seek(self.start)
    self.lib.write(self.MAGIC)
    self.lib.write(self.pymagic)
    self.lib.write(struct.pack('=i', tocpos))

##############################################################
#
# ZlibArchive - an archive with compressed entries
#

class ZlibArchive(Archive):
  """A subclass of Archive that compresses entries with zlib
     and uses a (marshalled) dict as a table of contents.

     Each toc entry is (ispkg, pos, length) where length is the
     size of the compressed entry stored at pos."""
  MAGIC = 'PYZ\0'
  TOCPOS = 8
  HDRLEN = 12
  TRLLEN = 0
  TOCTMPLT = {}
  LEVEL = 9          # zlib compression level used by add

  def __init__(self, path=None, offset=0):
    """Initialize a ZlibArchive.

       path and offset are passed to Archive.__init__ as its
       path and start arguments."""
    Archive.__init__(self, path, offset)
    # dynamic import so not imported if not needed
    global zlib
    import zlib

  def extract(self, name):
    """Get the code object for NAME.

       Return None if name is not in the table of contents.
       Otherwise, return a tuple (ispkg, code)"""
    (ispkg, pos, lngth) = self.toc.get(name, (0, None, 0))
    if pos is None:
      return None
    self.lib.seek(self.start + pos)
    return ispkg, marshal.loads(zlib.decompress(self.lib.read(lngth)))

  def add(self, entry):
    """Add an entry.

       ENTRY is a sequence where entry[0] is name and entry[1] is full path name.
       zlib compress the code object, and build a toc entry.
       The entry is flagged as a package when the path's basename
       (without extension) is '__init__'."""
    if self.os is None:
      import os
      self.os = os
    nm = entry[0]
    pth = entry[1]
    ispkg = self.os.path.splitext(self.os.path.basename(pth))[0] == '__init__'
    f = open(pth, 'rb')
    try:
      f.seek(8)   #skip magic and timestamp
      obj = zlib.compress(f.read(), self.LEVEL)
    finally:
      f.close()   # don't hold one open handle per entry
    self.toc[nm] = (ispkg, self.lib.tell(), len(obj))
    self.lib.write(obj)