# Python interface to Hope, Harlequin's version control system.
#
# This was written in order to extract MLWorks, including all of its
# history, from a collection of "compounds" sold by Xanalys to
# Ravenbrook.
# 
# Nick Barnes, Ravenbrook Limited, 2015-01-13.
#
# $Id: //info.ravenbrook.com/project/mlworks/import/2013-04-25/xanalys-tarball/hope.py#6 $
#
# To run:
# >>> hope.compound('MLW').extract()

import os
import shutil
import itertools
import functools
import re

import rcs

# .log.branch: For a period, when branching files in Hope each branch
# base revision would get a log string such as "branched from 1.37\n",
# where "1.37" was the parent revision of that particular file.  To
# consolidate these revisions for many files branched together, we
# replace that log string with "Hope branch base revision".  This
# regexp matches the problematic log messages.
branched_from_re = re.compile('^branched from ([0-9]+\.)+[0-9]+\n$')

def mindef(iter, default=None):
    """Return the minimum value of `iter`, returning `default` if no values.
    
    (Emulating min() in Python 3.x.)"""
    
    first = next(iter, None)
    return default if first is None else min(itertools.chain([first], iter))

class FileRevision(rcs.FileRevision):
    """A Hope file revision.  We subclass RCS to allow us to 
    spot (and sometimes work around) Hope-specific niceties."""
    
    def __init__(self, *args, **kwargs):
        super(FileRevision, self).__init__(*args, **kwargs)
        # For a period, branch base revisions (e.g. 1.37.4.1) received
        # unhelpful log strings.  See .log.branch.
        if self.is_branch_head and branched_from_re.match(self.log):
            self.key = (self.author, "Hope branch base revision", self.is_branch_head)

class FileBranch(rcs.FileBranch):
    """A Hope file branch.  We don't currently need this subclass, but it
    might come in handy later."""

    def __init__(self, *args, **kwargs):
        super(FileBranch, self).__init__(*args, **kwargs)
        # may have other things to do here later.

class File(rcs.File):
    """An RCS file specialized to Hope: just like an RCS file but using
    the name "." for the trunk.
    """
    def __init__(self, filename):
        super(File, self).__init__(filename,
                                   revisionClass=FileRevision,
                                   branchClass=FileBranch)
        # Hope RCS files only have one trunk branch, the branch "1".
        assert(len(self.trunk) == 1)
        assert(self.trunk[0].id == rcs.ID((1,)))
        for b in self.branch_named.keys():
            if self.branch_named[b] in self.trunk:
                del self.branch_named[b]
        self.branch_named['.'] = self.trunk[0]
        self.start = self.trunk[0].timestamps[0]

class Unit(File):
    """A Hope unit: basically an RCS file with some Hope-specific methods."""

    def __init__(self, compound, name, rcs_filename):
        super(Unit, self).__init__(rcs_filename)
        self.compound = compound
        self.name = name
        self.path = os.path.join(*name.split(':'))
        self.unnamed_branches = set(self.branch.values()) - set(self.branch_named.values())

    def __repr__(self):
        return "%s(%s %s)" % (type(self).__name__, self.compound.name, self.filename)

    def compound_branches(self, rev):
        """Generate a sequence of names of branches of the compound
        which include this revision.

        A given branch of a compound at a given timestamp may include
        one specific branch of a unit.  So a given unit revision may
        be on several different branches of the compound.
        """

        revision = self.revision(rev)
        branch = revision.branch()
        found=False
        while not found:
            # branch names to search for in parent compound
            if branch.trunk:
                branch_names = ['.'] + branch.names
            else:
                branch_names = branch.names
            for b in self.compound.branch_names:
                units = self.compound.units(branch=b, at=revision.timestamp)
                if self.name in units:
                    # this unit exists on this compound branch at this time
                    if units[self.name][1] in branch_names:
                        found=True
                        yield b
            if branch.trunk:
                return
            if not found:
                # Go to parent branch and keep looking
                branch = branch.branch_point.branch()

    def paths(self, rev):
        """Generate a sequence of paths of this unit's working file at the
        given revision.  A compound branch at a given timestamp may be
        in several different parent compounds and on various branches
        of those parent compounds.
        """
        # TODO: Write this!!
        pass
        

class Metadata(File):
    """A Hope versioned metadata file: an RCS file with some Hope-specific
    methods."""

    def __init__(self, compound, name, rcs_filename):
        super(Metadata, self).__init__(rcs_filename)
        self.compound = compound
        self.name = name

    def __repr__(self):
        return "%s(%s %s)" % (type(self).__name__, self.compound.name, self.filename)
        

def versioned(metadata):
    """Decorator for Compound methods which return results
    extracted from a metadata file.  Use as:
           @versioned('*attributes')
           def foo(self, rev, contents):
               ... do something with rev and contents, usually processing contents,
               ... to return a value.
    The wrapper memoizes the function result as belonging to the
    revision of the metadata file.
    The wrapped function can take arguments `branch` and `at` (see the arguments of
    rcs.File.revision) or 'rev' (which can be a revision name, an
    revision ID like "1.2.3.4" or an rcs.RevisionID).

    """
    
    def version_wrapped(f):
        cache = {} # per-compound per-method cache rev->result
        @functools.wraps(f)
        def wrapper(self, *args, **kwargs):
            rcsfile = self.metadata[metadata]
            if 'rev' in kwargs:
                assert('branch' not in kwargs and 'at' not in kwargs)
                rev = kwargs['rev']
                if isinstance(rev, str):
                    if rev in rcsfile.rev_named:
                        # named revision, like "MLWorks_Beta_7"
                        rev = rcsfile.rev_named[rev]
                    else:
                        # RCS revision name, like "1.2.3.4"
                        rev = rcsfile.rev[rcs.ID(rev)]
                if isinstance(rev, rcs.RevisionID):
                    if rev not in rcsfile.rev:
                        return None
                    rev = rcsfile.rev[rev]
            else:
                branch = kwargs.pop('branch', None)
                at = kwargs.pop('at', None)
                rev = rcsfile.revision_at(branch=branch, at=at, or_earliest=True)
            assert(isinstance(rev, rcs.FileRevision))
            if rev not in cache:
                kwargs['rev'] = rev
                kwargs['contents'] = self.metadata_contents(rev)
                cache[rev] = f(self, *args, **kwargs)
            return cache[rev]
        return wrapper
    return version_wrapped

            
class Compound(object):
    """A Hope compound.  Code for understanding Hope metadata and
    directory structure belongs here."""

    def __init__(self, name, dir):
        self.name = name
        self.dir = dir

        self._metadata_contents={}
        self.metadata = {}
        self.unit = {}
        rcs_dir = os.path.join(self.dir, 'RCS')
        for rcs_filename in os.listdir(rcs_dir):
            assert(rcs_filename.endswith(',v'))
            filename = rcs_filename[:-2]
            rcs_path = os.path.join(rcs_dir, rcs_filename)
            if filename.startswith('*'):
                self.metadata[filename] = Metadata(self, filename, rcs_path)
            else:
                self.unit[filename] = Unit(self, filename, rcs_path)
        self.metadata_start = min(m.start for m in self.metadata.values())
        self.unit_start = mindef((u.start for u in self.unit.values()),
                                 default=self.metadata_start)
        self._definitions = self.metadata['*definitions']
        self._attributes = self.metadata['*attributes']
        self._parents = self.metadata['*parents']

        self.branch_names = self._attributes.branch_named.keys()
        self.revision_names = self._attributes.rev_named.keys()

        # map from branch ID (of *definitions) to names of units
        # live somewhere on that branch
        branch_units = {b: set(u for r in self._definitions.branch[b].revs
                               for u in self._units(rev=r))
                        for b in self._definitions.branch_ids}
        
        # map from branch ID (of *definitions) to names of units
        # which are stale somewhere on that branch
        branch_stales = {b: set(u for r in self._definitions.branch[b].revs
                                for u in self.stale_units(rev=r))
                         for b in self._definitions.branch_ids}

        # Fill in each unit with timestamp information
        for u in self.unit:
            # map from branch ID (of *definitions) to first timestamp
            # at which the unit is marked stale on that branch of the
            # compound.
            self.unit[u].first_stale = {b: next(r.timestamp
                                                for r in self._definitions.branch[b].revs
                                                if u in self.stale_units(rev=r))
                                        for b in branch_stales
                                        if u in branch_stales[b]}
            # map from branch ID (of *definitions) to first timestamp
            # at which the unit appears on that branch of the compound.
            self.unit[u].first_rev = {b: next(r.timestamp
                                              for r in self._definitions.branch[b].revs
                                              if u in self._units(rev=r))
                                      for b in branch_units
                                      if u in branch_units[b]}
            # first timestamp at which the unit appears in *defintions.
            self.unit[u].first_meta = min(self.unit[u].first_rev.values())

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, self.name)

    def metadata_contents(self, rev):
        """Return (memoized) contents of a metadata file revision.

        All metadata files consist of a number of lines, each line
        having several fields separated by ::=, with some blank lines.
        Line ordering appears not to be significant.  This function
        returns a list of lists of fields, one list per line.
        """

        if rev not in self._metadata_contents:
            self._metadata_contents[rev] = [line.split('::=')
                                            for line in rev.contents().split('\n')
                                            if len(line.strip()) > 0]
        return self._metadata_contents[rev]

    @versioned('*attributes')
    def attributes(self, rev, contents):
        """Return dictionary of per-compound attributes."""
        # these lists are length 3 or 4
        return {l[1]:l[2] for l in contents if l[0] == 'comp'}

    @versioned('*attributes')
    def directory(self, rev, contents):
        """Return the compound's _directory attribute."""
        return self.attributes(rev=rev)['_directory']

    def paths(self, branch, at):
        """Generates all paths at which this compound's branch may appear.

        For each path, generates a pair `(path, [(c1, b1), ...])` at
        which this branch of this compound may appear at this
        timestamp, based on its ancestor (parent, grand-parent, ...)
        compounds.
        
        The list `[(c1, b1), ...]` is a list of compounds and branch
        names, working upwards from this compound and branch.  Each
        compound branch may appear in several parent compounds, and on
        multiple branches of any given parent compound.
        """

        d = self.directory(branch=branch, at=at)
        found=False
        for p,b,orphan in parents(self, branch, at, trunk_if_orphan=True):
            for parent_path, branches in p.paths(branch=b,at=at):
                found=True
                yield (os.path.join(parent_path,d), [(self, branch)]+branches)
        if not found:
            # No parents of this branch at this timestamp;
            # either it's a top-level compound or an orphan.
            yield (d, [(self, branch)])

    @versioned('*attributes')
    def unit_attributes(self, rev, contents):
        """Return dictionary of per-unit attributes.

        unit name -> key -> value."""
        d = {}
        for l in contents:
            # these lists are length 3 or 4
            if l[0] == 'unit':
                ud = d.setdefault(l[1],{})
                ud[l[2]] = l[3]
        return d
        
    @versioned('*definitions')
    def _subcompounds(self, rev, contents):
        """Return a dictionary of sub-compounds given by the metadata.

        subcompound name -> (subcompound, subcompound branch name)."""
        # John Kamp tells us that l[3:] are meaningless.
        return {l[1]:(compound(l[1]), l[2])
                for l in  contents if l[0] == 'subcomp'}
        
    @versioned('*definitions')
    def subcompounds(self, rev, contents):
        """Return a dictionary of sub-compounds including any omitted by Hope.

        At revision 1.1, we may have subcompounds which are not in
        the metadata, because of the migration from RCS.
        
        We work around this by including, for any trunk revision of
        the compound, any subcompound which is included anywhere along
        the trunk, if the units in that subcompound are started before
        the revision timestamp, and that subcompound hasn't been
        removed from the trunk of this compound before this revision.
        """
        
        if rev.trunk:
            scs = self._subcompounds(rev=rev)
            trunk_revs = self._definitions.trunk[0].revs
            removed = set(sc for r in trunk_revs
                          if r.timestamp < rev.timestamp
                          for next_r in (trunk_revs[trunk_revs.index(r)+1],)
                          for sc,_ in self._subcompounds(rev=r).values()
                          if sc is not None and sc.name not in self._subcompounds(rev=next_r).keys())
            all_trunk = set(sc for r in trunk_revs
                            for sc,_ in self._subcompounds(rev=r).values()
                            if sc is not None)
            for sc in all_trunk - removed:
                if (sc.name not in scs and
                    sc.unit_start < rev.timestamp):
                    scs[sc.name] = (sc,'.')
            return scs
        else:
            return self._subcompounds(rev=rev)

    @versioned('*definitions')
    """Return a dictionary of units included by metadata.

    unit name -> (unit, unit branch name, unit branch)."""

    def _units(self, rev, contents):
        return {l[1]: (self.unit[l[1]], l[2], self.unit[l[1]].branch_named.get(l[2], None))
                for l in contents if l[0] == 'unit'}

    @versioned('*definitions')
    def units(self, rev, contents):
        """Return a dictionary of units, including any omitted by Hope.

        Compounds which were migrated from RCS to Hope may include
        units which existed before the compound metadata did, but the
        compound metadata starts empty.  So any early trunk revision
        of the compound definitions should actually say "I have all
        these units." """
        if rev.trunk:
            return {u.name: (u, '.', u.branch_named['.'])
                    for u in self.unit.values()
                    if u.start < rev.timestamp}
        else:
            return self._units(rev)

    @versioned('*definitions')
    def stale_units(self, rev, contents):
        """Return a dictionary of stale units.
        
        unit name -> unit branch name or '*'.
        
        John Kamp advises us that the unit branch names are probably
        meaningless. """
        return {l[1]:l[2] for l in contents if l[0] == 'staleunit'}

    @versioned('*parents')
    """Return a dictionary of parents defined by metadata.
    
    parent compound name -> parent compound branch."""
    def parents(self, rev, contents):
        return {l[0]:l[1] for l in contents}
        
_compounds = None

def get_compounds(base):
    """Populate _compounds with all the compounds in the directory `base`."""

    global _compounds
    _compounds = {}
    for dirpath, dirnames, filenames in os.walk(base):
        if 'RCS' in dirnames:
            parent, compound_name = os.path.split(dirpath)
            assert(compound_name not in _compounds)
            _compounds[compound_name] = Compound(compound_name, dirpath)
            print("Compound %s" % compound_name)


def compound(name):
    """Return the compound named `name`."""
    assert(_compounds is not None)
    return _compounds.get(name, None)

def parents(compound, branch, at, trunk_if_orphan=False):
    """Generate a sequence of triples `(parent, parent_branch, True)` of
    compound branches which have `branch` of `compound` as a
    subcompound at time `at`.

    If `trunk_if_orphan` is True, and we find no suitable parent
    compound branches, also generate triples `(parent, '.', False)`,
    of compounds which have any branch of `compound` as a subcompound
    on their trunk at time `at`.  This allows us to find a home for
    subcompounds which are branched independently of their parents.

    A given branch of a compound at a given timestamp may include a
    specific branch of a subcompound.  So a given subcompound branch
    may be on several different branches of several different compounds.

    """
    n = compound.name
    found = False
    trunk_parents = set()
    for c in _compounds.values():
        # is our 
        if (n in c.subcompounds(at=at)):
            trunk_parents.add(c)
        for b in c.branch_names:
            scs = c.subcompounds(branch=b, at=at)
            if (n in scs
                and scs[n][1] == branch
                and (b == '.' or
                     at > c._definitions.branch_named[b].at)):
                found = True
                yield (c, b, True)
    if trunk_if_orphan and not found:
        for c in trunk_parents:
            yield (c, '.', False)
        

def all_rcs():
    """Generate all the RCS file objects we know about, both units and metadata."""
    for c in _compounds.values():
        for f in itertools.chain(c.metadata.values(), c.unit.values()):
            yield f

def slide(limit=None, slack=600, noisy=0):
    return rcs.slide(all_rcs(), limit=limit, slack=slack, noisy=noisy)

import timeit

noisy=1

def go():
    print len(list(slide(noisy=noisy)))

def time():
    t = timeit.Timer(go,"import hope; hope.get_compounds('hope')")
    try:
        return t.timeit(number=1)
    except:
        t.print_exc()
        
    
    
