From 86388c55fb7852ce9c8a2db9ee45ca2b3a126f6e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 4 Apr 2011 16:46:57 +0200 Subject: [PATCH 01/15] Added first scetch of a transport DB interface which should be enough to allow an own implementation of the git protocol without breaking clients. It also includes interfaces for the fetchinfo and pushinfo types --- gitdb/db/base.py | 131 ++++++++++++++++++++++++++++++++++++- gitdb/db/pack.py | 5 +- gitdb/test/db/test_base.py | 18 +++++ 3 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 gitdb/test/db/test_base.py diff --git a/gitdb/db/base.py b/gitdb/db/base.py index 2189d41..acc32ee 100644 --- a/gitdb/db/base.py +++ b/gitdb/db/base.py @@ -22,7 +22,8 @@ from itertools import chain -__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB') +__all__ = ( 'ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB', + 'TransportDBMixin', 'RefSpec', 'FetchInfo', 'PushInfo') class ObjectDBR(object): @@ -321,5 +322,133 @@ def partial_to_complete_sha_hex(self, partial_hexsha): return candidate #} END interface + + +class RefSpec(object): + """A refspec is a simple container which provides information about the way + something should be fetched or pushed. It requires to use symbols to describe + the actual objects which is done using reference names (or respective instances + which resolve to actual reference names).""" + __slots__ = ('source', 'destination', 'force') + + def __init__(self, source, destination, force=False): + """initalize the instance with the required values + :param source: reference name or instance. 
If None, the Destination + is supposed to be deleted.""" + self.source = source + self.destination = destination + self.force = force + if self.destination is None: + raise ValueError("Destination must be set") + + def __str__(self): + """:return: a git-style refspec""" + s = str(self.source) + if self.source is None: + s = '' + #END handle source + d = str(self.destination) + p = '' + if self.force: + p = '+' + #END handle force + return "%s%s:%s" % (p, s, d) + + def delete_destination(self): + return self.source is None + + +class PushInfo(object): + """A type presenting information about the result of a push operation for exactly + one refspec + + flags # bitflags providing more information about the result + local_ref # Reference pointing to the local reference that was pushed + # It is None if the ref was deleted. + remote_ref_string # path to the remote reference located on the remote side + remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + old_commit # commit at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + summary # summary line providing human readable english text about the push + """ + __slots__ = tuple() + + NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ + FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] + + +class FetchInfo(object): + """A type presenting information about the fetch operation on exactly one refspec + + The following members are defined: + ref # name of the reference to the changed + # remote head or FETCH_HEAD. Implementations can provide + # actual class instance which convert to a respective string + flags # additional flags to be & with enumeration members, + # i.e. 
info.flags & info.REJECTED + # is 0 if ref is FETCH_HEAD + note # additional notes given by the fetch-pack implementation intended for the user + old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref as hexsha or None + # Implementors may use their own type too, but it should decay into a + # string of its hexadecimal sha representation""" + __slots__ = tuple() + + NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ + FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] + + +class TransportDBMixin(object): + """A database which allows to transport objects from and to different locations + which are specified by urls (location) and refspecs (what to transport, + see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). + + At the beginning of a transport operation, it will be determined which objects + have to be sent (either by this or by the other side). + + Afterwards a pack with the required objects is sent (or received). If there is + nothing to send, the pack will be empty. + + The communication itself if implemented using a protocol instance which deals + with the actual formatting of the lines sent.""" + # The following variables need to be set by the derived class + #{Configuration + protocol = None + #}end configuraiton + + #{ Interface + + def fetch(self, url, refspecs, progress=None): + """Fetch the objects defined by the given refspec from the given url. + :param url: url identifying the source of the objects. It may also be + a symbol from which the respective url can be resolved, like the + name of the remote. The implementation should allow objects as input + as well, these are assumed to resovle to a meaningful string though. + :param refspecs: iterable of reference specifiers or RefSpec instance, + identifying the references to be fetch from the remote. 
+ :param progress: callable which receives progress messages for user consumption + :return: List of binary object shas matching the respective remote ref which + was previously fetched, in the order of the input refspecs. + :note: even if the operation fails, one of the returned FetchInfo instances + may still contain errors or failures in only part of the refspecs. + :raise: if any issue occours during the transport or if the url is not + supported by the protocol. + """ + raise NotImplementedError() + def push(self, url, refspecs, progress=None): + """Transport the objects identified by the given refspec to the remote + at the given url. + :param url: Decribes the location which is to receive the objects + see fetch() for more details + :param refspecs: iterable of refspecs strings or RefSpec instances + to identify the objects to push + :param progress: see fetch() + :todo: what to return ? + :raise: if any issue arises during transport or if the url cannot be handled""" + raise NotImplementedError() + + #}end interface + diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py index eef3f71..56b3c0e 100644 --- a/gitdb/db/pack.py +++ b/gitdb/db/pack.py @@ -30,6 +30,9 @@ class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin): """A database operating on a set of object packs""" + # the type to use when instantiating a pack entity + PackEntityCls = PackEntity + # sort the priority list every N queries # Higher values are better, performance tests don't show this has # any effect, but it should have one @@ -156,7 +159,7 @@ def update_cache(self, force=False): for pack_file in (pack_files - our_pack_files): # init the hit-counter/priority with the size, a good measure for hit- # probability. 
Its implemented so that only 12 bytes will be read - entity = PackEntity(pack_file) + entity = self.PackEntityCls(pack_file) self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) # END for each new packfile diff --git a/gitdb/test/db/test_base.py b/gitdb/test/db/test_base.py new file mode 100644 index 0000000..0a381be --- /dev/null +++ b/gitdb/test/db/test_base.py @@ -0,0 +1,18 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from gitdb.db import RefSpec + +class TestBase(TestDBBase): + + @with_rw_directory + def test_basics(self, path): + self.failUnlessRaises(ValueError, RefSpec, None, None) + rs = RefSpec(None, "something") + assert rs.force == False + assert rs.delete_destination() + assert rs.source is None + assert rs.destination == "something" + From 7c4d3d6b000930134019515c83c10b140330d313 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2011 12:12:22 +0200 Subject: [PATCH 02/15] Added ref implementation and tried to fix it so that it has a chance of running. Currently it requires an object implementation which will be ported next. None of the tests is expected to run yet. 
--- gitdb/db/base.py | 10 +- gitdb/ref/__init__.py | 21 ++ gitdb/ref/head.py | 106 ++++++ gitdb/ref/log.py | 279 ++++++++++++++++ gitdb/ref/reference.py | 81 +++++ gitdb/ref/remote.py | 47 +++ gitdb/ref/symbolic.py | 642 +++++++++++++++++++++++++++++++++++++ gitdb/ref/tag.py | 50 +++ gitdb/test/db/test_pack.py | 1 - gitdb/test/lib.py | 57 +++- gitdb/test/test_refs.py | 523 ++++++++++++++++++++++++++++++ gitdb/typ.py | 8 + gitdb/util.py | 298 +++++++++++++++++ 13 files changed, 2109 insertions(+), 14 deletions(-) create mode 100644 gitdb/ref/__init__.py create mode 100644 gitdb/ref/head.py create mode 100644 gitdb/ref/log.py create mode 100644 gitdb/ref/reference.py create mode 100644 gitdb/ref/remote.py create mode 100644 gitdb/ref/symbolic.py create mode 100644 gitdb/ref/tag.py create mode 100644 gitdb/test/test_refs.py diff --git a/gitdb/db/base.py b/gitdb/db/base.py index acc32ee..7310521 100644 --- a/gitdb/db/base.py +++ b/gitdb/db/base.py @@ -419,7 +419,7 @@ class TransportDBMixin(object): #{ Interface - def fetch(self, url, refspecs, progress=None): + def fetch(self, url, refspecs, progress=None, **kwargs): """Fetch the objects defined by the given refspec from the given url. :param url: url identifying the source of the objects. It may also be a symbol from which the respective url can be resolved, like the @@ -428,7 +428,9 @@ def fetch(self, url, refspecs, progress=None): :param refspecs: iterable of reference specifiers or RefSpec instance, identifying the references to be fetch from the remote. :param progress: callable which receives progress messages for user consumption - :return: List of binary object shas matching the respective remote ref which + :param kwargs: may be used for additional parameters that the actual implementation could + find useful. + :return: List of FetchInfo compatible instances which provide information about what was previously fetched, in the order of the input refspecs. 
:note: even if the operation fails, one of the returned FetchInfo instances may still contain errors or failures in only part of the refspecs. @@ -437,7 +439,7 @@ def fetch(self, url, refspecs, progress=None): """ raise NotImplementedError() - def push(self, url, refspecs, progress=None): + def push(self, url, refspecs, progress=None, **kwargs): """Transport the objects identified by the given refspec to the remote at the given url. :param url: Decribes the location which is to receive the objects @@ -445,6 +447,8 @@ def push(self, url, refspecs, progress=None): :param refspecs: iterable of refspecs strings or RefSpec instances to identify the objects to push :param progress: see fetch() + :param kwargs: additional arguments which may be provided by the caller + as they may be useful to the actual implementation :todo: what to return ? :raise: if any issue arises during transport or if the url cannot be handled""" raise NotImplementedError() diff --git a/gitdb/ref/__init__.py b/gitdb/ref/__init__.py new file mode 100644 index 0000000..9223f3a --- /dev/null +++ b/gitdb/ref/__init__.py @@ -0,0 +1,21 @@ + +# import all modules in order, fix the names they require +from symbolic import * +from reference import * +from head import * +from tag import * +from remote import * + +# name fixes +import head +head.Head.RemoteReferenceCls = RemoteReference +del(head) + + +import symbolic +for item in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): + setattr(symbolic.SymbolicReference, item.__name__+'Cls', item) +del(symbolic) + + +from log import * diff --git a/gitdb/ref/head.py b/gitdb/ref/head.py new file mode 100644 index 0000000..832c273 --- /dev/null +++ b/gitdb/ref/head.py @@ -0,0 +1,106 @@ +from symbolic import SymbolicReference +from reference import Reference +from gitdb.config import SectionConstraint +from gitdb.util import join_path + +__all__ = ["HEAD", "Head"] + + + +class HEAD(SymbolicReference): + """Special case of a Symbolic 
Reference as it represents the repository's + HEAD reference.""" + _HEAD_NAME = 'HEAD' + _ORIG_HEAD_NAME = 'ORIG_HEAD' + __slots__ = tuple() + + def __init__(self, repo, path=_HEAD_NAME): + if path != self._HEAD_NAME: + raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) + super(HEAD, self).__init__(repo, path) + + def orig_head(self): + """ + :return: SymbolicReference pointing at the ORIG_HEAD, which is maintained + to contain the previous value of HEAD""" + return SymbolicReference(self.repo, self._ORIG_HEAD_NAME) + + +class Head(Reference): + """A Head is a named reference to a Commit""" + _common_path_default = "refs/heads" + k_config_remote = "remote" + k_config_remote_ref = "merge" # branch to merge from remote + + # will be set by init method ! + RemoteReferenceCls = None + + #{ Configuration + + def set_tracking_branch(self, remote_reference): + """ + Configure this branch to track the given remote reference. This will alter + this branch's configuration accordingly. 
+ + :param remote_reference: The remote reference to track or None to untrack + any references + :return: self""" + if remote_reference is not None and not isinstance(remote_reference, self.RemoteReferenceCls): + raise ValueError("Incorrect parameter type: %r" % remote_reference) + # END handle type + + writer = self.config_writer() + if remote_reference is None: + writer.remove_option(self.k_config_remote) + writer.remove_option(self.k_config_remote_ref) + if len(writer.options()) == 0: + writer.remove_section() + # END handle remove section + else: + writer.set_value(self.k_config_remote, remote_reference.remote_name) + writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head)) + # END handle ref value + + return self + + def tracking_branch(self): + """ + :return: The remote_reference we are tracking, or None if we are + not a tracking branch""" + reader = self.config_reader() + if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref): + ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref))) + remote_refpath = self.RemoteReferenceCls.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name)) + return self.RemoteReferenceCls(self.repo, remote_refpath) + # END handle have tracking branch + + # we are not a tracking branch + return None + + + #{ Configruation + + def _config_parser(self, read_only): + if read_only: + parser = self.repo.config_reader() + else: + parser = self.repo.config_writer() + # END handle parser instance + + return SectionConstraint(parser, 'branch "%s"' % self.name) + + def config_reader(self): + """ + :return: A configuration parser instance constrained to only read + this instance's values""" + return self._config_parser(read_only=True) + + def config_writer(self): + """ + :return: A configuration writer instance with read-and write acccess + to options of this head""" + return self._config_parser(read_only=False) + + #} END 
configuration + + diff --git a/gitdb/ref/log.py b/gitdb/ref/log.py new file mode 100644 index 0000000..defe124 --- /dev/null +++ b/gitdb/ref/log.py @@ -0,0 +1,279 @@ +from gitdb.util import ( + join_path, + Actor, + LockedFD, + LockFile, + assure_directory_exists, + to_native_path, + bin_to_hex, + join, + file_contents_ro_filepath + ) + +from gitdb.object.util import ( + parse_date, + Serializable, + utctz_to_altz, + altz_to_utctz_str, + ) + +import time +import os +import re + +__all__ = ["RefLog", "RefLogEntry"] + + +class RefLogEntry(tuple): + """Named tuple allowing easy access to the revlog data fields""" + _fmt = "%s %s %s <%s> %i %s\t%s\n" + _re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + __slots__ = tuple() + + def __repr__(self): + """Representation of ourselves in git reflog format""" + act = self.actor + time = self.time + return self._fmt % (self.oldhexsha, self.newhexsha, act.name, act.email, + time[0], altz_to_utctz_str(time[1]), self.message) + + @property + def oldhexsha(self): + """The hexsha to the commit the ref pointed to before the change""" + return self[0] + + @property + def newhexsha(self): + """The hexsha to the commit the ref now points to, after the change""" + return self[1] + + @property + def actor(self): + """Actor instance, providing access""" + return self[2] + + @property + def time(self): + """time as tuple: + + * [0] = int(time) + * [1] = int(timezone_offset) in time.altzone format """ + return self[3] + + @property + def message(self): + """Message describing the operation that acted on the reference""" + return self[4] + + @classmethod + def new(self, oldhexsha, newhexsha, actor, time, tz_offset, message): + """:return: New instance of a RefLogEntry""" + if not isinstance(actor, Actor): + raise ValueError("Need actor instance, got %s" % actor) + # END check types + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), message)) + + @classmethod + def from_line(cls, line): + """:return: New RefLogEntry 
instance from the given revlog line. + :param line: line without trailing newline + :raise ValueError: If line could not be parsed""" + try: + info, msg = line.split('\t', 1) + except ValueError: + raise ValueError("line is missing tab separator") + #END handle first split + oldhexsha = info[:40] + newhexsha = info[41:81] + for hexsha in (oldhexsha, newhexsha): + if not cls._re_hexsha_only.match(hexsha): + raise ValueError("Invalid hexsha: %s" % hexsha) + # END if hexsha re doesn't match + #END for each hexsha + + email_end = info.find('>', 82) + if email_end == -1: + raise ValueError("Missing token: >") + #END handle missing end brace + + actor = Actor._from_string(info[82:email_end+1]) + time, tz_offset = parse_date(info[email_end+2:]) + + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) + + +class RefLog(list, Serializable): + """A reflog contains reflog entries, each of which defines a certain state + of the head in question. Custom query methods allow to retrieve log entries + by date or by other criteria. + + Reflog entries are ordered, the first added entry is first in the list, the last + entry, i.e. the last change of the head or reference, is last in the list.""" + + __slots__ = ('_path', ) + + def __new__(cls, filepath=None): + inst = super(RefLog, cls).__new__(cls) + return inst + + def __init__(self, filepath=None): + """Initialize this instance with an optional filepath, from which we will + initialize our data. 
The path is also used to write changes back using + the write() method""" + self._path = filepath + if filepath is not None: + self._read_from_file() + # END handle filepath + + def _read_from_file(self): + fmap = file_contents_ro_filepath(self._path, stream=False, allow_mmap=True) + try: + self._deserialize(fmap) + finally: + fmap.close() + #END handle closing of handle + + #{ Interface + + @classmethod + def from_file(cls, filepath): + """ + :return: a new RefLog instance containing all entries from the reflog + at the given filepath + :param filepath: path to reflog + :raise ValueError: If the file could not be read or was corrupted in some way""" + return cls(filepath) + + @classmethod + def path(cls, ref): + """ + :return: string to absolute path at which the reflog of the given ref + instance would be found. The path is not guaranteed to point to a valid + file though. + :param ref: SymbolicReference instance""" + return join(ref.repo.git_dir, "logs", to_native_path(ref.path)) + + @classmethod + def iter_entries(cls, stream): + """ + :return: Iterator yielding RefLogEntry instances, one for each line read + sfrom the given stream. + :param stream: file-like object containing the revlog in its native format + or basestring instance pointing to a file to read""" + new_entry = RefLogEntry.from_line + if isinstance(stream, basestring): + stream = file_contents_ro_filepath(stream) + #END handle stream type + while True: + line = stream.readline() + if not line: + return + yield new_entry(line.strip()) + #END endless loop + + @classmethod + def entry_at(cls, filepath, index): + """:return: RefLogEntry at the given index + :param filepath: full path to the index file from which to read the entry + :param index: python list compatible index, i.e. it may be negative to + specifiy an entry counted from the end of the list + + :raise IndexError: If the entry didn't exist + + .. 
note:: This method is faster as it only parses the entry at index, skipping + all other lines. Nonetheless, the whole file has to be read if + the index is negative + """ + fp = open(filepath, 'rb') + if index < 0: + return RefLogEntry.from_line(fp.readlines()[index].strip()) + else: + # read until index is reached + for i in xrange(index+1): + line = fp.readline() + if not line: + break + #END abort on eof + #END handle runup + + if i != index or not line: + raise IndexError + #END handle exception + + return RefLogEntry.from_line(line.strip()) + #END handle index + + def to_file(self, filepath): + """Write the contents of the reflog instance to a file at the given filepath. + :param filepath: path to file, parent directories are assumed to exist""" + lfd = LockedFD(filepath) + assure_directory_exists(filepath, is_file=True) + + fp = lfd.open(write=True, stream=True) + try: + self._serialize(fp) + lfd.commit() + except: + # on failure it rolls back automatically, but we make it clear + lfd.rollback() + raise + #END handle change + + @classmethod + def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): + """Append a new log entry to the revlog at filepath. + + :param config_reader: configuration reader of the repository - used to obtain + user information. May be None + :param filepath: full path to the log file + :param oldbinsha: binary sha of the previous commit + :param newbinsha: binary sha of the current commit + :param message: message describing the change to the reference + :param write: If True, the changes will be written right away. 
Otherwise + the change will not be written + :return: RefLogEntry objects which was appended to the log + :note: As we are append-only, concurrent access is not a problem as we + do not interfere with readers.""" + if len(oldbinsha) != 20 or len(newbinsha) != 20: + raise ValueError("Shas need to be given in binary format") + #END handle sha type + assure_directory_exists(filepath, is_file=True) + entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message)) + + lf = LockFile(filepath) + lf._obtain_lock_or_raise() + + fd = open(filepath, 'a') + try: + fd.write(repr(entry)) + finally: + fd.close() + lf._release_lock() + #END handle write operation + + return entry + + def write(self): + """Write this instance's data to the file we are originating from + :return: self""" + if self._path is None: + raise ValueError("Instance was not initialized with a path, use to_file(...) instead") + #END assert path + self.to_file(self._path) + return self + + #} END interface + + #{ Serializable Interface + def _serialize(self, stream): + lm1 = len(self) - 1 + write = stream.write + + # write all entries + for e in self: + write(repr(e)) + #END for each entry + + def _deserialize(self, stream): + self.extend(self.iter_entries(stream)) + #} END serializable interface diff --git a/gitdb/ref/reference.py b/gitdb/ref/reference.py new file mode 100644 index 0000000..b51b367 --- /dev/null +++ b/gitdb/ref/reference.py @@ -0,0 +1,81 @@ +import os + +from symbolic import SymbolicReference +from gitdb.util import ( + LazyMixin, + Iterable, + isfile, + hex_to_bin + ) + +__all__ = ["Reference"] + + +class Reference(SymbolicReference, LazyMixin, Iterable): + """Represents a named reference to any object. Subclasses may apply restrictions though, + i.e. 
Heads can only point to commits.""" + __slots__ = tuple() + _points_to_commits_only = False + _resolve_ref_on_create = True + _common_path_default = "refs" + + def __init__(self, repo, path): + """Initialize this instance + :param repo: Our parent repository + + :param path: + Path relative to the .git/ directory pointing to the ref in question, i.e. + refs/heads/master""" + if not path.startswith(self._common_path_default+'/'): + raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path )) + super(Reference, self).__init__(repo, path) + + + def __str__(self): + return self.name + + def set_object(self, object, logmsg = None): + """Special version which checks if the head-log needs an update as well""" + oldbinsha = None + if logmsg is not None: + head = self.repo.head + if not head.is_detached and head.ref == self: + oldbinsha = self.commit.binsha + #END handle commit retrieval + #END handle message is set + + super(Reference, self).set_object(object, logmsg) + + if oldbinsha is not None: + # /* from refs.c in git-source + # * Special hack: If a branch is updated directly and HEAD + # * points to it (may happen on the remote side of a push + # * for example) then logically the HEAD reflog should be + # * updated too. + # * A generic solution implies reverse symref information, + # * but finding all symrefs pointing to the given branch + # * would be rather costly for this rare event (the direct + # * update of a branch) to be worth it. So let's cheat and + # * check with HEAD only which should cover 99% of all usage + # * scenarios (even 100% of the default ones). 
+ # */ + self.repo.head.log_append(oldbinsha, logmsg) + #END check if the head + + # NOTE: Don't have to overwrite properties as the will only work without a the log + + @property + def name(self): + """:return: (shortest) Name of this reference - it may contain path components""" + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + return '/'.join(tokens[2:]) + + @classmethod + def iter_items(cls, repo, common_path = None): + """Equivalent to SymbolicReference.iter_items, but will return non-detached + references as well.""" + return cls._iter_items(repo, common_path) diff --git a/gitdb/ref/remote.py b/gitdb/ref/remote.py new file mode 100644 index 0000000..5a141f1 --- /dev/null +++ b/gitdb/ref/remote.py @@ -0,0 +1,47 @@ +from head import Head +from gitdb.util import ( + join, + join_path + ) + + + +__all__ = ["RemoteReference"] + + +class RemoteReference(Head): + """Represents a reference pointing to a remote head.""" + _common_path_default = "refs/remotes" + + + @classmethod + def iter_items(cls, repo, common_path = None, remote=None): + """Iterate remote references, and if given, constrain them to the given remote""" + common_path = common_path or cls._common_path_default + if remote is not None: + common_path = join_path(common_path, str(remote)) + # END handle remote constraint + return super(RemoteReference, cls).iter_items(repo, common_path) + + @property + def remote_name(self): + """ + :return: + Name of the remote we are a reference of, such as 'origin' for a reference + named 'origin/master'""" + tokens = self.path.split('/') + # /refs/remotes// + return tokens[2] + + @property + def remote_head(self): + """:return: Name of the remote head itself, i.e. master. 
+ :note: The returned name is usually not qualified enough to uniquely identify + a branch""" + tokens = self.path.split('/') + return '/'.join(tokens[3:]) + + @classmethod + def create(cls, *args, **kwargs): + """Used to disable this method""" + raise TypeError("Cannot explicitly create remote references") diff --git a/gitdb/ref/symbolic.py b/gitdb/ref/symbolic.py new file mode 100644 index 0000000..3db98f4 --- /dev/null +++ b/gitdb/ref/symbolic.py @@ -0,0 +1,642 @@ +import os +import re + +from gitdb.object import ( + Object, + Commit + ) +from gitdb.util import ( + join_path, + join_path_native, + to_native_path_linux, + assure_directory_exists, + join, + dirname, + isdir, + exists, + isfile, + rename, + hex_to_bin, + LockedFD + ) + +from gitdb.exc import BadObject +from log import RefLog + +__all__ = ["SymbolicReference"] + +class SymbolicReference(object): + """Represents a special case of a reference such that this reference is symbolic. + It does not point to a specific commit, but to another Head, which itself + specifies a commit. 
+ + A typical example for a symbolic reference is HEAD.""" + __slots__ = ("odb", "path") + _resolve_ref_on_create = False + _points_to_commits_only = True + _common_path_default = "" + _id_attribute_ = "name" + + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + + #{ Configuration + # Object class to be used when instantiating objects + ObjectCls = Object + + # all of the following are set by the package initializer + HEADCls = None + HeadCls = None + RemoteReferenceCls = None + TagReferenceCls = None + ReferenceCls = None + #}END configuration + + def __init__(self, odb, path): + self.odb = odb + self.path = path + + def __str__(self): + return self.path + + def __repr__(self): + return '' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + :return: + In case of symbolic references, the shortest assumable name + is the path itself.""" + return self.path + + @property + def abspath(self): + return join_path_native(self.odb.git_dir, self.path) + + @classmethod + def _get_packed_refs_path(cls, odb): + return join(odb.git_dir, 'packed-refs') + + @classmethod + def _iter_packed_refs(cls, odb): + """Returns an iterator yielding pairs of sha1/path pairs for the corresponding refs. 
+ :note: The packed refs file will be kept open as long as we iterate""" + try: + fp = open(cls._get_packed_refs_path(odb), 'r') + for line in fp: + line = line.strip() + if not line: + continue + if line.startswith('#'): + if line.startswith('# pack-refs with:') and not line.endswith('peeled'): + raise TypeError("PackingType of packed-Refs not understood: %r" % line) + # END abort if we do not understand the packing scheme + continue + # END parse comment + + # skip dereferenced tag object entries - previous line was actual + # tag reference for it + if line[0] == '^': + continue + + yield tuple(line.split(' ', 1)) + # END for each line + except (OSError,IOError): + return + # END no packed-refs file handling + # NOTE: Had try-finally block around here to close the fp, + # but some python version wouldn't allow yields within that. + # I believe files are closing themselves on destruction, so it is + # alright. + + @classmethod + def dereference_recursive(cls, odb, ref_path): + """ + :return: hexsha stored in the reference at the given ref_path, recursively dereferencing all + intermediate references as required + :param odb: the repository containing the reference at ref_path""" + while True: + hexsha, ref_path = cls._get_ref_info(odb, ref_path) + if hexsha is not None: + return hexsha + # END recursive dereferencing + + @classmethod + def _get_ref_info(cls, odb, ref_path): + """Return: (sha, target_ref_path) if available, the sha the file at + rela_path points to, or None. 
target_ref_path is the reference we + point to, or None""" + tokens = None + try: + fp = open(join(odb.git_dir, ref_path), 'r') + value = fp.read().rstrip() + fp.close() + tokens = value.split(" ") + except (OSError,IOError): + # Probably we are just packed, find our entry in the packed refs file + # NOTE: We are not a symbolic ref if we are in a packed file, as these + # are excluded explicitly + for sha, path in cls._iter_packed_refs(odb): + if path != ref_path: continue + tokens = (sha, path) + break + # END for each packed ref + # END handle packed refs + if tokens is None: + raise ValueError("Reference at %r does not exist" % ref_path) + + # is it a reference ? + if tokens[0] == 'ref:': + return (None, tokens[1]) + + # its a commit + if cls.re_hexsha_only.match(tokens[0]): + return (tokens[0], None) + + raise ValueError("Failed to parse reference information from %r" % ref_path) + + def _get_object_sha(self): + """ + :return: + The binary sha to the object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query""" + return hex_to_bin(self.dereference_recursive(self.odb, self.path)) + + def _get_object(self): + """ + :return: + The object our ref currently refers to.""" + # have to be dynamic here as we may be a tag which can point to anything + # Our path will be resolved to the hexsha which will be used accordingly + return self.ObjectCls.new_from_sha(self._get_object_sha()) + + def set_object(self, object_id, logmsg = None): + """Set the object we point to, possibly dereference our symbolic reference first. + If the reference does not exist, it will be created + + :param object_id: a reference specifier string, a SymbolicReference or an object hex sha. + SymbolicReferences will be dereferenced beforehand to obtain the object they point to + :param logmsg: If not None, the message will be used in the reflog entry to be + written. 
Otherwise the reflog is not altered + :note: plain SymbolicReferences may not actually point to objects by convention + :return: self""" + if isinstance(object_id, SymbolicReference): + # resolve the given reference to the object it points to, which is what we store + object_id = object_id.object + #END resolve references + + is_detached = True + try: + is_detached = self.is_detached + except ValueError: + pass + # END handle non-existing ones + + if is_detached: + return self.set_reference(object_id, logmsg) + + # set the commit on our reference + return self._get_reference().set_object(object_id, logmsg) + + def _get_commit(self): + """ + :return: + Commit object we point to, works for detached and non-detached + SymbolicReferences. The symbolic reference will be dereferenced recursively.""" + obj = self._get_object() + if obj.type == 'tag': + obj = obj.object + #END dereference tag + + if obj.type != Commit.type: + raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj) + #END handle type + return obj + + def set_commit(self, commit, logmsg = None): + """As set_object, but restricts the type of object to be a Commit + + :raise ValueError: If commit is not a Commit object or doesn't point to + a commit + :return: self""" + # check the type - assume the best if it is a base-string + is_invalid_type = False + if isinstance(commit, Object): + is_invalid_type = commit.type != Commit.type + elif isinstance(commit, SymbolicReference): + is_invalid_type = commit.object.type != Commit.type + else: + try: + is_invalid_type = self.odb.rev_parse(commit).type != Commit.type + except BadObject: + raise ValueError("Invalid object: %s" % commit) + #END handle exception + # END verify type + + if is_invalid_type: + raise ValueError("Need commit, got %r" % commit) + #END handle raise + + # we leave strings to the rev-parse method below + self.set_object(commit, logmsg) + + return self + + + commit = property(_get_commit, set_commit, doc="Query or set commits directly") + object = property(_get_object, set_object, doc="Return the object
our ref currently refers to") + object_binsha = property(_get_object_sha, set_object, doc="Return the object our ref currently refers to") + + def _get_reference(self): + """:return: Reference Object we point to + :raise TypeError: If this symbolic reference is detached, hence it doesn't point + to a reference, but to a commit""" + sha, target_ref_path = self._get_ref_info(self.odb, self.path) + if target_ref_path is None: + raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) + return self.from_path(self.odb, target_ref_path) + + def set_reference(self, ref, logmsg = None): + """Set ourselves to the given ref. It will stay a symbol if the ref is a Reference. + Otherwise an Object, given as Object instance or refspec, is assumed and if valid, + will be set which effectively detaches the refererence if it was a purely + symbolic one. + + :param ref: SymbolicReference instance, hexadecimal sha string or refspec string + Only if the ref is a SymbolicRef instance, we will point to it. Everthiny + else is dereferenced to obtain the actual object. + :param logmsg: If set to a string, the message will be used in the reflog. + Otherwise, a reflog entry is not written for the changed reference. + The previous commit of the entry will be the commit we point to now. + + See also: log_append() + + :return: self + :note: This symbolic reference will not be dereferenced. 
For that, see + ``set_object(...)``""" + write_value = None + obj = None + if isinstance(ref, SymbolicReference): + write_value = "ref: %s" % ref.path + elif isinstance(ref, Object): + obj = ref + write_value = ref.hexsha + elif isinstance(ref, basestring): + try: + obj = self.odb.rev_parse(ref+"^{}") # optionally deref tags + write_value = obj.hexsha + except BadObject: + raise ValueError("Could not extract object from %s" % ref) + # END end try string + else: + raise ValueError("Unrecognized Value: %r" % ref) + # END try commit attribute + + # typecheck + if obj is not None and self._points_to_commits_only and obj.type != Commit.type: + raise TypeError("Require commit, got %r" % obj) + #END verify type + + oldbinsha = None + if logmsg is not None: + try: + oldbinsha = self.commit.binsha + except ValueError: + oldbinsha = Commit.NULL_BIN_SHA + #END handle non-existing + #END retrieve old hexsha + + fpath = self.abspath + assure_directory_exists(fpath, is_file=True) + + lfd = LockedFD(fpath) + fd = lfd.open(write=True, stream=True) + fd.write(write_value) + lfd.commit() + + # Adjust the reflog + if logmsg is not None: + self.log_append(oldbinsha, logmsg) + #END handle reflog + + return self + + + # aliased reference + reference = property(_get_reference, set_reference, doc="Returns the Reference we point to") + ref = reference + + def is_valid(self): + """ + :return: + True if the reference is valid, hence it can be read and points to + a valid object or reference.""" + try: + self.object + except (OSError, ValueError): + return False + else: + return True + + @property + def is_detached(self): + """ + :return: + True if we are a detached reference, hence we point to a specific commit + instead to another reference""" + try: + self.ref + return False + except TypeError: + return True + + def log(self): + """ + :return: RefLog for this reference. Its last entry reflects the latest change + applied to this reference + + .. 
note:: As the log is parsed every time, its recommended to cache it for use + instead of calling this method repeatedly. It should be considered read-only.""" + return RefLog.from_file(RefLog.path(self)) + + def log_append(self, oldbinsha, message, newbinsha=None): + """Append a logentry to the logfile of this ref + + :param oldbinsha: binary sha this ref used to point to + :param message: A message describing the change + :param newbinsha: The sha the ref points to now. If None, our current commit sha + will be used + :return: added RefLogEntry instance""" + return RefLog.append_entry(self.odb.config_reader(), RefLog.path(self), oldbinsha, + (newbinsha is None and self.commit.binsha) or newbinsha, + message) + + def log_entry(self, index): + """:return: RefLogEntry at the given index + :param index: python list compatible positive or negative index + + .. note:: This method must read part of the reflog during execution, hence + it should be used sparringly, or only if you need just one index. + In that case, it will be faster than the ``log()`` method""" + return RefLog.entry_at(RefLog.path(self), index) + + @classmethod + def to_full_path(cls, path): + """ + :return: string with a full repository-relative path which can be used to initialize + a Reference instance, for instance by using ``Reference.from_path``""" + if isinstance(path, SymbolicReference): + path = path.path + full_ref_path = path + if not cls._common_path_default: + return full_ref_path + if not path.startswith(cls._common_path_default+"/"): + full_ref_path = '%s/%s' % (cls._common_path_default, path) + return full_ref_path + + @classmethod + def delete(cls, odb, path): + """Delete the reference at the given path + + :param odb: + Repository to delete the reference from + + :param path: + Short or full path pointing to the reference, i.e. refs/myreference + or just "myreference", hence 'refs/' is implied. 
Alternatively the symbolic reference to be deleted""" + full_ref_path = cls.to_full_path(path) + abs_path = join(odb.git_dir, full_ref_path) + if exists(abs_path): + os.remove(abs_path) + else: + # check packed refs + pack_file_path = cls._get_packed_refs_path(odb) + try: + reader = open(pack_file_path) + except (OSError,IOError): + pass # it didnt exist at all + else: + new_lines = list() + made_change = False + dropped_last_line = False + for line in reader: + # keep line if it is a comment or if it does not name exactly the + # ref to delete. A plain substring test would also drop refs which + # merely start with our path, i.e. refs/heads/foobar for refs/heads/foo + # If we deleted the last line and this one is a tag-reference object, + # we drop it as well + line_ref = line.strip().split(' ', 1)[-1] + if ( line.startswith('#') or line_ref != full_ref_path ) and \ + ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): + new_lines.append(line) + dropped_last_line = False + continue + # END skip comments and lines without our path + + # drop this line + made_change = True + dropped_last_line = True + # END for each line in packed refs + reader.close() + + # write the new lines, assuring the file handle is closed right away + if made_change: + writer = open(pack_file_path, 'w') + try: + writer.writelines(new_lines) + finally: + writer.close() + #END assure writer is closed + # END open exception handling + # END handle deletion + + # delete the reflog + reflog_path = RefLog.path(cls(odb, full_ref_path)) + if os.path.isfile(reflog_path): + os.remove(reflog_path) + #END remove reflog + + + @classmethod + def _create(cls, odb, path, resolve, reference, force, logmsg=None): + """internal method used to create a new symbolic reference. + If resolve is False, the reference will be taken as is, creating + a proper symbolic reference.
Otherwise it will be resolved to the + corresponding object and a detached symbolic reference will be created + instead""" + full_ref_path = cls.to_full_path(path) + abs_ref_path = join(odb.git_dir, full_ref_path) + + # figure out target data + target = reference + if resolve: + target = odb.rev_parse(str(reference)) + + if not force and isfile(abs_ref_path): + target_data = str(target) + if isinstance(target, SymbolicReference): + target_data = target.path + if not resolve: + target_data = "ref: " + target_data + existing_data = open(abs_ref_path, 'rb').read().strip() + if existing_data != target_data: + raise OSError("Reference at %r does already exist, pointing to %r, requested was %r" % (full_ref_path, existing_data, target_data)) + # END no force handling + + ref = cls(odb, full_ref_path) + ref.set_reference(target, logmsg) + return ref + + @classmethod + def create(cls, odb, path, reference='HEAD', force=False, logmsg=None): + """Create a new symbolic reference, hence a reference pointing to another reference. + + :param odb: + Repository to create the reference in + + :param path: + full path at which the new symbolic reference is supposed to be + created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" + + :param reference: + The reference to which the new symbolic reference should point to. + If it is a commit'ish, the symbolic ref will be detached. + + :param force: + if True, force creation even if a symbolic reference with that name already exists. + Raise OSError otherwise + + :param logmsg: + If not None, the message to append to the reflog. Otherwise no reflog + entry is written. + + :return: Newly created symbolic Reference + + :raise OSError: + If a (Symbolic)Reference with the same name but different contents + already exists. 
+ + :note: This does not alter the current HEAD, index or Working Tree""" + return cls._create(odb, path, cls._resolve_ref_on_create, reference, force, logmsg) + + def rename(self, new_path, force=False): + """Rename self to a new path + + :param new_path: + Either a simple name or a full path, i.e. new_name or features/new_name. + The prefix refs/ is implied for references and will be set as needed. + In case this is a symbolic ref, there is no implied prefix + + :param force: + If True, the rename will succeed even if a head with the target name + already exists. It will be overwritten in that case + + :return: self + :raise OSError: In case a file at path but a different contents already exists """ + new_path = self.to_full_path(new_path) + if self.path == new_path: + return self + + new_abs_path = join(self.odb.git_dir, new_path) + cur_abs_path = join(self.odb.git_dir, self.path) + if isfile(new_abs_path): + if not force: + # if they point to the same file, its not an error + if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip(): + raise OSError("File at path %r already exists" % new_abs_path) + # else: we could remove ourselves and use the otherone, but + # but clarity we just continue as usual + # END not force handling + os.remove(new_abs_path) + # END handle existing target file + + dname = dirname(new_abs_path) + if not isdir(dname): + os.makedirs(dname) + # END create directory + + rename(cur_abs_path, new_abs_path) + self.path = new_path + + return self + + @classmethod + def _iter_items(cls, odb, common_path = None): + if common_path is None: + common_path = cls._common_path_default + rela_paths = set() + + # walk loose refs + # Currently we do not follow links + for root, dirs, files in os.walk(join_path_native(odb.git_dir, common_path)): + if 'refs/' not in root: # skip non-refs subfolders + refs_id = [ i for i,d in enumerate(dirs) if d == 'refs' ] + if refs_id: + dirs[0:] = ['refs'] + # END prune non-refs folders + + for 
f in files: + abs_path = to_native_path_linux(join_path(root, f)) + rela_paths.add(abs_path.replace(to_native_path_linux(odb.git_dir) + '/', "")) + # END for each file in root directory + # END for each directory to walk + + # read packed refs + for sha, rela_path in cls._iter_packed_refs(odb): + if rela_path.startswith(common_path): + rela_paths.add(rela_path) + # END relative path matches common path + # END packed refs reading + + # return paths in sorted order + for path in sorted(rela_paths): + try: + yield cls.from_path(odb, path) + except ValueError: + continue + # END for each sorted relative refpath + + @classmethod + def iter_items(cls, odb, common_path = None): + """Find all refs in the repository + + :param odb: is the odb + + :param common_path: + Optional keyword argument to the path which is to be shared by all + returned Ref objects. + Defaults to class specific portion if None assuring that only + refs suitable for the actual class are returned. + + :return: + git.SymbolicReference[], each of them is guaranteed to be a symbolic + ref which is not detached. 
+ + List is lexigraphically sorted + The returned objects represent actual subclasses, such as Head or TagReference""" + return ( r for r in cls._iter_items(odb, common_path) if r.__class__ == cls or not r.is_detached ) + + @classmethod + def from_path(cls, odb, path): + """ + :param path: full .git-directory-relative path name to the Reference to instantiate + :note: use to_full_path() if you only have a partial path of a known Reference Type + :return: + Instance of type Reference, Head, or Tag + depending on the given path""" + if not path: + raise ValueError("Cannot create Reference from %r" % path) + + for ref_type in (cls.HEADCls, cls.HeadCls, cls.RemoteReferenceCls, cls.TagReferenceCls, cls.ReferenceCls, cls): + try: + instance = ref_type(odb, path) + if instance.__class__ == SymbolicReference and instance.is_detached: + raise ValueError("SymbolRef was detached, we drop it") + return instance + except ValueError: + pass + # END exception handling + # END for each type to try + raise ValueError("Could not find reference type suitable to handle path %r" % path) diff --git a/gitdb/ref/tag.py b/gitdb/ref/tag.py new file mode 100644 index 0000000..89e67ac --- /dev/null +++ b/gitdb/ref/tag.py @@ -0,0 +1,50 @@ +from reference import Reference + +__all__ = ["TagReference", "Tag"] + + + +class TagReference(Reference): + """Class representing a lightweight tag reference which either points to a commit + ,a tag object or any other object. In the latter case additional information, + like the signature or the tag-creator, is available. 
+ + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagReference.list_items(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message""" + + __slots__ = tuple() + _common_path_default = "refs/tags" + + @property + def commit(self): + """:return: Commit object the tag ref points to""" + obj = self.object + if obj.type == "commit": + return obj + elif obj.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return obj.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + + @property + def tag(self): + """ + :return: Tag object this tag ref points to or None in case + we are a light weight tag""" + obj = self.object + if obj.type == "tag": + return obj + return None + + # make object read-only + # It should be reasonably hard to adjust an existing tag + object = property(Reference._get_object) + +# provide an alias +Tag = TagReference diff --git a/gitdb/test/db/test_pack.py b/gitdb/test/db/test_pack.py index e8ba6f8..0e4d3b7 100644 --- a/gitdb/test/db/test_pack.py +++ b/gitdb/test/db/test_pack.py @@ -13,7 +13,6 @@ class TestPackDB(TestDBBase): - @with_rw_directory @with_packs_rw def test_writing(self, path): pdb = PackedDB(path) diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py index 50645be..d411ac9 100644 --- a/gitdb/test/lib.py +++ b/gitdb/test/lib.py @@ -5,6 +5,7 @@ """Utilities used in ODB testing""" from gitdb import ( OStream, + GitDB ) from gitdb.stream import ( Sha1Writer, @@ -26,14 +27,6 @@ import gc -#{ Bases - -class TestBase(unittest.TestCase): - """Base class for all tests""" - - -#} END bases - #{ Decorators def with_rw_directory(func): @@ -65,9 +58,30 @@ def wrapper(self): return wrapper +def with_rw_repo(func): + """Create a copy of our repository and put it into a writable location. 
It will + be removed if the test doesn't result in an error. + As we can currently only copy the fully working tree, tests must not rely on + being on a certain branch or on anything really except for the default tags + that should exist + Wrapped function obtains a git repository """ + def wrapper(self, path): + src_dir = os.path.dirname(os.path.dirname(__file__)) + assert(os.path.isdir(path)) + os.rmdir(path) # created by wrapper + shutil.copytree(src_dir, path) + return func(self, GitDB(os.path.join(path, '.git'))) + #END wrapper + wrapper.__name__ = func.__name__ + return with_rw_directory(wrapper) + + + def with_packs_rw(func): """Function that provides a path into which the packs for testing should be - copied. Will pass on the path to the actual function afterwards""" + copied. Will pass on the path to the actual function afterwards + + :note: needs with_rw_directory wrapped around it""" def wrapper(self, path): src_pack_glob = fixture_path('packs/*') copy_files_globbed(src_pack_glob, path, hard_link_ok=True) @@ -75,12 +89,27 @@ def wrapper(self, path): # END wrapper wrapper.__name__ = func.__name__ - return wrapper + return with_rw_directory(wrapper) #} END decorators #{ Routines +def repo_dir(): + """:return: path to our own repository, being our own .git directory. 
+ :note: doesn't work in bare repositories""" + base = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), '.git') + assert os.path.isdir(base) + return base + + +def maketemp(*args): + """Wrapper around default tempfile.mktemp to fix an osx issue""" + tdir = tempfile.mktemp(*args) + if sys.platform == 'darwin': + tdir = '/private' + tdir + return tdir + def fixture_path(relapath=''): """:return: absolute path into the fixture directory :param relapath: relative path into the fixtures directory, or '' @@ -160,3 +189,11 @@ def _assert(self): #} END stream utilitiess +#{ Bases + +class TestBase(unittest.TestCase): + """Base class for all tests""" + rorepo = GitDB(repo_dir()) + +#} END bases + diff --git a/gitdb/test/test_refs.py b/gitdb/test/test_refs.py new file mode 100644 index 0000000..cd6a8b9 --- /dev/null +++ b/gitdb/test/test_refs.py @@ -0,0 +1,523 @@ +# test_refs.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from gitdb.test.lib import * +from gitdb.ref import * +import gitdb.ref as ref + +from git.util import Actor +from git.objects.tag import TagObject + +from itertools import chain +import os + +class TestRefs(TestBase): + + def test_from_path(self): + # should be able to create any reference directly + for ref_type in ( Reference, Head, TagReference, RemoteReference ): + for name in ('rela_name', 'path/rela_name'): + full_path = ref_type.to_full_path(name) + instance = ref_type.from_path(self.rorepo, full_path) + assert isinstance(instance, ref_type) + # END for each name + # END for each type + + def test_tag_base(self): + tag_object_refs = list() + for tag in self.rorepo.tags: + assert "refs/tags" in tag.path + assert tag.name + assert isinstance( tag.commit, Commit ) + if tag.tag is not None: + tag_object_refs.append( tag ) + tagobj = tag.tag + # have no dict + 
self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1) + assert isinstance( tagobj, TagObject ) + assert tagobj.tag == tag.name + assert isinstance( tagobj.tagger, Actor ) + assert isinstance( tagobj.tagged_date, int ) + assert isinstance( tagobj.tagger_tz_offset, int ) + assert tagobj.message + assert tag.object == tagobj + # can't assign the object + self.failUnlessRaises(AttributeError, setattr, tag, 'object', tagobj) + # END if we have a tag object + # END for tag in repo-tags + assert tag_object_refs + assert isinstance(self.rorepo.tags['0.1.5'], TagReference) + + def test_tags(self): + # tag refs can point to tag objects or to commits + s = set() + ref_count = 0 + for ref in chain(self.rorepo.tags, self.rorepo.heads): + ref_count += 1 + # NOTE: the loop variable shadows the ``ref`` module alias, hence the + # star-imported Reference type is used for the type check + assert isinstance(ref, Reference) + assert str(ref) == ref.name + assert repr(ref) + assert ref == ref + assert not ref != ref + s.add(ref) + # END for each ref + assert len(s) == ref_count + assert len(s|s) == ref_count + + @with_rw_repo + def test_heads(self, rw_repo): + for head in rw_repo.heads: + assert head.name + assert head.path + assert "refs/heads" in head.path + prev_object = head.object + cur_object = head.object + assert prev_object == cur_object # represent the same git object + assert prev_object is not cur_object # but are different instances + + writer = head.config_writer() + tv = "testopt" + writer.set_value(tv, 1) + assert writer.get_value(tv) == 1 + del(writer) + assert head.config_reader().get_value(tv) == 1 + head.config_writer().remove_option(tv) + + # after the clone, we might still have a tracking branch setup + head.set_tracking_branch(None) + assert head.tracking_branch() is None + remote_ref = rw_repo.remotes[0].refs[0] + assert head.set_tracking_branch(remote_ref) is head + assert head.tracking_branch() == remote_ref + head.set_tracking_branch(None) + assert head.tracking_branch() is None + # END for each head + + # verify REFLOG gets altered + head = rw_repo.head + cur_head =
head.ref + cur_commit = cur_head.commit + pcommit = cur_head.commit.parents[0].parents[0] + hlog_len = len(head.log()) + blog_len = len(cur_head.log()) + assert head.set_reference(pcommit, 'detached head') is head + # one new log-entry + thlog = head.log() + assert len(thlog) == hlog_len + 1 + assert thlog[-1].oldhexsha == cur_commit.hexsha + assert thlog[-1].newhexsha == pcommit.hexsha + + # the ref didn't change though + assert len(cur_head.log()) == blog_len + + # head changes once again, cur_head doesn't change + head.set_reference(cur_head, 'reattach head') + assert len(head.log()) == hlog_len+2 + assert len(cur_head.log()) == blog_len + + # adjusting the head-ref also adjust the head, so both reflogs are + # altered + cur_head.set_commit(pcommit, 'changing commit') + assert len(cur_head.log()) == blog_len+1 + assert len(head.log()) == hlog_len+3 + + + # with automatic dereferencing + assert head.set_commit(cur_commit, 'change commit once again') is head + assert len(head.log()) == hlog_len+4 + assert len(cur_head.log()) == blog_len+2 + + # a new branch has just a single entry + other_head = Head.create(rw_repo, 'mynewhead', pcommit, logmsg='new head created') + log = other_head.log() + assert len(log) == 1 + assert log[0].oldhexsha == pcommit.NULL_HEX_SHA + assert log[0].newhexsha == pcommit.hexsha + + + def test_refs(self): + types_found = set() + for ref in self.rorepo.refs: + types_found.add(type(ref)) + assert len(types_found) >= 3 + + def test_is_valid(self): + assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False + assert self.rorepo.head.is_valid() + assert self.rorepo.head.reference.is_valid() + assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False + + def test_orig_head(self): + assert type(self.rorepo.head.orig_head()) == SymbolicReference + + #@with_rw_repo('0.1.6') + # todo reenable + def _disabled_test_head_reset(self, rw_repo): + cur_head = rw_repo.head + old_head_commit = cur_head.commit + new_head_commit = 
cur_head.ref.commit.parents[0] + cur_head.reset(new_head_commit, index=True) # index only + assert cur_head.reference.commit == new_head_commit + + self.failUnlessRaises(ValueError, cur_head.reset, new_head_commit, index=False, working_tree=True) + new_head_commit = new_head_commit.parents[0] + cur_head.reset(new_head_commit, index=True, working_tree=True) # index + wt + assert cur_head.reference.commit == new_head_commit + + # paths - make sure we have something to do + rw_repo.index.reset(old_head_commit.parents[0]) + cur_head.reset(cur_head, paths = "test") + cur_head.reset(new_head_commit, paths = "lib") + # hard resets with paths don't work, its all or nothing + self.failUnlessRaises(GitCommandError, cur_head.reset, new_head_commit, working_tree=True, paths = "lib") + + # we can do a mixed reset, and then checkout from the index though + cur_head.reset(new_head_commit) + rw_repo.index.checkout(["lib"], force=True)# + + + # now that we have a write write repo, change the HEAD reference - its + # like git-reset --soft + heads = rw_repo.heads + assert heads + for head in heads: + cur_head.reference = head + assert cur_head.reference == head + assert isinstance(cur_head.reference, Head) + assert cur_head.commit == head.commit + assert not cur_head.is_detached + # END for each head + + # detach + active_head = heads[0] + curhead_commit = active_head.commit + cur_head.reference = curhead_commit + assert cur_head.commit == curhead_commit + assert cur_head.is_detached + self.failUnlessRaises(TypeError, getattr, cur_head, "reference") + + # tags are references, hence we can point to them + some_tag = rw_repo.tags[0] + cur_head.reference = some_tag + assert not cur_head.is_detached + assert cur_head.commit == some_tag.commit + assert isinstance(cur_head.reference, TagReference) + + # put HEAD back to a real head, otherwise everything else fails + cur_head.reference = active_head + + # type check + self.failUnlessRaises(ValueError, setattr, cur_head, "reference", "that") 
+ + # head handling + commit = 'HEAD' + prev_head_commit = cur_head.commit + for count, new_name in enumerate(("my_new_head", "feature/feature1")): + actual_commit = commit+"^"*count + new_head = Head.create(rw_repo, new_name, actual_commit) + assert new_head.is_detached + assert cur_head.commit == prev_head_commit + assert isinstance(new_head, Head) + # already exists, but has the same value, so its fine + Head.create(rw_repo, new_name, new_head.commit) + + # its not fine with a different value + self.failUnlessRaises(OSError, Head.create, rw_repo, new_name, new_head.commit.parents[0]) + + # force it + new_head = Head.create(rw_repo, new_name, actual_commit, force=True) + old_path = new_head.path + old_name = new_head.name + + assert new_head.rename("hello").name == "hello" + assert new_head.rename("hello/world").name == "hello/world" + assert new_head.rename(old_name).name == old_name and new_head.path == old_path + + # rename with force + tmp_head = Head.create(rw_repo, "tmphead") + self.failUnlessRaises(GitCommandError, tmp_head.rename, new_head) + tmp_head.rename(new_head, force=True) + assert tmp_head == new_head and tmp_head.object == new_head.object + + logfile = RefLog.path(tmp_head) + assert os.path.isfile(logfile) + Head.delete(rw_repo, tmp_head) + # deletion removes the log as well + assert not os.path.isfile(logfile) + heads = rw_repo.heads + assert tmp_head not in heads and new_head not in heads + # force on deletion testing would be missing here, code looks okay though ;) + # END for each new head name + self.failUnlessRaises(TypeError, RemoteReference.create, rw_repo, "some_name") + + # tag ref + tag_name = "1.0.2" + light_tag = TagReference.create(rw_repo, tag_name) + self.failUnlessRaises(GitCommandError, TagReference.create, rw_repo, tag_name) + light_tag = TagReference.create(rw_repo, tag_name, "HEAD~1", force = True) + assert isinstance(light_tag, TagReference) + assert light_tag.name == tag_name + assert light_tag.commit == 
cur_head.commit.parents[0] + assert light_tag.tag is None + + # tag with tag object + other_tag_name = "releases/1.0.2RC" + msg = "my mighty tag\nsecond line" + obj_tag = TagReference.create(rw_repo, other_tag_name, message=msg) + assert isinstance(obj_tag, TagReference) + assert obj_tag.name == other_tag_name + assert obj_tag.commit == cur_head.commit + assert obj_tag.tag is not None + + TagReference.delete(rw_repo, light_tag, obj_tag) + tags = rw_repo.tags + assert light_tag not in tags and obj_tag not in tags + + # remote deletion + remote_refs_so_far = 0 + remotes = rw_repo.remotes + assert remotes + for remote in remotes: + refs = remote.refs + RemoteReference.delete(rw_repo, *refs) + remote_refs_so_far += len(refs) + for ref in refs: + assert ref.remote_name == remote.name + # END for each ref to delete + assert remote_refs_so_far + + for remote in remotes: + # remotes without references throw + self.failUnlessRaises(AssertionError, getattr, remote, 'refs') + # END for each remote + + # change where the active head points to + if cur_head.is_detached: + cur_head.reference = rw_repo.heads[0] + + head = cur_head.reference + old_commit = head.commit + head.commit = old_commit.parents[0] + assert head.commit == old_commit.parents[0] + assert head.commit == cur_head.commit + head.commit = old_commit + + # setting a non-commit as commit fails, but succeeds as object + head_tree = head.commit.tree + self.failUnlessRaises(ValueError, setattr, head, 'commit', head_tree) + assert head.commit == old_commit # and the ref did not change + # we allow heds to point to any object + head.object = head_tree + assert head.object == head_tree + # cannot query tree as commit + self.failUnlessRaises(TypeError, getattr, head, 'commit') + + # set the commit directly using the head. 
This would never detach the head + assert not cur_head.is_detached + head.object = old_commit + cur_head.reference = head.commit + assert cur_head.is_detached + parent_commit = head.commit.parents[0] + assert cur_head.is_detached + cur_head.commit = parent_commit + assert cur_head.is_detached and cur_head.commit == parent_commit + + cur_head.reference = head + assert not cur_head.is_detached + cur_head.commit = parent_commit + assert not cur_head.is_detached + assert head.commit == parent_commit + + # test checkout + active_branch = rw_repo.active_branch + for head in rw_repo.heads: + checked_out_head = head.checkout() + assert checked_out_head == head + # END for each head to checkout + + # checkout with branch creation + new_head = active_branch.checkout(b="new_head") + assert active_branch != rw_repo.active_branch + assert new_head == rw_repo.active_branch + + # checkout with force as we have a changed a file + # clear file + open(new_head.commit.tree.blobs[-1].abspath,'w').close() + assert len(new_head.commit.diff(None)) + + # create a new branch that is likely to touch the file we changed + far_away_head = rw_repo.create_head("far_head",'HEAD~100') + self.failUnlessRaises(GitCommandError, far_away_head.checkout) + assert active_branch == active_branch.checkout(force=True) + assert rw_repo.head.reference != far_away_head + + # test reference creation + partial_ref = 'sub/ref' + full_ref = 'refs/%s' % partial_ref + ref = Reference.create(rw_repo, partial_ref) + assert ref.path == full_ref + assert ref.object == rw_repo.head.commit + + self.failUnlessRaises(OSError, Reference.create, rw_repo, full_ref, 'HEAD~20') + # it works if it is at the same spot though and points to the same reference + assert Reference.create(rw_repo, full_ref, 'HEAD').path == full_ref + Reference.delete(rw_repo, full_ref) + + # recreate the reference using a full_ref + ref = Reference.create(rw_repo, full_ref) + assert ref.path == full_ref + assert ref.object == rw_repo.head.commit + + # 
recreate using force + ref = Reference.create(rw_repo, partial_ref, 'HEAD~1', force=True) + assert ref.path == full_ref + assert ref.object == rw_repo.head.commit.parents[0] + + # rename it + orig_obj = ref.object + for name in ('refs/absname', 'rela_name', 'feature/rela_name'): + ref_new_name = ref.rename(name) + assert isinstance(ref_new_name, Reference) + assert name in ref_new_name.path + assert ref_new_name.object == orig_obj + assert ref_new_name == ref + # END for each name type + + # References that don't exist trigger an error if we want to access them + self.failUnlessRaises(ValueError, getattr, Reference(rw_repo, "refs/doesntexist"), 'commit') + + # exists, fail unless we force + ex_ref_path = far_away_head.path + self.failUnlessRaises(OSError, ref.rename, ex_ref_path) + # if it points to the same commit it works + far_away_head.commit = ref.commit + ref.rename(ex_ref_path) + assert ref.path == ex_ref_path and ref.object == orig_obj + assert ref.rename(ref.path).path == ex_ref_path # rename to same name + + # create symbolic refs + symref_path = "symrefs/sym" + symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference) + assert symref.path == symref_path + assert symref.reference == cur_head.reference + + self.failUnlessRaises(OSError, SymbolicReference.create, rw_repo, symref_path, cur_head.reference.commit) + # it works if the new ref points to the same reference + SymbolicReference.create(rw_repo, symref.path, symref.reference).path == symref.path + SymbolicReference.delete(rw_repo, symref) + # would raise if the symref wouldn't have been deletedpbl + symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference) + + # test symbolic references which are not at default locations like HEAD + # or FETCH_HEAD - they may also be at spots in refs of course + symbol_ref_path = "refs/symbol_ref" + symref = SymbolicReference(rw_repo, symbol_ref_path) + assert symref.path == symbol_ref_path + symbol_ref_abspath = 
os.path.join(rw_repo.git_dir, symref.path) + + # set it + symref.reference = new_head + assert symref.reference == new_head + assert os.path.isfile(symbol_ref_abspath) + assert symref.commit == new_head.commit + + for name in ('absname','folder/rela_name'): + symref_new_name = symref.rename(name) + assert isinstance(symref_new_name, SymbolicReference) + assert name in symref_new_name.path + assert symref_new_name.reference == new_head + assert symref_new_name == symref + assert not symref.is_detached + # END for each ref + + # create a new non-head ref just to be sure we handle it even if packed + Reference.create(rw_repo, full_ref) + + # test ref listing - assure we have packed refs + rw_repo.git.pack_refs(all=True, prune=True) + heads = rw_repo.heads + assert heads + assert new_head in heads + assert active_branch in heads + assert rw_repo.tags + + # we should be able to iterate all symbolic refs as well - in that case + # we should expect only symbolic references to be returned + for symref in SymbolicReference.iter_items(rw_repo): + assert not symref.is_detached + + # when iterating references, we can get references and symrefs + # when deleting all refs, I'd expect them to be gone ! 
Even from + # the packed ones + # For this to work, we must not be on any branch + rw_repo.head.reference = rw_repo.head.commit + deleted_refs = set() + for ref in Reference.iter_items(rw_repo): + if ref.is_detached: + ref.delete(rw_repo, ref) + deleted_refs.add(ref) + # END delete ref + # END for each ref to iterate and to delete + assert deleted_refs + + for ref in Reference.iter_items(rw_repo): + if ref.is_detached: + assert ref not in deleted_refs + # END for each ref + + # reattach head - head will not be returned if it is not a symbolic + # ref + rw_repo.head.reference = Head.create(rw_repo, "master") + + # At least the head should still exist + assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD')) + refs = list(SymbolicReference.iter_items(rw_repo)) + assert len(refs) == 1 + + + # test creation of new refs from scratch + for path in ("basename", "dir/somename", "dir2/subdir/basename"): + # REFERENCES + ############ + fpath = Reference.to_full_path(path) + ref_fp = Reference.from_path(rw_repo, fpath) + assert not ref_fp.is_valid() + ref = Reference(rw_repo, fpath) + assert ref == ref_fp + + # can be created by assigning a commit + ref.commit = rw_repo.head.commit + assert ref.is_valid() + + # if the assignment raises, the ref doesn't exist + Reference.delete(ref.repo, ref.path) + assert not ref.is_valid() + self.failUnlessRaises(ValueError, setattr, ref, 'commit', "nonsense") + assert not ref.is_valid() + + # I am sure I had my reason to make it a class method at first, but + # now it doesn't make so much sense anymore, want an instance method as well + # See http://byronimo.lighthouseapp.com/projects/51787-gitpython/tickets/27 + Reference.delete(ref.repo, ref.path) + assert not ref.is_valid() + + ref.object = rw_repo.head.commit + assert ref.is_valid() + + Reference.delete(ref.repo, ref.path) + assert not ref.is_valid() + self.failUnlessRaises(ValueError, setattr, ref, 'object', "nonsense") + assert not ref.is_valid() + + # END for each path + + def 
test_dereference_recursive(self): + # for now, just test the HEAD + assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD') + + def test_reflog(self): + assert isinstance(self.rorepo.heads.master.log(), RefLog) + diff --git a/gitdb/typ.py b/gitdb/typ.py index e84dd24..6783e2e 100644 --- a/gitdb/typ.py +++ b/gitdb/typ.py @@ -6,9 +6,17 @@ #{ String types +# For compatability only, use ObjectType instead str_blob_type = "blob" str_commit_type = "commit" str_tree_type = "tree" str_tag_type = "tag" +class ObjectType(object): + """Enumeration providing object types as strings and ids""" + blob = str_blob_type + commit = str_commit_type + tree = str_tree_type + tag = str_tag_type + #} END string types diff --git a/gitdb/util.py b/gitdb/util.py index 4bb3c73..432eec4 100644 --- a/gitdb/util.py +++ b/gitdb/util.py @@ -191,6 +191,55 @@ def to_bin_sha(sha): return sha return hex_to_bin(sha) +def join_path(a, *p): + """Join path tokens together similar to os.path.join, but always use + '/' instead of possibly '\' on windows.""" + path = a + for b in p: + if b.startswith('/'): + path += b[1:] + elif path == '' or path.endswith('/'): + path += b + else: + path += '/' + b + return path + +def to_native_path_windows(path): + return path.replace('/','\\') + +def to_native_path_linux(path): + return path.replace('\\','/') + + +if sys.platform.startswith('win'): + to_native_path = to_native_path_windows +else: + # no need for any work on linux + def to_native_path_linux(path): + return path + to_native_path = to_native_path_linux + +def join_path_native(a, *p): + """ + As join path, but makes sure an OS native path is returned. This is only + needed to play it safe on my dear windows and to assure nice paths that only + use '\'""" + return to_native_path(join_path(a, *p)) + +def assure_directory_exists(path, is_file=False): + """Assure that the directory pointed to by path exists. + + :param is_file: If True, path is assumed to be a file and handled correctly. 
+ Otherwise it must be a directory + :return: True if the directory was created, False if it already existed""" + if is_file: + path = os.path.dirname(path) + #END handle file + if not os.path.isdir(path): + os.makedirs(path) + return True + return False + #} END routines @@ -352,5 +401,254 @@ def _end_writing(self, successful=True): # just delete the file so far, we failed os.remove(lockfile) # END successful handling + + +class LockFile(object): + """Provides methods to obtain, check for, and release a file based lock which + should be used to handle concurrent access to the same file. + + As we are a utility class to be derived from, we only use protected methods. + + Locks will automatically be released on destruction""" + __slots__ = ("_file_path", "_owns_lock") + + def __init__(self, file_path): + self._file_path = file_path + self._owns_lock = False + + def __del__(self): + self._release_lock() + + def _lock_file_path(self): + """:return: Path to lockfile""" + return "%s.lock" % (self._file_path) + + def _has_lock(self): + """:return: True if we have a lock and if the lockfile still exists + :raise AssertionError: if our lock-file does not exist""" + if not self._owns_lock: + return False + + return True + + def _obtain_lock_or_raise(self): + """Create a lock file as flag for other instances, mark our instance as lock-holder + + :raise IOError: if a lock was already present or a lock file could not be written""" + if self._has_lock(): + return + lock_file = self._lock_file_path() + if os.path.isfile(lock_file): + raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) + + try: + fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) + os.close(fd) + except OSError,e: + raise IOError(str(e)) + + self._owns_lock = True + + def _obtain_lock(self): + """The default implementation will raise if a lock cannot be obtained. 
+ Subclasses may override this method to provide a different implementation""" + return self._obtain_lock_or_raise() + + def _release_lock(self): + """Release our lock if we have one""" + if not self._has_lock(): + return + + # if someone removed our file beforhand, lets just flag this issue + # instead of failing, to make it more usable. + lfp = self._lock_file_path() + try: + # on bloody windows, the file needs write permissions to be removable. + # Why ... + if os.name == 'nt': + os.chmod(lfp, 0777) + # END handle win32 + os.remove(lfp) + except OSError: + pass + self._owns_lock = False + +class BlockingLockFile(LockFile): + """The lock file will block until a lock could be obtained, or fail after + a specified timeout. + + :note: If the directory containing the lock was removed, an exception will + be raised during the blocking period, preventing hangs as the lock + can never be obtained.""" + __slots__ = ("_check_interval", "_max_block_time") + def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint): + """Configure the instance + + :parm check_interval_s: + Period of time to sleep until the lock is checked the next time. + By default, it waits a nearly unlimited time + + :parm max_block_time_s: Maximum amount of seconds we may lock""" + super(BlockingLockFile, self).__init__(file_path) + self._check_interval = check_interval_s + self._max_block_time = max_block_time_s + + def _obtain_lock(self): + """This method blocks until it obtained the lock, or raises IOError if + it ran out of time or if the parent directory was not available anymore. 
+ If this method returns, you are guranteed to own the lock""" + starttime = time.time() + maxtime = starttime + float(self._max_block_time) + while True: + try: + super(BlockingLockFile, self)._obtain_lock() + except IOError: + # synity check: if the directory leading to the lockfile is not + # readable anymore, raise an execption + curtime = time.time() + if not os.path.isdir(os.path.dirname(self._lock_file_path())): + msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime) + raise IOError(msg) + # END handle missing directory + + if curtime >= maxtime: + msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path()) + raise IOError(msg) + # END abort if we wait too long + time.sleep(self._check_interval) + else: + break + # END endless loop + + +class Actor(object): + """Actors hold information about a person acting on the repository. They + can be committers and authors or anything with a name and an email as + mentioned in the git log entries.""" + # PRECOMPILED REGEX + name_only_regex = re.compile( r'<(.+)>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_name = "GIT_AUTHOR_NAME" + env_author_email = "GIT_AUTHOR_EMAIL" + env_committer_name = "GIT_COMMITTER_NAME" + env_committer_email = "GIT_COMMITTER_EMAIL" + + # CONFIGURATION KEYS + conf_name = 'name' + conf_email = 'email' + + __slots__ = ('name', 'email') + + def __init__(self, name, email): + self.name = name + self.email = email + + def __eq__(self, other): + return self.name == other.name and self.email == other.email + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((self.name, self.email)) + + def __str__(self): + return self.name + + def __repr__(self): + return '">' % (self.name, self.email) + + @classmethod + def _from_string(cls, string): + """Create an Actor 
from a string. + :param string: is the string, which is expected to be in regular git format + + John Doe + + :return: Actor """ + m = cls.name_email_regex.search(string) + if m: + name, email = m.groups() + return cls(name, email) + else: + m = cls.name_only_regex.search(string) + if m: + return cls(m.group(1), None) + else: + # assume best and use the whole string as name + return cls(string, None) + # END special case name + # END handle name/email matching + + @classmethod + def _main_actor(cls, env_name, env_email, config_reader=None): + actor = cls('', '') + default_email = get_user_id() + default_name = default_email.split('@')[0] + + for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name), + ('email', env_email, cls.conf_email, default_email)): + try: + setattr(actor, attr, os.environ[evar]) + except KeyError: + if config_reader is not None: + setattr(actor, attr, config_reader.get_value('user', cvar, default)) + #END config-reader handling + if not getattr(actor, attr): + setattr(actor, attr, default) + #END handle name + #END for each item to retrieve + return actor + + + @classmethod + def committer(cls, config_reader=None): + """ + :return: Actor instance corresponding to the configured committer. It behaves + similar to the git implementation, such that the environment will override + configuration values of config_reader. If no value is set at all, it will be + generated + :param config_reader: ConfigReader to use to retrieve the values from in case + they are not set in the environment""" + return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) + + @classmethod + def author(cls, config_reader=None): + """Same as committer(), but defines the main author. 
It may be specified in the environment, + but defaults to the committer""" + return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) + + +class Iterable(object): + """Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository""" + __slots__ = tuple() + _id_attribute_ = "attribute that most suitably identifies your instance" + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no args are given, subclasses are obliged to return all items if no additional + arguments arg given. + + :note: Favor the iter_items method as it will + + :return:list(Item,...) list of item instances""" + out_list = IterableList( cls._id_attribute_ ) + out_list.extend(cls.iter_items(repo, *args, **kwargs)) + return out_list + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """For more information about the arguments, see list_items + :return: iterator yielding Items""" + raise NotImplementedError("To be implemented by Subclass") + + #} END utilities From dba71a0c727aba19319d3e868d0ca4b8009bcef4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2011 20:15:51 +0200 Subject: [PATCH 03/15] Initial version of objects. 
The submodule implementation is left in git-python for now as it requires plenty of additional features which are currently only available via the git command --- gitdb/object/__init__.py | 14 ++ gitdb/object/base.py | 176 +++++++++++++++++ gitdb/object/blob.py | 34 ++++ gitdb/object/commit.py | 259 +++++++++++++++++++++++++ gitdb/object/fun.py | 199 +++++++++++++++++++ gitdb/object/submodule.py | 12 ++ gitdb/object/tag.py | 79 ++++++++ gitdb/object/tree.py | 287 ++++++++++++++++++++++++++++ gitdb/object/util.py | 315 +++++++++++++++++++++++++++++++ gitdb/test/object/test_blob.py | 23 +++ gitdb/test/object/test_commit.py | 275 +++++++++++++++++++++++++++ gitdb/test/object/test_tree.py | 144 ++++++++++++++ gitdb/test/test_util.py | 27 +++ gitdb/typ.py | 5 + gitdb/util.py | 61 ++++++ 15 files changed, 1910 insertions(+) create mode 100644 gitdb/object/__init__.py create mode 100644 gitdb/object/base.py create mode 100644 gitdb/object/blob.py create mode 100644 gitdb/object/commit.py create mode 100644 gitdb/object/fun.py create mode 100644 gitdb/object/submodule.py create mode 100644 gitdb/object/tag.py create mode 100644 gitdb/object/tree.py create mode 100644 gitdb/object/util.py create mode 100644 gitdb/test/object/test_blob.py create mode 100644 gitdb/test/object/test_commit.py create mode 100644 gitdb/test/object/test_tree.py diff --git a/gitdb/object/__init__.py b/gitdb/object/__init__.py new file mode 100644 index 0000000..09c1bca --- /dev/null +++ b/gitdb/object/__init__.py @@ -0,0 +1,14 @@ +""" +Import all submodules main classes into the package space +""" +import inspect + +from base import * +from tag import * +from blob import * +from commit import * +from tree import * +from submodule import * + +__all__ = [ name for name, obj in locals().items() + if not (name.startswith('_') or inspect.ismodule(obj)) ] \ No newline at end of file diff --git a/gitdb/object/base.py b/gitdb/object/base.py new file mode 100644 index 0000000..2403e8d --- /dev/null +++ 
b/gitdb/object/base.py @@ -0,0 +1,176 @@ +# base.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +from util import get_object_type_by_name +from gitdb.util import ( + hex_to_bin, + bin_to_hex, + basename, + LazyMixin, + join_path_native, + stream_copy + ) + +from gitdb.typ import ObjectType + +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + +__all__ = ("Object", "IndexObject") + +class Object(LazyMixin): + """Implements an Object which may be Blobs, Trees, Commits and Tags""" + NULL_HEX_SHA = '0'*40 + NULL_BIN_SHA = '\0'*20 + + TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag) + __slots__ = ("odb", "binsha", "size" ) + + type = None # to be set by subclass + type_id = None # to be set by subclass + + def __init__(self, odb, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. + + :param odb: repository this object is located in + + :param binsha: 20 byte SHA1""" + super(Object,self).__init__() + self.odb = odb + self.binsha = binsha + assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) + + @classmethod + def new(cls, odb, id): + """ + :return: New Object instance of a type appropriate to the object type behind + id. 
The id of the newly created object will be a binsha even though + the input id may have been a Reference or Rev-Spec + + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" + return odb.rev_parse(str(id)) + + @classmethod + def new_from_sha(cls, odb, sha1): + """ + :return: new object instance of a type appropriate to represent the given + binary sha1 + :param sha1: 20 byte binary sha1""" + if sha1 == cls.NULL_BIN_SHA: + # the NULL binsha is always the root commit + return get_object_type_by_name('commit')(odb, sha1) + #END handle special case + oinfo = odb.odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha) + inst.size = oinfo.size + return inst + + def _set_cache_(self, attr): + """Retrieve object information""" + if attr == "size": + oinfo = self.odb.odb.info(self.binsha) + self.size = oinfo.size + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """:return: True if the objects have the same SHA1""" + return self.binsha == other.binsha + + def __ne__(self, other): + """:return: True if the objects do not have the same SHA1 """ + return self.binsha != other.binsha + + def __hash__(self): + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) + + def __str__(self): + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) + + def __repr__(self): + """:return: string with pythonic representation of our object""" + return '' % (self.__class__.__name__, self.hexsha) + + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) + + @property + def data_stream(self): + """ :return: File Object compatible stream to the uncompressed raw data 
of the object + :note: returned streams must be read in order""" + return self.odb.odb.stream(self.binsha) + + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + istream = self.odb.odb.stream(self.binsha) + stream_copy(istream, ostream) + return self + + +class IndexObject(Object): + """Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects""" + __slots__ = ("path", "mode") + + # for compatability with iterable lists + _id_attribute_ = 'path' + + def __init__(self, odb, binsha, mode=None, path=None): + """Initialize a newly instanced IndexObject + :param odb: is the Repo we are located in + :param binsha: 20 byte sha1 + :param mode: is the stat compatible file mode as int, use the stat module + to evaluate the infomration + :param path: + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + :note: + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree.""" + super(IndexObject, self).__init__(odb, binsha) + if mode is not None: + self.mode = mode + if path is not None: + self.path = path + + def __hash__(self): + """:return: + Hash of our path as index items are uniquely identifyable by path, not + by their data !""" + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + # END hanlde slot attribute + + @property + def name(self): + """:return: Name portion of the path, effectively being the basename""" + return basename(self.path) + + @property + def abspath(self): + """ + :return: + Absolute 
path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. """ + return join_path_native(self.odb.working_tree_dir, self.path) + diff --git a/gitdb/object/blob.py b/gitdb/object/blob.py new file mode 100644 index 0000000..0d23532 --- /dev/null +++ b/gitdb/object/blob.py @@ -0,0 +1,34 @@ +# blob.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from mimetypes import guess_type +from gitdb.typ import ObjectType +import base + +__all__ = ('Blob', ) + +class Blob(base.IndexObject): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = ObjectType.blob + type_id = ObjectType.blob_id + + # valid blob modes + executable_mode = 0100755 + file_mode = 0100644 + link_mode = 0120000 + + __slots__ = tuple() + + @property + def mime_type(self): + """ + :return: String describing the mime type of this file (based on the filename) + :note: Defaults to 'text/plain' in case the actual file type is unknown. 
""" + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/gitdb/object/commit.py b/gitdb/object/commit.py new file mode 100644 index 0000000..a42660b --- /dev/null +++ b/gitdb/object/commit.py @@ -0,0 +1,259 @@ +# commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +from gitdb.typ import ObjectType +from tree import Tree +from cStringIO import StringIO + +import base +from gitdb.util import ( + hex_to_bin, + Actor, + ) +from util import ( + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date + ) +import sys + +__all__ = ('Commit', ) + +class Commit(base.Object, Traversable, Serializable): + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = ObjectType.commit + type_id = ObjectType.commit_id + + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. 
The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, 
as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = 
parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) + # END handle author's encoding + + # decode committer name + try: + self.committer.name = self.committer.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) + # END handle author's encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) + # END exception handling + return self + + #} END serializable implementation diff --git a/gitdb/object/fun.py b/gitdb/object/fun.py new file mode 100644 index 0000000..9b0a377 --- /dev/null +++ b/gitdb/object/fun.py @@ -0,0 +1,199 @@ +"""Module with functions which are supposed to be as fast as possible""" +from stat import S_ISDIR + +__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', + 'traverse_tree_recursive') + + + + +def tree_to_stream(entries, write): + """Write the give list of entries into a stream using its write method + :param entries: **sorted** 
list of tuples with (binsha, mode, name) + :param write: write method which takes a data string""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + + for binsha, mode, name in entries: + mode_str = '' + for i in xrange(6): + mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str + # END for each 8 octal value + + # git slices away the first octal if its zero + if mode_str[0] == '0': + mode_str = mode_str[1:] + # END save a byte + + # here it comes: if the name is actually unicode, the replacement below + # will not work as the binsha is not part of the ascii unicode encoding - + # hence we must convert to an utf8 string for it to work properly. + # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item + + +def tree_entries_from_data(data): + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # 
END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + +def _find_by_name(tree_data, name, is_dir, start_at): + """return data entry matching the given name and tree mode + or None. + Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None + +def _to_full_path(item, path_prefix): + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + +def traverse_trees_recursive(odb, tree_shas, path_prefix): + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. 
A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + +def traverse_tree_recursive(odb, tree_sha, path_prefix): + """ + :return: list of entries of the tree pointed to by the binary tree_sha. 
An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries diff --git a/gitdb/object/submodule.py b/gitdb/object/submodule.py new file mode 100644 index 0000000..77669b3 --- /dev/null +++ b/gitdb/object/submodule.py @@ -0,0 +1,12 @@ +from base import IndexObject + +class Submodule(IndexObject): + """Dummy type representing submodules. At some point an implemenation might be add + ( it currently is located in GitPython )""" + + # this is a bogus type for base class compatability + type = 'submodule' + # this type doesn't really have a type id + type_id = 0 + + diff --git a/gitdb/object/tag.py b/gitdb/object/tag.py new file mode 100644 index 0000000..ce702c7 --- /dev/null +++ b/gitdb/object/tag.py @@ -0,0 +1,79 @@ +# objects.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" Module containing all object based types. 
""" +import base +from gitdb.util import hex_to_bin +from util import ( + get_object_type_by_name, + parse_actor_and_date + ) +from gitdb.typ import ObjectType + +__all__ = ("TagObject", ) + +class TagObject(base.Object): + """Non-Lightweight tag carrying additional information about an object we are pointing to.""" + type = ObjectType.tag + type_id = ObjectType.tag_id + + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, odb, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param odb: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(odb, binsha ) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date, 
self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/gitdb/object/tree.py b/gitdb/object/tree.py new file mode 100644 index 0000000..8dabd6e --- /dev/null +++ b/gitdb/object/tree.py @@ -0,0 +1,287 @@ +# tree.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +import util +from gitdb.typ import ObjectType +from base import IndexObject +from blob import Blob +from submodule import Submodule + +from fun import ( + tree_entries_from_data, + tree_to_stream + ) + +from gitdb.util import ( + to_bin_sha, + join_path + ) + +__all__ = ("TreeModifier", "Tree") + +class TreeModifier(object): + """A utility class providing methods to alter the underlying cache in a list-like fashion. + + Once all adjustments are complete, the _cache, which really is a refernce to + the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + __slots__ = '_cache' + + def __init__(self, cache): + self._cache = cache + + def _index_by_name(self, name): + """:return: index of an item with name, or -1 if not found""" + for i, t in enumerate(self._cache): + if t[2] == name: + return i + # END found item + # END for each item in cache + return -1 + + #{ Interface + def set_done(self): + """Call this method once you are done modifying the tree information. 
+ It may be called several times, but be aware that each call will cause + a sort operation + :return self:""" + self._cache.sort(key=lambda t: t[2]) # sort by name + return self + #} END interface + + #{ Mutators + def add(self, sha, mode, name, force=False): + """Add the given item to the tree. If an item with the given name already + exists, nothing will be done, but a ValueError will be raised if the + sha and mode of the existing item do not match the one you add, unless + force is True + + :param sha: The 20 or 40 byte sha of the item to add + :param mode: int representing the stat compatible mode of the item + :param force: If True, an item with your name and information will overwrite + any existing item with the same name, no matter which information it has + :return: self""" + if '/' in name: + raise ValueError("Name must not contain '/' characters") + if (mode >> 12) not in Tree._map_id_to_type: + raise ValueError("Invalid object type according to mode %o" % mode) + + sha = to_bin_sha(sha) + index = self._index_by_name(name) + item = (sha, mode, name) + if index == -1: + self._cache.append(item) + else: + if force: + self._cache[index] = item + else: + ex_item = self._cache[index] + if ex_item[0] != sha or ex_item[1] != mode: + raise ValueError("Item %r existed with different properties" % name) + # END handle mismatch + # END handle force + # END handle name exists + return self + + def add_unchecked(self, binsha, mode, name): + """Add the given item to the tree, its correctness is assumed, which + puts the caller into responsibility to assure the input is correct. 
+ For more information on the parameters, see ``add`` + :param binsha: 20 byte binary sha""" + self._cache.append((binsha, mode, name)) + + def __delitem__(self, name): + """Deletes an item with the given name if it exists""" + index = self._index_by_name(name) + if index > -1: + del(self._cache[index]) + + #} END mutators + + +class Tree(IndexObject, util.Traversable, util.Serializable): + """Tree objects represent an ordered list of Blobs and other Trees. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + """ + + type = ObjectType.tree + type_id = ObjectType.tree_id + + __slots__ = "_cache" + + # actual integer ids for comparison + commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + blob_id = 010 + symlink_id = 012 + tree_id = 004 + + #{ Configuration + + # override in subclass if you would like your own types to be instantiated instead + _map_id_to_type = { + commit_id : Submodule, + blob_id : Blob, + symlink_id : Blob + # tree id added once Tree is defined + } + + #} end configuration + + + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return tuple(index_object._iter_convert_to_object(index_object._cache)) + return tuple() + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + ostream = self.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) + else: + super(Tree, self)._set_cache_(attr) + # END handle attribute + + def _iter_convert_to_object(self, iterable): + """Iterable yields tuples of (binsha, mode, name), which will be converted + to the respective object representation""" + for binsha, mode, name in iterable: + path = join_path(self.path, name) + try: + yield self._map_id_to_type[mode >> 12](self.repo, 
binsha, mode, path) + except KeyError: + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) + # END for each item + + def __div__(self, file): + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` + + :raise KeyError: if given file or tree does not exist in tree""" + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for info in self._cache: + if info[2] == file: # [2] == name + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + @property + def trees(self): + """:return: list(Tree, ...) list of trees directly below this tree""" + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """:return: list(Blob, ...) list of blobs directly below this tree""" + return [ i for i in self if i.type == "blob" ] + + @property + def cache(self): + """ + :return: An object allowing to modify the internal cache. This can be used + to change the tree's contents. When done, make sure you call ``set_done`` + on the tree modifier, or serialization behaviour will be incorrect. 
+ See the ``TreeModifier`` for more information on how to alter the cache""" + return TreeModifier(self._cache) + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see util.Traversable.traverse + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self, i, j): + return list(self._iter_convert_to_object(self._cache[i:j])) + + def __iter__(self): + return self._iter_convert_to_object(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self, item): + if isinstance(item, int): + info = self._cache[item] + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self, item): + if isinstance(item, IndexObject): + for info in self._cache: + if item.binsha == info[0]: + return True + # END compare sha + # END for each entry + # END handle item is index object + # compatability + + # treat item as repo-relative path + path = self.path + for info in self._cache: + if item == join_path(path, info[2]): + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._iter_convert_to_object(self._cache)) + + def _serialize(self, stream): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. 
If this is not the case, serialization + will not generate a correct tree representation as these are assumed to be sorted + by algorithms""" + tree_to_stream(self._cache, stream.write) + return self + + def _deserialize(self, stream): + self._cache = tree_entries_from_data(stream.read()) + return self + + +# END tree + +# finalize map definition +Tree._map_id_to_type[Tree.tree_id] = Tree diff --git a/gitdb/object/util.py b/gitdb/object/util.py new file mode 100644 index 0000000..e63cb30 --- /dev/null +++ b/gitdb/object/util.py @@ -0,0 +1,315 @@ +# util.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module for general utility functions""" +from gitdb.util import ( + IterableList, + Actor + ) + +import re +from collections import deque as Deque + +from string import digits +import time +import os + +__all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', + 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', + 'verify_utctz', 'Actor') + +#{ Functions + +def mode_str_to_int(modestr): + """ + :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used + :return: + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example.""" + mode = 0 + for iteration, char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + +def get_object_type_by_name(object_type_name): + """ + :return: type suitable to handle the given object type name. + Use the type to create new instances. 
+ + :param object_type_name: Member of TYPES + + :raise ValueError: In case object_type_name is unknown""" + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + +def utctz_to_altz(utctz): + """we convert utctz to the timezone in seconds, it is the format time.altzone + returns. Git stores it as UTC timezone which has the opposite sign as well, + which explains the -1 * ( that was made explicit here ) + :param utctz: git utc timezone string, i.e. +0200""" + return -1 * int(float(utctz)/100*3600) + +def altz_to_utctz_str(altz): + """As above, but inverses the operation, returning a string that can be used + in commit objects""" + utci = -1 * int((altz / 3600)*100) + utcs = str(abs(utci)) + utcs = "0"*(4-len(utcs)) + utcs + prefix = (utci < 0 and '-') or '+' + return prefix + utcs + + +def verify_utctz(offset): + """:raise ValueError: if offset is incorrect + :return: offset""" + fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) + if len(offset) != 5: + raise fmt_exc + if offset[0] not in "+-": + raise fmt_exc + if offset[1] not in digits or \ + offset[2] not in digits or \ + offset[3] not in digits or \ + offset[4] not in digits: + raise fmt_exc + # END for each char + return offset + +def parse_date(string_date): + """ + Parse the given date as one of the following + + * Git internal format: timestamp offset + * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
+ * ISO 8601 2005-04-07T22:13:13 + The T can be a space as well + + :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch + :raise ValueError: If the format could not be understood + :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY""" + # git time + try: + if string_date.count(' ') == 1 and string_date.rfind(':') == -1: + timestamp, offset = string_date.split() + timestamp = int(timestamp) + return timestamp, utctz_to_altz(verify_utctz(offset)) + else: + offset = "+0000" # local time by default + if string_date[-5] in '-+': + offset = verify_utctz(string_date[-5:]) + string_date = string_date[:-6] # skip space as well + # END split timezone info + + # now figure out the date and time portion - split time + date_formats = list() + splitter = -1 + if ',' in string_date: + date_formats.append("%a, %d %b %Y") + splitter = string_date.rfind(' ') + else: + # iso plus additional + date_formats.append("%Y-%m-%d") + date_formats.append("%Y.%m.%d") + date_formats.append("%m/%d/%Y") + date_formats.append("%d.%m.%Y") + + splitter = string_date.rfind('T') + if splitter == -1: + splitter = string_date.rfind(' ') + # END handle 'T' and ' ' + # END handle rfc or iso + + assert splitter > -1 + + # split date and time + time_part = string_date[splitter+1:] # skip space + date_part = string_date[:splitter] + + # parse time + tstruct = time.strptime(time_part, "%H:%M:%S") + + for fmt in date_formats: + try: + dtstruct = time.strptime(date_part, fmt) + fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, + tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, + dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) + return int(time.mktime(fstruct)), utctz_to_altz(offset) + except ValueError: + continue + # END exception handling + # END for each fmt + + # still here ? 
fail + raise ValueError("no format matched") + # END handle format + except Exception: + raise ValueError("Unsupported date format: %s" % string_date) + # END handle exceptions + + +# precompiled regex +_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') + +def parse_actor_and_date(line): + """Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner 1191999972 -0700 + + :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" + m = _re_actor_epoch.search(line) + actor, epoch, offset = m.groups() + return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) + + +#} END functions + + +#{ Classes + +class ProcessStreamAdapter(object): + """Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope.""" + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + +class Traversable(object): + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. + Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. 
+ Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + def list_traverse(self, *args, **kwargs): + """ + :return: IterableList with the results of the traversal as produced by + traverse()""" + out = IterableList(self._id_attribute_) + out.extend(self.traverse(*args, **kwargs)) + return out + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """:return: iterator yieling of items found when traversing self + + :param predicate: f(i,d) returns False if item i at depth d should not be included in the result + + :param prune: + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + :param depth: + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + :param branch_first: + if True, items will be returned branch first, otherwise depth first + + :param visit_once: + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + :param ignore_self: + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + :param as_edge: + if True, return a pair of items, first being the source, second the + destinatination, i.e. 
tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item is self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! + nd = d + 1 + if depth > -1 and nd > depth: + continue + + addToStack( stack, item, branch_first, nd ) + # END for each item on work stack + + +class Serializable(object): + """Defines methods to serialize and deserialize objects from and into a data stream""" + __slots__ = tuple() + + def _serialize(self, stream): + """Serialize the data of this object into the given data stream + :note: a serialized object would ``_deserialize`` into the same objet + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") + + def _deserialize(self, stream): + """Deserialize all information regarding this object from the stream + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") diff --git a/gitdb/test/object/test_blob.py b/gitdb/test/object/test_blob.py new file mode 100644 index 0000000..661c050 --- /dev/null +++ b/gitdb/test/object/test_blob.py @@ -0,0 +1,23 @@ +# test_blob.py +# Copyright (C) 2008, 2009 Michael Trier 
(mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from git.test.lib import * +from git import * +from gitdb.util import hex_to_bin + +class TestBlob(TestBase): + + def test_mime_type_should_return_mime_type_for_known_types(self): + blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'}) + assert_equal("image/png", blob.mime_type) + + def test_mime_type_should_return_text_plain_for_unknown_types(self): + blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA,'path': 'something'}) + assert_equal("text/plain", blob.mime_type) + + def test_nodict(self): + self.failUnlessRaises(AttributeError, setattr, self.rorepo.tree()['AUTHORS'], 'someattr', 2) + diff --git a/gitdb/test/object/test_commit.py b/gitdb/test/object/test_commit.py new file mode 100644 index 0000000..4a8d8b8 --- /dev/null +++ b/gitdb/test/object/test_commit.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# test_commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from git.test.lib import * +from git import * +from gitdb import IStream +from gitdb.util import hex_to_bin + +from cStringIO import StringIO +import time +import sys + + +def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False): + """traverse all commits in the history of commit identified by commit_id and check + if the serialization works. 
+ :param print_performance_info: if True, we will show how fast we are""" + ns = 0 # num serializations + nds = 0 # num deserializations + + st = time.time() + for cm in rwrepo.commit(commit_id).traverse(): + nds += 1 + + # assert that we deserialize commits correctly, hence we get the same + # sha on serialization + stream = StringIO() + cm._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) + assert istream.hexsha == cm.hexsha + + nc = Commit(rwrepo, Commit.NULL_BIN_SHA, cm.tree, + cm.author, cm.authored_date, cm.author_tz_offset, + cm.committer, cm.committed_date, cm.committer_tz_offset, + cm.message, cm.parents, cm.encoding) + + assert nc.parents == cm.parents + stream = StringIO() + nc._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + # reuse istream + istream.size = streamlen + istream.stream = stream + istream.binsha = None + nc.binsha = rwrepo.odb.store(istream).binsha + + # if it worked, we have exactly the same contents ! 
+ assert nc.hexsha == cm.hexsha + # END check commits + elapsed = time.time() - st + + if print_performance_info: + print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) + # END handle performance info + + +class TestCommit(TestBase): + + def test_bake(self): + + commit = self.rorepo.commit('2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + # commits have no dict + self.failUnlessRaises(AttributeError, setattr, commit, 'someattr', 1) + commit.author # bake + + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module\n" + + + def test_stats(self): + commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier ") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project\n" + + def test_unicode_actor(self): + # assure we can parse unicode actors correctly + name = "Üäöß 
ÄußÉ".decode("utf-8") + assert len(name) == 9 + special = Actor._from_string(u"%s " % name) + assert special.name == name + assert isinstance(special.name, unicode) + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] + p00 = p0.parents[0] + p10 = p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = self.rorepo.commit('d884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = 
list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. + """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) + + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.hexsha) + + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)[hex_to_bin('5117c9c8a4d3af19a9958677e45cda9269de1541')], Commit) + + def test_str(self): + commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) + assert_equal(Commit.NULL_HEX_SHA, str(commit)) + + def test_repr(self): + commit = Commit(self.rorepo, Commit.NULL_BIN_SHA) + assert_equal('' % Commit.NULL_HEX_SHA, repr(commit)) + + def test_equality(self): + commit1 = Commit(self.rorepo, Commit.NULL_BIN_SHA) + commit2 = Commit(self.rorepo, Commit.NULL_BIN_SHA) + commit3 = Commit(self.rorepo, "\1"*20) + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip 
is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + + @with_rw_repo('HEAD', bare=True) + def test_serialization(self, rwrepo): + # create all commits of our repo + assert_commit_serialization(rwrepo, '0.1.6') + + def test_serialization_unicode_support(self): + assert Commit.default_encoding.lower() == 'utf-8' + + # create a commit with unicode in the message, and the author's name + # Verify its serialization and deserialization + cmt = self.rorepo.commit('0.1.6') + assert isinstance(cmt.message, unicode) # it automatically decodes it as such + assert isinstance(cmt.author.name, unicode) # same here + + cmt.message = "üäêèß".decode("utf-8") + assert len(cmt.message) == 5 + + cmt.author.name = "äüß".decode("utf-8") + assert len(cmt.author.name) == 3 + + cstream = StringIO() + cmt._serialize(cstream) + cstream.seek(0) + assert len(cstream.getvalue()) + + ncmt = Commit(self.rorepo, cmt.binsha) + ncmt._deserialize(cstream) + + assert cmt.author.name == ncmt.author.name + assert cmt.message == ncmt.message + # actually, it can't be printed in a shell as repr wants to have ascii only + # it appears + cmt.author.__repr__() + diff --git a/gitdb/test/object/test_tree.py b/gitdb/test/object/test_tree.py new file mode 100644 index 0000000..ec10e96 --- /dev/null +++ b/gitdb/test/object/test_tree.py @@ -0,0 +1,144 @@ +# test_tree.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import os +from git.test.lib import * +from git import * +from git.objects.fun import ( + traverse_tree_recursive, + traverse_trees_recursive + ) +from cStringIO 
import StringIO + +class TestTree(TestBase): + + def test_serializable(self): + # tree at the given commit contains a submodule as well + roottree = self.rorepo.tree('6c1faef799095f3990e9970bc2cb10aa0221cf9c') + for item in roottree.traverse(ignore_self=False): + if item.type != Tree.type: + continue + # END skip non-trees + tree = item + # trees have no dict + self.failUnlessRaises(AttributeError, setattr, tree, 'someattr', 1) + + orig_data = tree.data_stream.read() + orig_cache = tree._cache + + stream = StringIO() + tree._serialize(stream) + assert stream.getvalue() == orig_data + + stream.seek(0) + testtree = Tree(self.rorepo, Tree.NULL_BIN_SHA, 0, '') + testtree._deserialize(stream) + assert testtree._cache == orig_cache + + + # TEST CACHE MUTATOR + mod = testtree.cache + self.failUnlessRaises(ValueError, mod.add, "invalid sha", 0, "name") + self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, 0, "invalid mode") + self.failUnlessRaises(ValueError, mod.add, Tree.NULL_HEX_SHA, tree.mode, "invalid/name") + + # add new item + name = "fake_dir" + mod.add(testtree.NULL_HEX_SHA, tree.mode, name) + assert name in testtree + + # its available in the tree immediately + assert isinstance(testtree[name], Tree) + + # adding it again will not cause multiple of them to be presents + cur_count = len(testtree) + mod.add(testtree.NULL_HEX_SHA, tree.mode, name) + assert len(testtree) == cur_count + + # fails with a different sha - name exists + hexsha = "1"*40 + self.failUnlessRaises(ValueError, mod.add, hexsha, tree.mode, name) + + # force it - replace existing one + mod.add(hexsha, tree.mode, name, force=True) + assert testtree[name].hexsha == hexsha + assert len(testtree) == cur_count + + # unchecked addition always works, even with invalid items + invalid_name = "hi/there" + mod.add_unchecked(hexsha, 0, invalid_name) + assert len(testtree) == cur_count + 1 + + del(mod[invalid_name]) + assert len(testtree) == cur_count + # del again, its fine + del(mod[invalid_name]) 
+ + # have added one item, we are done + mod.set_done() + mod.set_done() # multiple times are okay + + # serialize, its different now + stream = StringIO() + testtree._serialize(stream) + stream.seek(0) + assert stream.getvalue() != orig_data + + # replaces cache, but we make sure of it + del(testtree._cache) + testtree._deserialize(stream) + assert name in testtree + assert invalid_name not in testtree + # END for each item in tree + + def test_traverse(self): + root = self.rorepo.tree('0.1.6') + num_recursive = 0 + all_items = list() + for obj in root.traverse(): + if "/" in obj.path: + num_recursive += 1 + + assert isinstance(obj, (Blob, Tree)) + all_items.append(obj) + # END for each object + assert all_items == root.list_traverse() + + # limit recursion level to 0 - should be same as default iteration + assert all_items + assert 'CHANGES' in root + assert len(list(root)) == len(list(root.traverse(depth=1))) + + # only choose trees + trees_only = lambda i,d: i.type == "tree" + trees = list(root.traverse(predicate = trees_only)) + assert len(trees) == len(list( i for i in root.traverse() if trees_only(i,0) )) + + # test prune + lib_folder = lambda t,d: t.path == "lib" + pruned_trees = list(root.traverse(predicate = trees_only,prune = lib_folder)) + assert len(pruned_trees) < len(trees) + + # trees and blobs + assert len(set(trees)|set(root.trees)) == len(trees) + assert len(set(b for b in root if isinstance(b, Blob)) | set(root.blobs)) == len( root.blobs ) + subitem = trees[0][0] + assert "/" in subitem.path + assert subitem.name == os.path.basename(subitem.path) + + # assure that at some point the traversed paths have a slash in them + found_slash = False + for item in root.traverse(): + assert os.path.isabs(item.abspath) + if '/' in item.path: + found_slash = True + # END check for slash + + # slashes in paths are supported as well + assert root[item.path] == item == root/item.path + # END for each item + assert found_slash + diff --git 
a/gitdb/test/test_util.py b/gitdb/test/test_util.py index 90f4156..690242f 100644 --- a/gitdb/test/test_util.py +++ b/gitdb/test/test_util.py @@ -103,3 +103,30 @@ def test_lockedfd(self): self.fail("expected OSError") # END handle exceptions + +class TestActor(TestBase): + def test_from_string_should_separate_name_and_email(self): + a = Actor._from_string("Michael Trier ") + assert_equal("Michael Trier", a.name) + assert_equal("mtrier@example.com", a.email) + + # base type capabilities + assert a == a + assert not ( a != a ) + m = set() + m.add(a) + m.add(a) + assert len(m) == 1 + + def test_from_string_should_handle_just_name(self): + a = Actor._from_string("Michael Trier") + assert_equal("Michael Trier", a.name) + assert_equal(None, a.email) + + def test_should_display_representation(self): + a = Actor._from_string("Michael Trier ") + assert_equal('">', repr(a)) + + def test_str_should_alias_name(self): + a = Actor._from_string("Michael Trier ") + assert_equal(a.name, str(a)) diff --git a/gitdb/typ.py b/gitdb/typ.py index 6783e2e..a2e719b 100644 --- a/gitdb/typ.py +++ b/gitdb/typ.py @@ -19,4 +19,9 @@ class ObjectType(object): tree = str_tree_type tag = str_tag_type + commit_id = 1 + tree_id = 2 + blob_id = 3 + tag_id = 4 + #} END string types diff --git a/gitdb/util.py b/gitdb/util.py index 432eec4..650dc57 100644 --- a/gitdb/util.py +++ b/gitdb/util.py @@ -110,6 +110,21 @@ def __getslice__(self, start, end): #{ Routines +def stream_copy(source, destination, chunk_size=512*1024): + """Copy all data from the source stream into the destination stream in chunks + of size chunk_size + + :return: amount of bytes written""" + br = 0 + while True: + chunk = source.read(chunk_size) + destination.write(chunk) + br += len(chunk) + if len(chunk) < chunk_size: + break + # END reading output stream + return br + def make_sha(source=''): """A python2.4 workaround for the sha/hashlib module fiasco :note: From the dulwich project """ @@ -650,5 +665,51 @@ def iter_items(cls, 
repo, *args, **kwargs): :return: iterator yielding Items""" raise NotImplementedError("To be implemented by Subclass") + +class IterableList(list): + """ + List of iterable objects allowing to query an object by id or by named index:: + + heads = repo.heads + heads.master + heads['master'] + heads[0] + + It requires an id_attribute name to be set which will be queried from its + contained items to have a means for comparison. + + A prefix can be specified which is to be used in case the id returned by the + items always contains a prefix that does not matter to the user, so it + can be left out.""" + __slots__ = ('_id_attr', '_prefix') + + def __new__(cls, id_attr, prefix=''): + return super(IterableList,cls).__new__(cls) + + def __init__(self, id_attr, prefix=''): + self._id_attr = id_attr + self._prefix = prefix + if not isinstance(id_attr, basestring): + raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization") + # END help debugging ! + + def __getattr__(self, attr): + attr = self._prefix + attr + for item in self: + if getattr(item, self._id_attr) == attr: + return item + # END for each item + return list.__getattribute__(self, attr) + + def __getitem__(self, index): + if isinstance(index, int): + return list.__getitem__(self,index) + try: + return getattr(self, index) + except AttributeError: + raise IndexError( "No item found with id %r" % (self._prefix + index) ) + + + #} END utilities From a6778e0dde176b87ebad124837b508a5d7bb1251 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 25 Apr 2011 17:00:17 +0200 Subject: [PATCH 04/15] Fixed a few broken tests. test_refs is still not working as it needs more thorough changes based on the interfaces actually available in gitdb. This should work though as all references have iter_* methods which do the actual work. Added git config parser to the mix, including working test - the module is not very interdependent, fortunately.
--- AUTHORS | 1 + gitdb/config.py | 421 ++++++++ gitdb/odict.py | 1399 +++++++++++++++++++++++++ gitdb/test/fixtures/git_config | 23 + gitdb/test/fixtures/git_config_global | 24 + gitdb/test/test_config.py | 102 ++ gitdb/test/test_refs.py | 4 +- gitdb/test/test_util.py | 15 +- gitdb/util.py | 1 + 9 files changed, 1981 insertions(+), 9 deletions(-) create mode 100644 gitdb/config.py create mode 100644 gitdb/odict.py create mode 100644 gitdb/test/fixtures/git_config create mode 100644 gitdb/test/fixtures/git_config_global create mode 100644 gitdb/test/test_config.py diff --git a/AUTHORS b/AUTHORS index 490baad..a8b505e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1 +1,2 @@ Creator: Sebastian Thiel +Contributor: Michael Trier (code taken in a modified form from GitPython) diff --git a/gitdb/config.py b/gitdb/config.py new file mode 100644 index 0000000..a8efe46 --- /dev/null +++ b/gitdb/config.py @@ -0,0 +1,421 @@ +# config.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# Heavily modified by Sebastian Thiel (byronimo@gmail.com) +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing module parser implementation able to properly read and write +configuration files""" + +import re +import os +import ConfigParser as cp +import inspect +import cStringIO + +from gitdb.odict import OrderedDict +from gitdb.util import LockFile + +__all__ = ('GitConfigParser', 'SectionConstraint') + +class MetaParserBuilder(type): + """Utlity class wrapping base-class methods into decorators that assure read-only properties""" + def __new__(metacls, name, bases, clsdict): + """ + Equip all base-class methods with a needs_values decorator, and all non-const methods + with a set_dirty_and_flush_changes decorator in addition to that.""" + kmm = '_mutating_methods_' + if kmm in clsdict: + mutating_methods = clsdict[kmm] + for base in bases: + methods = ( t for t in 
inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) + for name, method in methods: + if name in clsdict: + continue + method_with_values = needs_values(method) + if name in mutating_methods: + method_with_values = set_dirty_and_flush_changes(method_with_values) + # END mutating methods handling + + clsdict[name] = method_with_values + # END for each name/method pair + # END for each base + # END if mutating methods configuration is set + + new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) + return new_type + + + +def needs_values(func): + """Returns method assuring we read values (on demand) before we try to access them""" + def assure_data_present(self, *args, **kwargs): + self.read() + return func(self, *args, **kwargs) + # END wrapper method + assure_data_present.__name__ = func.__name__ + return assure_data_present + +def set_dirty_and_flush_changes(non_const_func): + """Return method that checks whether given non constant function may be called. + If so, the instance will be set dirty. + Additionally, we flush the changes right to disk""" + def flush_changes(self, *args, **kwargs): + rval = non_const_func(self, *args, **kwargs) + self.write() + return rval + # END wrapper method + flush_changes.__name__ = non_const_func.__name__ + return flush_changes + + +class SectionConstraint(object): + """Constrains a ConfigParser to only option commands which are constrained to + always use the section we have been initialized with. 
+ + It supports all ConfigParser methods that operate on an option""" + __slots__ = ("_config", "_section_name") + _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option", + "remove_section", "remove_option", "options") + + def __init__(self, config, section): + self._config = config + self._section_name = section + + def __getattr__(self, attr): + if attr in self._valid_attrs_: + return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) + return super(SectionConstraint,self).__getattribute__(attr) + + def _call_config(self, method, *args, **kwargs): + """Call the configuration at the given method which must take a section name + as first argument""" + return getattr(self._config, method)(self._section_name, *args, **kwargs) + + @property + def config(self): + """return: Configparser instance we constrain""" + return self._config + + +class GitConfigParser(cp.RawConfigParser, object): + """Implements specifics required to read git style configuration files. + + This variation behaves much like the git.config command such that the configuration + will be read on demand based on the filepath given during initialization. + + The changes will automatically be written once the instance goes out of scope, but + can be triggered manually as well. + + The configuration file will be locked if you intend to change values preventing other + instances to write concurrently. + + :note: + The config is case-sensitive even when queried, hence section and option names + must match perfectly.""" + __metaclass__ = MetaParserBuilder + + + #{ Configuration + # The lock type determines the type of lock to use in new configuration readers. + # They must be compatible to the LockFile interface. + # A suitable alternative would be the BlockingLockFile + t_lock = LockFile + + #} END configuration + + OPTCRE = re.compile( + r'\s?(?P