Commit a243827

Implemented stream tests and found a bug along the way; a test framework for streams is slowly starting to take shape, but it's not there yet
1 parent 6fbb693 commit a243827

5 files changed (+490 -251 lines)


lib/git/odb/fun.py

Lines changed: 12 additions & 5 deletions
@@ -83,26 +83,33 @@ def write_object(type, size, read, write, chunk_size=chunk_size):
     :param size: amount of bytes to write from source_stream
     :param read: read method of a stream providing the content data
     :param write: write method of the output stream
-    :param close_target_stream: if True, the target stream will be closed when
+    :param close_target_stream: if True, the target stream will be closed when
         the routine exits, even if an error is thrown
     :return: The actual amount of bytes written to stream, which includes the header and a trailing newline"""
     tbw = 0                # total num bytes written
-    dbw = 0                # num data bytes written
 
     # WRITE HEADER: type SP size NULL
     tbw += write("%s %i\0" % (type, size))
+    tbw += stream_copy(read, write, size, chunk_size)
+
+    return tbw
 
+def stream_copy(read, write, size, chunk_size):
+    """Copy a stream up to size bytes using the provided read and write methods,
+    in chunks of chunk_size
+    :note: its much like stream_copy utility, but operates just using methods"""
+    dbw = 0                # num data bytes written
+
     # WRITE ALL DATA UP TO SIZE
     while True:
         cs = min(chunk_size, size-dbw)
         data_len = write(read(cs))
         dbw += data_len
         if data_len < cs or dbw == size:
-            tbw += dbw
             break
         # END check for stream end
     # END duplicate data
-    return tbw
-
+    return dbw
+
 
 #} END routines
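The net effect is that write_object now delegates its copy loop to the new stream_copy helper, which the stream tests can exercise on its own. A minimal usage sketch, assuming the package imports as git.odb.fun; the CountingWriter wrapper is hypothetical, added only because stream_copy expects write() to return the number of bytes written (a plain file object's write returns None in Python 2):

from cStringIO import StringIO
from git.odb.fun import stream_copy

class CountingWriter(object):
    """Hypothetical helper: wraps a stream so write() reports bytes written,
    which the chunked copy loop relies on to detect progress"""
    def __init__(self, stream):
        self._stream = stream
    def write(self, data):
        self._stream.write(data)
        return len(data)

src = StringIO("x" * 1000)
dst = CountingWriter(StringIO())

# copy exactly 1000 bytes in 256-byte chunks, using bound methods only
copied = stream_copy(src.read, dst.write, 1000, 256)
assert copied == 1000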

lib/git/odb/stream.py

Lines changed: 40 additions & 26 deletions
@@ -75,7 +75,7 @@ def is_compressed(self):
         """:return: True if reads of this stream yield zlib compressed data. Default False
         :note: this does not imply anything about the actual internal storage.
             Hence the data could be uncompressed, but read compressed, or vice versa"""
-        raise False
+        return False
 
     #} END interface
 
@@ -105,10 +105,12 @@ def __init__(self, type, size, stream, sha=None, compressed=None):
 
     #{ Interface
 
+    @property
     def hexsha(self):
         """:return: our sha, hex encoded, 40 bytes"""
         return to_hex_sha(self[0])
-
+
+    @property
     def binsha(self):
         """:return: our sha as binary, 20 bytes"""
         return to_bin_sha(self[0])
@@ -229,10 +231,11 @@ class DecompressMemMapReader(object):
     and decompress it into chunks, thats all ... """
     __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')
 
-    max_read_size = 512*1024
+    max_read_size = 512*1024        # currently unused
 
     def __init__(self, m, close_on_deletion, size):
-        """Initialize with mmap for stream reading"""
+        """Initialize with mmap for stream reading
+        :param m: must be content data - use new if you have object data and no size"""
         self._m = m
         self._zip = zlib.decompressobj()
         self._buf = None        # buffer of decompressed bytes
@@ -248,32 +251,38 @@ def __del__(self):
             self._m.close()
         # END handle resource freeing
 
-    @classmethod
-    def new(self, m, close_on_deletion=False):
-        """Create a new DecompressMemMapReader instance for acting as a read-only stream
-        This method parses the object header from m and returns the parsed
-        type and size, as well as the created stream instance.
-        :param m: memory map on which to oparate
-        :param close_on_deletion: if True, the memory map will be closed once we are
-            being deleted"""
-        inst = DecompressMemMapReader(m, close_on_deletion, 0)
-
+    def _parse_header_info(self):
+        """If this stream contains object data, parse the header info and skip the
+        stream to a point where each read will yield object content
+        :return: parsed type_string, size"""
         # read header
         maxb = 512        # should really be enough, cgit uses 8192 I believe
-        inst._s = maxb
-        hdr = inst.read(maxb)
+        self._s = maxb
+        hdr = self.read(maxb)
         hdrend = hdr.find("\0")
         type, size = hdr[:hdrend].split(" ")
         size = int(size)
-        inst._s = size
+        self._s = size
 
         # adjust internal state to match actual header length that we ignore
         # The buffer will be depleted first on future reads
-        inst._br = 0
+        self._br = 0
         hdrend += 1        # count terminating \0
-        inst._buf = StringIO(hdr[hdrend:])
-        inst._buflen = len(hdr) - hdrend
+        self._buf = StringIO(hdr[hdrend:])
+        self._buflen = len(hdr) - hdrend
+
+        return type, size
 
+    @classmethod
+    def new(self, m, close_on_deletion=False):
+        """Create a new DecompressMemMapReader instance for acting as a read-only stream
+        This method parses the object header from m and returns the parsed
+        type and size, as well as the created stream instance.
+        :param m: memory map on which to oparate. It must be object data ( header + contents )
+        :param close_on_deletion: if True, the memory map will be closed once we are
+            being deleted"""
+        inst = DecompressMemMapReader(m, close_on_deletion, 0)
+        type, size = inst._parse_header_info()
         return type, size, inst
 
     def read(self, size=-1):
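For context on what _parse_header_info pulls apart: a loose git object is the ASCII header "type SP size NUL" followed by the raw content, and the whole thing is zlib-deflated. A standalone illustration of that layout (Python 2 strings, matching the codebase; not using the class itself):

import zlib

content = "hello world"
# object data = header + content, as new() expects to receive it
obj_data = zlib.compress("blob %i\0%s" % (len(content), content))

hdr = zlib.decompress(obj_data)
hdrend = hdr.find("\0")
type_name, size = hdr[:hdrend].split(" ")
assert (type_name, int(size)) == ("blob", 11)
assert hdr[hdrend + 1:] == content    # the remainder is buffered as stream content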
@@ -355,17 +364,22 @@ def read(self, size=-1):
             # needs to be as large as the uncompressed bytes we want to read.
             self._cws = self._cwe - len(tail)
             self._cwe = self._cws + size
-
-
-            indata = self._m[self._cws:self._cwe]    # another copy ... :(
-            # get the actual window end to be sure we don't use it for computations
-            self._cwe = self._cws + len(indata)
         else:
             cws = self._cws
             self._cws = self._cwe
             self._cwe = cws + size
-            indata = self._m[self._cws:self._cwe]    # ... copy it again :(
         # END handle tail
+
+
+        # if window is too small, make it larger so zip can decompress something
+        win_size = self._cwe - self._cws
+        if win_size < 8:
+            self._cwe = self._cws + 8
+        # END adjust winsize
+        indata = self._m[self._cws:self._cwe]    # another copy ... :(
+
+        # get the actual window end to be sure we don't use it for computations
+        self._cwe = self._cws + len(indata)
 
         dcompdat = self._zip.decompress(indata, size)
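This last hunk is the bug fix the commit message mentions: with a read window only a few bytes wide, zlib may consume the input without emitting any output, so a fixed-size window could stall forever; widening it to at least 8 bytes guarantees forward progress. The underlying behaviour can be seen directly (exact thresholds vary with the data):

import zlib

compressed = zlib.compress("some example payload for a stream test")
dco = zlib.decompressobj()

# two bytes barely cover the zlib stream header - the input is consumed,
# but no decompressed output is produced yet
assert dco.decompress(compressed[:2]) == ""

# a wider window finally lets the decompressor emit data
out = dco.decompress(compressed[2:])
assert out.startswith("some example")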
test/git/performance/test_streams.py

Lines changed: 2 additions & 11 deletions
@@ -3,13 +3,11 @@
 from test.testlib import *
 from git.odb import *
 
-from array import array
 from cStringIO import StringIO
 from time import time
 import os
 import sys
 import stat
-import random
 import subprocess
 
 
@@ -18,18 +16,11 @@
     )
 
 
-
 def make_memory_file(size_in_bytes, randomize=False):
     """:return: tuple(size_of_stream, stream)
     :param randomize: try to produce a very random stream"""
-    actual_size = size_in_bytes / 4
-    producer = xrange(actual_size)
-    if randomize:
-        producer = list(producer)
-        random.shuffle(producer)
-    # END randomize
-    a = array('i', producer)
-    return actual_size*4, StringIO(a.tostring())
+    d = make_bytes(size_in_bytes, randomize)
+    return len(d), StringIO(d)
 
 
 class TestObjDBPerformance(TestBigRepoR):
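make_memory_file now delegates to a shared make_bytes helper in the test library, so the same data generator can serve both stream and memory-file tests. A plausible equivalent of that helper, reconstructed from the deleted lines purely for illustration (the real testlib version may differ):

import random
from array import array

def make_bytes(size_in_bytes, randomize=False):
    """:return: byte string of approximately the given size, optionally shuffled"""
    actual_size = size_in_bytes / 4    # py2 floor division; 4 bytes per 'i' item
    producer = xrange(actual_size)
    if randomize:
        producer = list(producer)
        random.shuffle(producer)
    # END randomize
    return array('i', producer).tostring()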
