Commit 3c12de3

Improved the pack writing test to show that pack generation can be lightning fast, with nearly no overhead, if the data streams in fast enough (~30 MB/s when writing a pack). This shows that there is huge potential for sending packs, considering that we are actually recompressing them (without deltification). To be faster in the future, we could probably just send ref-deltas or full objects as found in the pack, without doing any recompression.
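To make the idea concrete: entries in a pack are already zlib-deflated, so a writer that forwards a stored entry verbatim skips one inflate/deflate round trip per object. The following is a minimal stand-alone sketch in plain Python, not gitdb's actual API; recompress_entry and copy_entry_verbatim are hypothetical names, and the timing loop only demonstrates the relative cost of the two paths.

import time
import zlib

def recompress_entry(compressed, write):
    # current behaviour: inflate the stored stream, then deflate it again
    write(zlib.compress(zlib.decompress(compressed)))

def copy_entry_verbatim(compressed, write):
    # proposed behaviour: the entry is already zlib-compressed in the
    # source pack, so it can be forwarded without touching the payload
    write(compressed)

if __name__ == '__main__':
    entry = zlib.compress(b'x' * 8192)   # stand-in for a stored pack entry
    sink = bytearray()
    for name, fun in (('recompress', recompress_entry),
                      ('verbatim', copy_entry_verbatim)):
        st = time.time()
        for _ in range(20000):
            fun(entry, sink.extend)
        print('%-10s %.3f s' % (name, time.time() - st))

Since the verbatim path degenerates to a memory copy, this is why skipping recompression promises a large speedup when serving packs.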
1 parent a5497c4 commit 3c12de3

File tree (1 file changed, +28 −21 lines):

git/test/performance/db/test_packedodb_pure.py (+28 −21)
@@ -33,31 +33,38 @@ class TestPurePackedODB(TestPurePackedODBPerformanceBase):
     PackedODBCls = PurePackedODB
     #} END configuration
 
+    def test_pack_writing_note(self):
+        sys.stderr.write("test_pack_writing should be adjusted to support different databases to read from - see test for more info")
+        raise SkipTest()
+
     def test_pack_writing(self):
         # see how fast we can write a pack from object streams.
         # This will not be fast, as we take time for decompressing the streams as well
+        # For now we test the fast streaming and slow streaming versions manually
         ostream = CountedNullStream()
-        pdb = self.ropdb
-
-        ni = 5000
-        count = 0
-        total_size = 0
-        st = time()
-        objs = list()
-        for sha in pdb.sha_iter():
-            count += 1
-            objs.append(pdb.stream(sha))
-            if count == ni:
-                break
-        #END gather objects for pack-writing
-        elapsed = time() - st
-        print >> sys.stderr, "PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
-
-        st = time()
-        PackEntity.write_pack(objs, ostream.write)
-        elapsed = time() - st
-        total_kb = ostream.bytes_written() / 1000
-        print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
+        # NOTE: We use the same repo twice to see whether OS caching helps
+        for rorepo in (self.rorepo, self.rorepo, self.ropdb):
+
+            ni = 5000
+            count = 0
+            total_size = 0
+            st = time()
+            objs = list()
+            for sha in rorepo.sha_iter():
+                count += 1
+                objs.append(rorepo.stream(sha))
+                if count == ni:
+                    break
+            #END gather objects for pack-writing
+            elapsed = time() - st
+            print >> sys.stderr, "PDB Streaming: Got %i streams from %s by sha in %f s ( %f streams/s )" % (ni, rorepo.__class__.__name__, elapsed, ni / elapsed)
+
+            st = time()
+            PackEntity.write_pack(objs, ostream.write)
+            elapsed = time() - st
+            total_kb = ostream.bytes_written() / 1000
+            print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
+        #END for each rorepo
 
 
     def test_stream_reading(self):
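The test relies on CountedNullStream to time pack writing without disk I/O skewing the numbers. As a rough stand-in (assuming only the two calls the test makes, write() and bytes_written(); the real class lives in the test utilities), a counting null stream could look like this:

import sys
import time

class CountingNullStream(object):
    """Discard all data written, but remember how many bytes passed through."""
    def __init__(self):
        self._bytes = 0

    def write(self, data):
        self._bytes += len(data)
        return len(data)

    def bytes_written(self):
        return self._bytes

# usage mirrors the test: hand .write to the pack writer, then derive kb/s
ostream = CountingNullStream()
st = time.time()
ostream.write(b'\0' * (30 * 1000 * 1000))  # stand-in for PackEntity.write_pack(objs, ostream.write)
elapsed = time.time() - st
total_kb = ostream.bytes_written() / 1000
sys.stderr.write("Wrote %i kb in %f s (%f kb/s)\n" % (total_kb, elapsed, total_kb / max(elapsed, 1e-9)))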
