
Commit ffc3af6

Author: Katrina Owen (committed)
Support gzip for schema cache
This adds gzip support for both the YAML and the Marshal serialization strategies. Particularly large schema caches can become a problem when deploying to Kubernetes, as there is currently a 1*1024*1024 byte limit for the ConfigMap. For large databases, the schema cache can exceed this limit.
1 parent 4185614 commit ffc3af6
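
For a sense of the size constraint described in the commit message, here is a minimal sketch of the kind of check that motivates the change. The file paths are assumptions (db/schema_cache.yml is only the conventional dump location), and nothing here is configured by this commit itself:

require "zlib"

# Hypothetical paths -- adjust to wherever the schema cache is dumped.
plain_path = "db/schema_cache.yml"
gzip_path  = "db/schema_cache.yml.gz"
configmap_limit = 1 * 1024 * 1024 # the ConfigMap byte limit mentioned above

yaml = File.read(plain_path)
Zlib::GzipWriter.open(gzip_path) { |gz| gz.write(yaml) }

puts "plain: #{File.size(plain_path)} bytes"
puts "gzip:  #{File.size(gzip_path)} bytes"
puts "fits in a ConfigMap? #{File.size(gzip_path) <= configmap_limit}"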

File tree

2 files changed (+108, -3 lines)


activerecord/lib/active_record/connection_adapters/schema_cache.rb

Lines changed: 28 additions & 3 deletions
@@ -8,9 +8,21 @@ class SchemaCache
       def self.load_from(filename)
         return unless File.file?(filename)
 
-        file = File.read(filename)
-        filename.include?(".dump") ? Marshal.load(file) : YAML.load(file)
+        read(filename) do |file|
+          filename.include?(".dump") ? Marshal.load(file) : YAML.load(file)
+        end
+      end
+
+      def self.read(filename, &block)
+        if File.extname(filename) == ".gz"
+          Zlib::GzipReader.open(filename) { |gz|
+            yield gz.read
+          }
+        else
+          yield File.read(filename)
+        end
       end
+      private_class_method :read
 
       attr_reader :version
       attr_accessor :connection
@@ -143,7 +155,7 @@ def clear_data_source_cache!(name)
       def dump_to(filename)
         clear!
         connection.data_sources.each { |table| add(table) }
-        File.atomic_write(filename) { |f|
+        open(filename) { |f|
           if filename.include?(".dump")
             f.write(Marshal.dump(self))
           else
@@ -194,6 +206,19 @@ def deep_deduplicate(value)
       def prepare_data_sources
         connection.data_sources.each { |source| @data_sources[source] = true }
       end
+
+      def open(filename)
+        File.atomic_write(filename) do |file|
+          if File.extname(filename) == ".gz"
+            zipper = Zlib::GzipWriter.new file
+            yield zipper
+            zipper.flush
+            zipper.close
+          else
+            yield file
+          end
+        end
+      end
     end
   end
 end
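
The change leaves two orthogonal decisions in load_from: the serializer is still picked by whether the filename contains ".dump" (Marshal) versus anything else (YAML), while compression is now picked by a ".gz" extension. A standalone sketch of that dispatch, for illustration only (the real code lives in the private read class method above):

require "yaml"
require "zlib"

# Illustration of the dispatch in SchemaCache.load_from / read above;
# not part of the actual class.
def load_cache(filename)
  raw =
    if File.extname(filename) == ".gz"
      Zlib::GzipReader.open(filename) { |gz| gz.read }
    else
      File.read(filename)
    end
  filename.include?(".dump") ? Marshal.load(raw) : YAML.load(raw)
end

# All four spellings resolve sensibly:
#   schema_cache.yml      -> YAML, uncompressed
#   schema_cache.yml.gz   -> YAML, gzipped
#   schema_cache.dump     -> Marshal, uncompressed
#   schema_cache.dump.gz  -> Marshal, gzipped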

activerecord/test/cases/connection_adapters/schema_cache_test.rb

Lines changed: 80 additions & 0 deletions
@@ -43,6 +43,49 @@ def test_yaml_dump_and_load
         tempfile.unlink
       end
 
+      def test_yaml_dump_and_load_with_gzip
+        # Create an empty cache.
+        cache = SchemaCache.new @connection
+
+        tempfile = Tempfile.new(["schema_cache-", ".yml.gz"])
+        # Dump it. It should get populated before dumping.
+        cache.dump_to(tempfile.path)
+
+        # Unzip and load manually.
+        cache = Zlib::GzipReader.open(tempfile.path) { |gz| YAML.load(gz.read) }
+
+        # Give it a connection. Usually the connection
+        # would get set on the cache when it's retrieved
+        # from the pool.
+        cache.connection = @connection
+
+        assert_no_queries do
+          assert_equal 12, cache.columns("posts").size
+          assert_equal 12, cache.columns_hash("posts").size
+          assert cache.data_sources("posts")
+          assert_equal "id", cache.primary_keys("posts")
+          assert_equal 1, cache.indexes("posts").size
+          assert_equal @database_version.to_s, cache.database_version.to_s
+        end
+
+        # Load the cache the usual way.
+        cache = SchemaCache.load_from(tempfile.path)
+
+        # Give it a connection.
+        cache.connection = @connection
+
+        assert_no_queries do
+          assert_equal 12, cache.columns("posts").size
+          assert_equal 12, cache.columns_hash("posts").size
+          assert cache.data_sources("posts")
+          assert_equal "id", cache.primary_keys("posts")
+          assert_equal 1, cache.indexes("posts").size
+          assert_equal @database_version.to_s, cache.database_version.to_s
+        end
+      ensure
+        tempfile.unlink
+      end
+
       def test_yaml_loads_5_1_dump
         cache = SchemaCache.load_from(schema_dump_path)
         cache.connection = @connection
@@ -162,6 +205,43 @@ def test_marshal_dump_and_load_via_disk
         tempfile.unlink
       end
 
+      def test_marshal_dump_and_load_with_gzip
+        # Create an empty cache.
+        cache = SchemaCache.new @connection
+
+        tempfile = Tempfile.new(["schema_cache-", ".dump.gz"])
+        # Dump it. It should get populated before dumping.
+        cache.dump_to(tempfile.path)
+
+        # Load a new cache manually.
+        cache = Zlib::GzipReader.open(tempfile.path) { |gz| Marshal.load(gz.read) }
+        cache.connection = @connection
+
+        assert_no_queries do
+          assert_equal 12, cache.columns("posts").size
+          assert_equal 12, cache.columns_hash("posts").size
+          assert cache.data_sources("posts")
+          assert_equal "id", cache.primary_keys("posts")
+          assert_equal 1, cache.indexes("posts").size
+          assert_equal @database_version.to_s, cache.database_version.to_s
+        end
+
+        # Load a new cache.
+        cache = SchemaCache.load_from(tempfile.path)
+        cache.connection = @connection
+
+        assert_no_queries do
+          assert_equal 12, cache.columns("posts").size
+          assert_equal 12, cache.columns_hash("posts").size
+          assert cache.data_sources("posts")
+          assert_equal "id", cache.primary_keys("posts")
+          assert_equal 1, cache.indexes("posts").size
+          assert_equal @database_version.to_s, cache.database_version.to_s
+        end
+      ensure
+        tempfile.unlink
+      end
+
       def test_data_source_exist
         assert @cache.data_source_exists?("posts")
         assert_not @cache.data_source_exists?("foo")
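
As a rough usage sketch modeled on these tests: any path ending in ".gz" passed to dump_to is written through Zlib::GzipWriter, and load_from transparently reads it back. The path below and the "posts" table (a test fixture) are assumptions, and this presumes an established Active Record connection:

connection = ActiveRecord::Base.connection
cache = ActiveRecord::ConnectionAdapters::SchemaCache.new(connection)

# Dump the populated cache as gzipped YAML.
cache.dump_to("db/schema_cache.yml.gz")

# Read it back; the .gz extension routes through Zlib::GzipReader.
cache = ActiveRecord::ConnectionAdapters::SchemaCache.load_from("db/schema_cache.yml.gz")
cache.connection = connection
cache.columns("posts") # answered from the cache, no schema query issued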

0 commit comments
