
Commit 298bbc1

Authored by billyjacobson, crwilcox, and Takashi Matsuo

bigtable: Handle dev instances and use storage utilization in metric scaler (GoogleCloudPlatform#3119)

* WIP handle development instances in metric scaler
* use storage utilization and tested
* Fix metric queries
* remove tests for low storage util
* cleanup metric query
* EOF new line
* use uuid instead of random
* lint
* fix uuid length
* fix uuid length
* fix uuid length (again)

Co-authored-by: Christopher Wilcox <crwilcox@google.com>
Co-authored-by: Takashi Matsuo <tmatsuo@google.com>
1 parent 8b569cb commit 298bbc1
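
The change to main() boils down to a two-metric decision rule: scale up when either CPU load or storage utilization is above its high threshold, and scale down only when CPU is below the low threshold while storage is still under the high threshold. A minimal sketch of that rule, using the hypothetical helper name decide_scaling (not part of this commit):

def decide_scaling(cpu, storage, high_cpu, low_cpu, high_storage):
    """Return +1 to add nodes, -1 to remove nodes, 0 to do nothing."""
    if cpu > high_cpu or storage > high_storage:
        return 1   # hot CPU or nearly full disks: scale up
    if cpu < low_cpu and storage < high_storage:
        return -1  # idle CPU and comfortable storage: scale down
    return 0       # otherwise leave the cluster alone


# Low CPU alone no longer triggers a scale-down when storage is high.
assert decide_scaling(0.2, 0.7, high_cpu=0.6, low_cpu=0.3, high_storage=0.6) == 1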

File tree: 2 files changed (+130 lines added, −28 lines removed)


bigtable/metricscaler/metricscaler.py

Lines changed: 55 additions & 18 deletions
@@ -21,6 +21,7 @@
 
 from google.cloud import bigtable
 from google.cloud import monitoring_v3
+from google.cloud.bigtable import enums
 from google.cloud.monitoring_v3 import query
 
 PROJECT = os.environ['GCLOUD_PROJECT']
@@ -39,12 +40,29 @@ def get_cpu_load():
                             metric_type='bigtable.googleapis.com/'
                                         'cluster/cpu_load',
                             minutes=5)
-    time_series = list(cpu_query)
-    recent_time_series = time_series[0]
-    return recent_time_series.points[0].value.double_value
+    cpu = next(cpu_query.iter())
+    return cpu.points[0].value.double_value
     # [END bigtable_cpu]
 
 
+def get_storage_utilization():
+    """Returns the most recent Cloud Bigtable storage utilization measurement.
+
+    Returns:
+        float: The most recent Cloud Bigtable storage utilization metric
+    """
+    # [START bigtable_metric_scaler_storage_utilization]
+    client = monitoring_v3.MetricServiceClient()
+    utilization_query = query.Query(client,
+                                    project=PROJECT,
+                                    metric_type='bigtable.googleapis.com/'
+                                                'cluster/storage_utilization',
+                                    minutes=5)
+    utilization = next(utilization_query.iter())
+    return utilization.points[0].value.double_value
+    # [END bigtable_metric_scaler_storage_utilization]
+
+
 def scale_bigtable(bigtable_instance, bigtable_cluster, scale_up):
     """Scales the number of Cloud Bigtable nodes up or down.
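
Both get_cpu_load() and the new get_storage_utilization() read a metric the same way; here is a hedged sketch of the shared pattern, with get_metric_value as a hypothetical name and assuming the monitoring_v3 query API used above:

from google.cloud import monitoring_v3
from google.cloud.monitoring_v3 import query


def get_metric_value(project_id, metric_type, minutes=5):
    """Return the latest double value from the first time series of a metric."""
    client = monitoring_v3.MetricServiceClient()
    metric_query = query.Query(client,
                               project=project_id,
                               metric_type=metric_type,
                               minutes=minutes)
    # Query.iter() yields TimeSeries objects lazily; taking the first one and
    # its first point mirrors next(cpu_query.iter()) above and avoids
    # materializing the whole result list.
    series = next(metric_query.iter())
    return series.points[0].value.double_value

Calling it with 'bigtable.googleapis.com/cluster/storage_utilization' would behave like the snippet above, assuming at least one time series is returned (next() raises StopIteration otherwise).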
@@ -79,6 +97,9 @@ def scale_bigtable(bigtable_instance, bigtable_cluster, scale_up):
     instance = bigtable_client.instance(bigtable_instance)
     instance.reload()
 
+    if instance.type_ == enums.Instance.Type.DEVELOPMENT:
+        raise ValueError("Development instances cannot be scaled.")
+
     cluster = instance.cluster(bigtable_cluster)
     cluster.reload()
 
@@ -104,33 +125,43 @@ def scale_bigtable(bigtable_instance, bigtable_cluster, scale_up):
 
 
 def main(
-        bigtable_instance,
-        bigtable_cluster,
-        high_cpu_threshold,
-        low_cpu_threshold,
-        short_sleep,
-        long_sleep):
+        bigtable_instance,
+        bigtable_cluster,
+        high_cpu_threshold,
+        low_cpu_threshold,
+        high_storage_threshold,
+        short_sleep,
+        long_sleep
+):
     """Main loop runner that autoscales Cloud Bigtable.
 
     Args:
         bigtable_instance (str): Cloud Bigtable instance ID to autoscale
         high_cpu_threshold (float): If CPU is higher than this, scale up.
         low_cpu_threshold (float): If CPU is lower than this, scale down.
+        high_storage_threshold (float): If storage is higher than this,
+                                        scale up.
         short_sleep (int): How long to sleep after no operation
         long_sleep (int): How long to sleep after the number of nodes is
                           changed
     """
     cluster_cpu = get_cpu_load()
+    cluster_storage = get_storage_utilization()
     print('Detected cpu of {}'.format(cluster_cpu))
-    if cluster_cpu > high_cpu_threshold:
-        scale_bigtable(bigtable_instance, bigtable_cluster, True)
-        time.sleep(long_sleep)
-    elif cluster_cpu < low_cpu_threshold:
-        scale_bigtable(bigtable_instance, bigtable_cluster, False)
-        time.sleep(long_sleep)
-    else:
-        print('CPU within threshold, sleeping.')
-        time.sleep(short_sleep)
+    print('Detected storage utilization of {}'.format(cluster_storage))
+    try:
+        if cluster_cpu > high_cpu_threshold or cluster_storage > high_storage_threshold:
+            scale_bigtable(bigtable_instance, bigtable_cluster, True)
+            time.sleep(long_sleep)
+        elif cluster_cpu < low_cpu_threshold:
+            if cluster_storage < high_storage_threshold:
+                scale_bigtable(bigtable_instance, bigtable_cluster, False)
+                time.sleep(long_sleep)
+        else:
+            print('CPU within threshold, sleeping.')
+            time.sleep(short_sleep)
+    except Exception as e:
+        print("Error during scaling: %s", e)
 
 
 if __name__ == '__main__':
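
With the new guard, a caller that points scale_bigtable() at a development instance gets a ValueError, which the try/except in main() turns into a log line instead of a crash. A hedged illustration (the instance and cluster IDs are placeholders):

try:
    scale_bigtable('my-dev-instance', 'my-dev-instance', scale_up=True)
except ValueError as err:
    # Development instances run a single fixed node, so scaling is refused.
    print('Refusing to scale: {}'.format(err))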
@@ -150,6 +181,11 @@ def main(
         '--low_cpu_threshold',
         help='If Cloud Bigtable CPU usage is below this threshold, scale down',
         default=0.2)
+    parser.add_argument(
+        '--high_storage_threshold',
+        help='If Cloud Bigtable storage utilization is above this threshold, '
+             'scale up',
+        default=0.6)
     parser.add_argument(
         '--short_sleep',
         help='How long to sleep in seconds between checking metrics after no '
@@ -168,5 +204,6 @@ def main(
         args.bigtable_cluster,
         float(args.high_cpu_threshold),
         float(args.low_cpu_threshold),
+        float(args.high_storage_threshold),
         int(args.short_sleep),
         int(args.long_sleep))
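
With the extra parameter, the entry point now takes the storage threshold between the CPU thresholds and the sleep intervals. A hedged example of one scaling check driven directly from Python (the IDs, threshold, and sleep values are placeholders; the sample itself is normally run from the command line with the argparse flags above):

from metricscaler import main

main('my-bigtable-instance',   # bigtable_instance
     'my-bigtable-cluster',    # bigtable_cluster
     0.6,                      # high_cpu_threshold
     0.2,                      # low_cpu_threshold
     0.6,                      # high_storage_threshold
     60,                       # short_sleep, seconds
     60 * 10)                  # long_sleep, seconds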

bigtable/metricscaler/metricscaler_test.py

Lines changed: 75 additions & 10 deletions
@@ -16,25 +16,24 @@
 
 import os
 import time
-
-import random
+import uuid
 
 import pytest
 from google.cloud import bigtable
 from google.cloud.bigtable import enums
 from mock import patch
 
 from metricscaler import get_cpu_load
+from metricscaler import get_storage_utilization
 from metricscaler import main
 from metricscaler import scale_bigtable
 
 PROJECT = os.environ['GCLOUD_PROJECT']
 BIGTABLE_ZONE = os.environ['BIGTABLE_ZONE']
 SIZE_CHANGE_STEP = 3
 INSTANCE_ID_FORMAT = 'metric-scale-test-{}'
-INSTANCE_ID_RANGE = 10000
-BIGTABLE_INSTANCE = INSTANCE_ID_FORMAT.format(
-    random.randrange(INSTANCE_ID_RANGE))
+BIGTABLE_INSTANCE = INSTANCE_ID_FORMAT.format(str(uuid.uuid4())[:10])
+BIGTABLE_DEV_INSTANCE = INSTANCE_ID_FORMAT.format(str(uuid.uuid4())[:10])
 
 
 # System tests to verify API calls succeed
@@ -44,6 +43,10 @@ def test_get_cpu_load():
     assert float(get_cpu_load()) > 0.0
 
 
+def test_get_storage_utilization():
+    assert float(get_storage_utilization()) > 0.0
+
+
 @pytest.fixture()
 def instance():
     cluster_id = BIGTABLE_INSTANCE
@@ -68,6 +71,29 @@ def instance():
     instance.delete()
 
 
+@pytest.fixture()
+def dev_instance():
+    cluster_id = BIGTABLE_DEV_INSTANCE
+
+    client = bigtable.Client(project=PROJECT, admin=True)
+
+    storage_type = enums.StorageType.SSD
+    development = enums.Instance.Type.DEVELOPMENT
+    labels = {'dev-label': 'dev-label'}
+    instance = client.instance(BIGTABLE_DEV_INSTANCE,
+                               instance_type=development,
+                               labels=labels)
+
+    if not instance.exists():
+        cluster = instance.cluster(cluster_id, location_id=BIGTABLE_ZONE,
+                                   default_storage_type=storage_type)
+        instance.create(clusters=[cluster])
+
+    yield
+
+    instance.delete()
+
+
 def test_scale_bigtable(instance):
     bigtable_client = bigtable.Client(admin=True)
 
@@ -103,31 +129,70 @@ def test_scale_bigtable(instance):
         raise
 
 
-# Unit test for logic
+def test_handle_dev_instance(capsys, dev_instance):
+    with pytest.raises(ValueError):
+        scale_bigtable(BIGTABLE_DEV_INSTANCE, BIGTABLE_DEV_INSTANCE, True)
+
+
 @patch('time.sleep')
+@patch('metricscaler.get_storage_utilization')
 @patch('metricscaler.get_cpu_load')
 @patch('metricscaler.scale_bigtable')
-def test_main(scale_bigtable, get_cpu_load, sleep):
+def test_main(scale_bigtable, get_cpu_load, get_storage_utilization, sleep):
     SHORT_SLEEP = 5
     LONG_SLEEP = 10
+
+    # Test okay CPU, okay storage utilization
     get_cpu_load.return_value = 0.5
+    get_storage_utilization.return_value = 0.5
 
-    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, SHORT_SLEEP,
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
          LONG_SLEEP)
     scale_bigtable.assert_not_called()
     scale_bigtable.reset_mock()
 
+    # Test high CPU, okay storage utilization
     get_cpu_load.return_value = 0.7
-    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, SHORT_SLEEP,
+    get_storage_utilization.return_value = 0.5
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
          LONG_SLEEP)
     scale_bigtable.assert_called_once_with(BIGTABLE_INSTANCE,
                                            BIGTABLE_INSTANCE, True)
     scale_bigtable.reset_mock()
 
+    # Test low CPU, okay storage utilization
+    get_storage_utilization.return_value = 0.5
     get_cpu_load.return_value = 0.2
-    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, SHORT_SLEEP,
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
          LONG_SLEEP)
     scale_bigtable.assert_called_once_with(BIGTABLE_INSTANCE,
                                            BIGTABLE_INSTANCE, False)
+    scale_bigtable.reset_mock()
+
+    # Test okay CPU, high storage utilization
+    get_cpu_load.return_value = 0.5
+    get_storage_utilization.return_value = 0.7
+
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
+         LONG_SLEEP)
+    scale_bigtable.assert_called_once_with(BIGTABLE_INSTANCE,
+                                           BIGTABLE_INSTANCE, True)
+    scale_bigtable.reset_mock()
 
+    # Test high CPU, high storage utilization
+    get_cpu_load.return_value = 0.7
+    get_storage_utilization.return_value = 0.7
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
+         LONG_SLEEP)
+    scale_bigtable.assert_called_once_with(BIGTABLE_INSTANCE,
+                                           BIGTABLE_INSTANCE, True)
+    scale_bigtable.reset_mock()
+
+    # Test low CPU, high storage utilization
+    get_cpu_load.return_value = 0.2
+    get_storage_utilization.return_value = 0.7
+    main(BIGTABLE_INSTANCE, BIGTABLE_INSTANCE, 0.6, 0.3, 0.6, SHORT_SLEEP,
+         LONG_SLEEP)
+    scale_bigtable.assert_called_once_with(BIGTABLE_INSTANCE,
+                                           BIGTABLE_INSTANCE, True)
     scale_bigtable.reset_mock()
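
The system tests talk to real services: they read GCLOUD_PROJECT and BIGTABLE_ZONE from the environment, and the fixtures create and delete actual Bigtable instances. A hedged sketch of running them locally (the project and zone values are placeholders; invoking pytest programmatically is just one option):

import os

import pytest

# These must be set before the test module is imported.
os.environ['GCLOUD_PROJECT'] = 'my-gcp-project'
os.environ['BIGTABLE_ZONE'] = 'us-central1-c'

# Runs the module above; requires credentials for a billable project.
pytest.main(['-v', 'metricscaler_test.py'])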
