Skip to content

Commit c2e8fe2

Browse files
author
Sam J Sharpe
committed
Merge pull request alphagov#2 from alphagov/better-role-selection
Rethink fabric node selection
2 parents 768960b + d73d540 commit c2e8fe2

File tree

4 files changed

+207
-50
lines changed

4 files changed

+207
-50
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
*.pyc
2+
.ssh/*

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,29 @@ helper script, installed in `/usr/local/bin`
1414
In order to use the fabric scripts, you will need to enable ssh-agent
1515
forwarding when you connect to the jumpboxes. For example:
1616

17-
ssh -A jumpbox-1.management.production
17+
$ ssh -A jumpbox-1.management.production
18+
19+
## Local usage
20+
21+
You can also use the fabric scripts from an external machine with a small amount
22+
of setup on your part. You will need to install Fabric,
23+
24+
$ pip install fabric
25+
26+
NB: If you get a "pip: command not found" error, run this first:
27+
28+
$ sudo easy_install pip
29+
30+
configure it (see [the fabric documentation][fabdoc] for more examples),
31+
32+
$ echo 'user = jimbob' >> ~/.fabricrc
33+
34+
and then you should be able to run it:
35+
36+
$ fab preview all hosts
37+
...
38+
$ fab preview class:frontend do:'uname -a'
39+
...
40+
41+
[fabdoc]: http://docs.fabfile.org/en/latest/usage/fab.html
42+

fabfile.py

Lines changed: 178 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
from __future__ import print_function
22

33
from collections import defaultdict
4-
import json
5-
import sys
6-
import subprocess
4+
import os
75
import textwrap
8-
import urllib
9-
import urllib2
106

117
from fabric import state
12-
from fabric.colors import *
13-
from fabric.api import *
8+
from fabric.api import (abort, env, get, hide, local, puts, run, runs_once,
9+
settings, sudo, task, warn)
1410
from fabric.task_utils import crawl
1511

1612
# Our command submodules
@@ -22,40 +18,129 @@
2218
import search
2319
import vm
2420

25-
env.hosts = []
26-
env.roledefs = defaultdict(list)
27-
28-
def facter(*args):
29-
facter_args = ['facter', '--json']
30-
facter_args.extend(args)
31-
proc = subprocess.Popen(facter_args, stdout=subprocess.PIPE)
32-
out, err = proc.communicate()
33-
34-
if proc.returncode != 0:
35-
raise RuntimeError("facter returned non-zero exit code! (args={0})".format(args))
36-
37-
return json.loads(out)
38-
39-
if facter('govuk_class')['govuk_class'] != 'jumpbox':
40-
print("ERROR: govuk_fab is designed to run from a jumpbox (govuk_class != jumpbox)", file=sys.stderr)
41-
sys.exit(1)
42-
43-
with hide('running'):
44-
qs = urllib.urlencode({'query': '["=", ["node", "active"], true]'})
45-
res = urllib2.urlopen('http://puppetdb.cluster/nodes?{0}'.format(qs))
46-
hosts = json.load(res)
47-
48-
for host in hosts:
49-
try:
50-
name, vdc, org = host.rsplit('.', 3)
51-
except ValueError:
52-
print("WARNING: discarding badly formatted hostname '{0}'".format(host), file=sys.stderr)
53-
continue
54-
55-
env.roledefs['all'].append(host)
56-
env.roledefs['org-%s' % org].append(host)
57-
env.roledefs['vdc-%s' % vdc].append(host)
58-
env.roledefs['class-%s' % name.rstrip('-1234567890')].append(host)
21+
# Directory containing this fabfile. Resolved to an absolute path so that
# SSH_DIR does not depend on the working directory when __file__ is relative.
HERE = os.path.dirname(os.path.abspath(__file__))
# Per-checkout cache directory for known_hosts files downloaded from gateways.
SSH_DIR = os.path.join(HERE, '.ssh')
23+
24+
ABORT_MSG = textwrap.dedent("""
25+
You must select an environment before running this task, e.g.
26+
27+
fab production [task, [task, [...]]]
28+
29+
If you've called fabric with the -R flag, please instead use one of the
30+
following tasks to select a set of machines:
31+
32+
all
33+
class:<classname>
34+
vdc:<vdcname>
35+
36+
For example:
37+
38+
fab production class:cache do:uname
39+
40+
To find a list of available classes and VDCs, you can run
41+
42+
fab production classes
43+
fab production vdcs
44+
""")
45+
46+
class RoleFetcher(object):
    """
    Lazily populated stand-in for the Fabric ``env.roledefs`` mapping.

    Nothing is looked up when an instance is created. Once ``fetch()`` has
    run, hosts are grouped under the role names ``all``, ``class-<class>`` and
    ``vdc-<vdc>``. Indexing an instance returns a zero-argument callable
    instead of a list, so the actual host lookup is deferred until that
    callable is invoked.
    """

    def __init__(self):
        self.fetched = False
        self.hosts = None
        self.classes = set()
        self.vdcs = set()
        self.roledefs = defaultdict(list)

    def fetch(self):
        """Load the host list and build the role tables; no-op once fetched."""
        if self.fetched:
            return

        self.hosts = _fetch_hosts()

        for fqdn in self.hosts:
            parts = fqdn.split('.', 3)
            if len(parts) != 3:
                # Only names made of exactly three dot-separated labels are
                # accepted; everything else is reported and skipped.
                warn("discarding badly formatted hostname '{0}'".format(fqdn))
                continue
            name, vdc = parts[:2]

            # Machines are referred to exclusively by short name: using
            # foo.bar.production would be confusing when working in preview
            # or staging.
            short_host = '.'.join((name, vdc))

            # The machine class is the node name minus its trailing
            # digits and dashes.
            cls = name.rstrip('-1234567890')

            self.classes.add(cls)
            self.vdcs.add(vdc)
            for role in ('all', 'class-%s' % cls, 'vdc-%s' % vdc):
                self.roledefs[role].append(short_host)

        self.fetched = True

    def __contains__(self, key):
        # Every role name is reported as present.
        # NOTE(review): presumably so Fabric's role validation never rejects a
        # role before the host list has been fetched -- confirm.
        return True

    def __getitem__(self, key):
        # Hand back a callable rather than the list itself, so the check for
        # a selected environment happens when the role is resolved, not when
        # it is merely referenced.
        def _resolve():
            self._assert_fetched()
            return self.roledefs[key]

        return _resolve

    def _assert_fetched(self):
        """Abort with ABORT_MSG unless ``fetch()`` has already completed."""
        if self.fetched:
            return
        abort(ABORT_MSG)
99+
100+
def _fetch_hosts():
    """
    Fetch a list of hosts in this environment, regardless of whether we're
    executing from within the environment or via a gateway.

    Runs ``govuk_node_list`` on the gateway when ``env.gateway`` is set, and
    on the local machine otherwise. Returns the command's output split into
    lines, one host name per line.
    """
    with hide('running', 'stdout'):
        if env.gateway:
            # Connect to the gateway box directly rather than tunnelling
            # through itself.
            with settings(host_string=env.gateway, gateway=None):
                return run('govuk_node_list').splitlines()

        # Otherwise assume we're *in* the infrastructure. local() only hands
        # back the command's output when capture=True; without it the return
        # value is always an empty string and no hosts would ever be found.
        return local('govuk_node_list', capture=True).splitlines()
113+
114+
def _fetch_known_hosts():
    """
    Return the path of the cached known_hosts file for the selected gateway.

    The file lives in SSH_DIR and is named after the gateway. If it is not
    there yet, the gateway's /etc/ssh/ssh_known_hosts is downloaded to that
    path first.

    To refresh stale host keys, delete SSH_DIR and rerun the command; the
    file is downloaded again whenever it is missing.

    Raises RuntimeError if no gateway has been selected.
    """
    gateway = env.gateway
    if gateway is None:
        raise RuntimeError("Tried to _fetch_known_hosts with no env.gateway set!")

    known_hosts_file = os.path.join(SSH_DIR, gateway)
    if os.path.exists(known_hosts_file):
        return known_hosts_file

    # Talk to the gateway directly while downloading its known_hosts file.
    with settings(host_string=gateway, gateway=None):
        get('/etc/ssh/ssh_known_hosts', known_hosts_file)

    return known_hosts_file
134+
135+
def _set_gateway(name):
    """
    Select the gateway box for the environment called ``name``.

    Three steps, in order: point env.gateway at the environment's jumpbox,
    make Fabric consult the known_hosts file fetched for that gateway, and
    load the environment's host list into env.roledefs.
    """
    jumpbox = 'jumpbox.{0}.alphagov.co.uk'.format(name)
    env.gateway = jumpbox

    # Both calls below depend on env.gateway already being set.
    known_hosts_path = _fetch_known_hosts()
    env.system_known_hosts = known_hosts_path
    env.roledefs.fetch()
59144

60145
@task
61146
def help(name):
@@ -68,15 +153,58 @@ def help(name):
68153
puts(textwrap.dedent(task.__doc__).strip())
69154

70155
@task
71-
def list(role='all'):
72-
"""List known hosts"""
73-
puts('\n'.join(sorted(env.roledefs[role])))
156+
def production():
    """Select production environment"""
    # Routes subsequent tasks through the production jumpbox and loads the
    # production host list.
    _set_gateway(name='production')
159+
160+
@task
def staging():
    """Select staging environment"""
    # Routes subsequent tasks through the staging jumpbox and loads the
    # staging host list.
    _set_gateway(name='staging')
74164

75165
@task
76-
def list_roles():
77-
"""List available roles"""
78-
for role in sorted(env.roledefs.keys()):
79-
print("%-30.30s : %s" % (role, len(env.roledefs[role])))
166+
def preview():
    """Select preview environment"""
    # Routes subsequent tasks through the preview jumpbox and loads the
    # preview host list.
    _set_gateway(name='preview')
169+
170+
@task
def all():
    """Select all machines in current environment"""
    # NOTE: the function name doubles as the task name on the command line
    # (``fab preview all hosts``), which is why it shadows the builtin all().
    selected_role = 'all'
    env.roles.append(selected_role)
174+
175+
@task(name='class')
def klass(class_name):
    """Select a machine class"""
    # Exposed as the ``class`` task; ``class`` is a reserved word, hence the
    # different Python-level name.
    selected_role = 'class-%s' % class_name
    env.roles.append(selected_role)
179+
180+
@task
def vdc(vdc_name):
    """Select a virtual datacentre"""
    # Roles for virtual datacentres are stored under a ``vdc-`` prefix.
    selected_role = 'vdc-%s' % vdc_name
    env.roles.append(selected_role)
184+
185+
@task
@runs_once
def hosts():
    """List selected hosts"""
    # Ask the registered 'hosts' task object which hosts it would run on
    # given the current env, then print them one per line in sorted order.
    this_task = state.commands['hosts']
    selected = this_task.get_hosts(None, None, None, env)
    listing = '\n'.join(sorted(selected))
    print(listing)
192+
193+
@task
@runs_once
def classes():
    """List available classes"""
    roledefs = env.roledefs
    for cls_name in sorted(roledefs.classes):
        # Role lookups return a callable; invoking it yields the host list.
        lookup = roledefs['class-%s' % cls_name]
        host_count = len(lookup())
        print("%-30.30s %s" % (cls_name, host_count))
200+
201+
@task
@runs_once
def vdcs():
    """List available virtual datacentres"""
    roledefs = env.roledefs
    for vdc_name in sorted(roledefs.vdcs):
        # Role lookups return a callable; invoking it yields the host list.
        lookup = roledefs['vdc-%s' % vdc_name]
        host_count = len(lookup())
        print("%-30.30s %s" % (vdc_name, host_count))
80208

81209
@task
82210
def do(command):
@@ -87,3 +215,5 @@ def do(command):
87215
def sdo(command):
88216
"""Execute arbitrary commands with sudo"""
89217
sudo(command)
218+
219+
# Bind a RoleFetcher to env.roledefs so that role lookups are resolved lazily,
# after one of the environment tasks has fetched the host list.
env.roledefs = RoleFetcher()

util.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
def use_random_host(role):
    """Use a randomly chosen host from the given role"""
    # env.roledefs[role] is a callable; invoking it produces the host list.
    resolve_hosts = env.roledefs[role]
    candidates = resolve_hosts()
    env.host_string = random.choice(candidates)
89

910
def rake(app, task):
1011
"""Run a rake task for the specified application"""

0 commit comments

Comments
 (0)