
Commit eed822a

Added fix for skipping job clusters
The script was re-creating the primary site's job clusters as interactive clusters in the secondary site. Added a fix to skip job clusters during migration. Also added a note warning the user that newly created clusters start running in the secondary site immediately.
1 parent 8f291b5 commit eed822a
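
The core of the fix is a check on the `cluster_source` field in the config that `databricks clusters get` returns; a minimal, self-contained sketch of just that check (the sample JSON and cluster ID below are made up for illustration):

```python
import json

# Hypothetical sample of the JSON returned by `databricks clusters get`
# (only the fields relevant to the check are shown).
cluster_get_out = '{"cluster_id": "0101-120000-abc123", "cluster_source": "JOB"}'

cluster_req_json = json.loads(cluster_get_out)

# Clusters spawned by jobs report cluster_source == "JOB"; skipping them keeps
# the secondary workspace free of job clusters re-created as interactive ones.
if cluster_req_json.get("cluster_source") == "JOB":
    print("Skipping job cluster: " + cluster_req_json["cluster_id"])
```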

File tree

1 file changed: +32 -19 lines changed

articles/azure-databricks/howto-regional-disaster-recovery.md

Lines changed: 32 additions & 19 deletions
@@ -132,69 +132,82 @@ To create your own regional disaster recovery topology, follow these requirements

```python
from subprocess import check_output
import json, os

EXPORT_PROFILE = "primary"
IMPORT_PROFILE = "secondary"

# Get all cluster info from the old workspace
clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE]).decode("utf-8")
clusters_info_list = clusters_out.splitlines()

# Create a list of all cluster IDs, skipping any blank lines in the CLI output
clusters_list = []
for cluster_info in clusters_info_list:
    if cluster_info != '':
        clusters_list.append(cluster_info.split(None, 1)[0])

# Optionally filter cluster IDs out manually, so as to create only the required ones in the new workspace

# List of the elements allowed in a cluster create request
cluster_req_elems = ["num_workers", "autoscale", "cluster_name", "spark_version", "spark_conf",
                     "node_type_id", "driver_node_type_id", "custom_tags", "cluster_log_conf",
                     "spark_env_vars", "autotermination_minutes", "enable_elastic_disk"]

print(str(len(clusters_list)) + " clusters found in the primary site")
print("---------------------------------------------------------")

# Try creating all / selected clusters in the new workspace with the same config as in the old one
cluster_old_new_mappings = {}
strCurrentClusterFile = "tmp_cluster_info.json"
i = 0
for cluster in clusters_list:
    i += 1
    print("Checking cluster " + str(i) + "/" + str(len(clusters_list)) + " : " + cluster)
    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE])
    print("Got cluster config from old workspace")

    # Remove extra content from the config, as the create request accepts allowed elements only
    cluster_req_json = json.loads(cluster_get_out)

    # Don't migrate job clusters
    if cluster_req_json.get('cluster_source') == 'JOB':
        print("Skipping cluster " + cluster_req_json['cluster_id'] + " because it is a job cluster")
        print("---------------------------------------------------------")
        continue

    for key in list(cluster_req_json.keys()):
        if key not in cluster_req_elems:
            cluster_req_json.pop(key, None)

    # Create the cluster, and store the mapping from old to new cluster IDs.
    # Write the current cluster config to a temp JSON file first,
    # deleting any leftover file from a previous iteration.
    if os.path.exists(strCurrentClusterFile):
        os.remove(strCurrentClusterFile)

    with open(strCurrentClusterFile, "w") as fClusterJSONtmp:
        fClusterJSONtmp.write(json.dumps(cluster_req_json))

    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE])
    cluster_create_out_json = json.loads(cluster_create_out)
    cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']

    print("Cluster create request sent to the secondary site workspace successfully")
    print("---------------------------------------------------------")

# Delete the temp file if it exists
if os.path.exists(strCurrentClusterFile):
    os.remove(strCurrentClusterFile)

print("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
print("All done")
print("P.S.: Note that all the newly created clusters in your secondary site start immediately!")
print("      If you don't need them right away, don't forget to terminate them to avoid charges.")
```
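
As the new P.S. warns, every cluster created this way starts running in the secondary site right away. A minimal follow-up sketch, assuming the legacy Databricks CLI's `clusters delete` command (which terminates a cluster rather than permanently removing it) and reusing the `cluster_old_new_mappings` dictionary produced by the script above:

```python
from subprocess import check_output

IMPORT_PROFILE = "secondary"

# Assumption: populated by the migration script above (old ID -> new ID).
cluster_old_new_mappings = {}

# Terminate the freshly created secondary clusters so they don't accrue
# charges until they are actually needed.
for new_cluster_id in cluster_old_new_mappings.values():
    check_output(["databricks", "clusters", "delete",
                  "--cluster-id", new_cluster_id,
                  "--profile", IMPORT_PROFILE])
    print("Termination requested for cluster " + new_cluster_id)
```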
6. **Migrate the jobs configuration**
