Commit e3bca1a

Merge pull request MicrosoftDocs#32600 from farukc/patch-1: Update the Python code under section 5

2 parents 4d403ac + eed822a

File tree

1 file changed: +63 -24 lines changed

articles/azure-databricks/howto-regional-disaster-recovery.md
Copy and save the following Python script to a file, and run it from your Databricks command line (for example, `python scriptname.py`). The script assumes the Databricks CLI is set up with two connection profiles, `primary` for the old workspace and `secondary` for the new one (for example, via `databricks configure --token --profile primary`).
```python
from subprocess import check_output
import json
import os

EXPORT_PROFILE = "primary"
IMPORT_PROFILE = "secondary"

# Get all cluster info from the old workspace
clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE]).decode("utf-8")
clusters_info_list = clusters_out.splitlines()

# Create a list of all cluster ids
clusters_list = []
for cluster_info in clusters_info_list:
    if cluster_info.strip():
        clusters_list.append(cluster_info.split(None, 1)[0])

# Optionally filter cluster ids out manually, so as to create only the required ones in the new workspace

# Create a list of mandatory / optional create request elements
cluster_req_elems = ["num_workers", "autoscale", "cluster_name", "spark_version", "spark_conf",
                     "node_type_id", "driver_node_type_id", "custom_tags", "cluster_log_conf",
                     "spark_env_vars", "autotermination_minutes", "enable_elastic_disk"]

print(str(len(clusters_list)) + " clusters found in the primary site")
print("---------------------------------------------------------")

# Temp file used to pass each cluster config to the CLI as JSON
strCurrentClusterFile = "tmp_cluster_info.json"

# Try creating all / selected clusters in the new workspace with the same config as in the old one.
cluster_old_new_mappings = {}
i = 0
for cluster in clusters_list:
    i += 1
    print("Checking cluster " + str(i) + "/" + str(len(clusters_list)) + " : " + cluster)
    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE]).decode("utf-8")
    print("Got cluster config from old workspace")

    # Remove extra content from the config, as the create request accepts allowed elements only
    cluster_req_json = json.loads(cluster_get_out)

    # Don't migrate job clusters
    if cluster_req_json['cluster_source'] == 'JOB':
        print("Skipping this cluster as it is a job cluster: " + cluster_req_json['cluster_id'])
        print("---------------------------------------------------------")
        continue

    # Iterate over a copy of the keys, since the dict is modified inside the loop
    for key in list(cluster_req_json.keys()):
        if key not in cluster_req_elems:
            cluster_req_json.pop(key, None)

    # Delete the temp file if it exists, then write the current cluster config to it
    if os.path.exists(strCurrentClusterFile):
        os.remove(strCurrentClusterFile)
    with open(strCurrentClusterFile, "w") as fClusterJSONtmp:
        fClusterJSONtmp.write(json.dumps(cluster_req_json))

    # Create the cluster, and store the mapping from old to new cluster ids
    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE]).decode("utf-8")
    cluster_create_out_json = json.loads(cluster_create_out)
    cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']

    print("Cluster create request sent to the secondary site workspace successfully")
    print("---------------------------------------------------------")

# Delete the temp file if it exists
if os.path.exists(strCurrentClusterFile):
    os.remove(strCurrentClusterFile)

print("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
print("All done")
print("P.S.: Note that all the new clusters in your secondary site are started now!")
print("      If you won't use them right away, remember to terminate them to avoid charges.")
```
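
After the script finishes, it can be worth cross-checking the printed mappings against the secondary workspace. The following is a minimal sketch, not part of the original script: it assumes the same `secondary` CLI profile, and `old_to_new` is a placeholder you would fill in with the "Cluster mappings" JSON printed above.

```python
from subprocess import check_output
import json

# Placeholder values: paste the "Cluster mappings" JSON printed by the migration script
old_to_new = {"<old-cluster-id>": "<new-cluster-id>"}

# List the clusters that now exist in the secondary workspace
secondary_out = check_output(
    ["databricks", "clusters", "list", "--profile", "secondary"]
).decode("utf-8")
secondary_ids = {line.split(None, 1)[0] for line in secondary_out.splitlines() if line.strip()}

# Flag any mapping whose new cluster id is not visible in the secondary site
for old_id, new_id in old_to_new.items():
    status = "OK" if new_id in secondary_ids else "MISSING"
    print(old_id + " -> " + new_id + " : " + status)
```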
6. **Migrate the jobs configuration**
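
   This step follows the same export/import pattern as the cluster script: read each job's definition from the primary workspace and re-create it in the secondary one. The following is a minimal sketch of that pattern, assuming the same `primary`/`secondary` CLI profiles and the legacy `databricks jobs` commands; it is not the article's own script, and any `existing_cluster_id` references would still need to be remapped using the `cluster_old_new_mappings` produced above.

   ```python
   from subprocess import check_output
   import json

   EXPORT_PROFILE = "primary"
   IMPORT_PROFILE = "secondary"

   # Get all jobs from the old workspace (legacy CLI prints "job_id  name" lines)
   jobs_out = check_output(["databricks", "jobs", "list", "--profile", EXPORT_PROFILE]).decode("utf-8")
   job_ids = [line.split(None, 1)[0] for line in jobs_out.splitlines() if line.strip()]

   for job_id in job_ids:
       # Export the job definition; the create request takes only the "settings" part
       job_get_out = check_output(["databricks", "jobs", "get", "--job-id", job_id, "--profile", EXPORT_PROFILE]).decode("utf-8")
       job_settings = json.loads(job_get_out)["settings"]

       # NOTE: if the job pins an existing_cluster_id, remap it here to the
       # corresponding new cluster id before creating the job.

       with open("tmp_job_settings.json", "w") as f:
           f.write(json.dumps(job_settings))
       job_create_out = check_output(["databricks", "jobs", "create", "--json-file", "tmp_job_settings.json", "--profile", IMPORT_PROFILE]).decode("utf-8")
       print("Created job in secondary workspace: " + str(json.loads(job_create_out)["job_id"]))
   ```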
