Commit 8f291b5

Update the Python code under section 5 "Migrate the cluster configurations"
1 parent 7c7fa9e commit 8f291b5


articles/azure-databricks/howto-regional-disaster-recovery.md

Lines changed: 60 additions & 34 deletions
````diff
@@ -130,45 +130,71 @@ To create your own regional disaster recovery topology, follow these requirement
 Copy and save the following python script to a file, and run it in your Databricks command line. For example, `python scriptname.py`.
 
 ```python
-from subprocess import call, check_output import json
-
+from subprocess import call, check_output
+import json, os
+
 EXPORT_PROFILE = "primary"
 IMPORT_PROFILE = "secondary"
-
-# Get all clusters info from old workspace
-clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE]) clusters_info_list = clusters_out.splitlines()
-
-# Create a list of all cluster ids
-clusters_list = [] for cluster_info in clusters_info_list: clusters_list.append(cluster_info.split(None, 1)[0])
-
+
+# Get all clusters info from old workspace
+clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE])
+clusters_info_list = clusters_out.splitlines()
+
+# Create a list of all cluster ids
+clusters_list = []
+for cluster_info in clusters_info_list: clusters_list.append(cluster_info.split(None, 1)[0])
+
 # Optionally filter cluster ids out manually, so as to create only required ones in new workspace
+
+# Create a list of mandatory / optional create request elements
+cluster_req_elems = ["num_workers","autoscale","cluster_name","spark_version","spark_conf","node_type_id","driver_node_type_id","custom_tags","cluster_log_conf","spark_env_vars","autotermination_minutes","enable_elastic_disk"]
 
-# Create a list of mandatory / optional create request elements
-cluster_req_elems = ["num_workers","autoscale","cluster_name","spark_version","spark_conf"," node_type_id","driver_node_type_id","custom_tags","cluster_log_conf","sp ark_env_vars","autotermination_minutes","enable_elastic_disk"]
-
+print(str(len(clusters_list)) + " clusters found in the primary site" )
+print ("---------------------------------------------------------")
+
 # Try creating all / selected clusters in new workspace with same config as in old one.
-cluster_old_new_mappings = {} for cluster in clusters_list: print "Trying to migrate cluster " + cluster
-
-cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE])
-print "Got cluster config from old workspace"
-
-# Remove extra content from the config, as we need to build create request with allowed elements only
-cluster_req_json = json.loads(cluster_get_out)
-cluster_json_keys = cluster_req_json.keys()
-
-for key in cluster_json_keys:
-    if key not in cluster_req_elems:
-        cluster_req_json.pop(key, None)
-
-# Create the cluster, and store the mapping from old to new cluster ids
-cluster_create_out = check_output(["databricks", "clusters", "create", "--json", json.dumps(cluster_req_json), "--profile", IMPORT_PROFILE])
-cluster_create_out_json = json.loads(cluster_create_out)
-cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']
-
-print "Sent cluster create request to new workspace successfully"
-
-print "Cluster mappings: " + json.dumps(cluster_old_new_mappings)
-print "All done"
+cluster_old_new_mappings = {}
+i = 0
+for cluster in clusters_list:
+    i += 1
+    print("Trying to migrate cluster " + str(i) + "/" + str(len(clusters_list)) + " : " + cluster)
+    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE])
+    print ("Got cluster config from old workspace")
+    # Remove extra content from the config, as we need to build create request with allowed elements only
+    cluster_req_json = json.loads(cluster_get_out)
+    cluster_json_keys = cluster_req_json.keys()
+
+    for key in cluster_json_keys:
+        if key not in cluster_req_elems:
+            cluster_req_json.pop(key, None)
+
+    # Create the cluster, and store the mapping from old to new cluster ids
+
+    #Create a temp file to store the current cluster info as JSON
+    strCurrentClusterFile = "tmp_cluster_info.json"
+
+    #delete the temp file if exists
+    if os.path.exists(strCurrentClusterFile) :
+        os.remove(strCurrentClusterFile)
+
+    fClusterJSONtmp = open(strCurrentClusterFile,"w+")
+    fClusterJSONtmp.write(json.dumps(cluster_req_json))
+    fClusterJSONtmp.close()
+
+    #cluster_create_out = check_output(["databricks", "clusters", "create", "--json", json.dumps(cluster_req_json), "--profile", IMPORT_PROFILE])
+    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE])
+    cluster_create_out_json = json.loads(cluster_create_out)
+    cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']
+
+    print ("Sent cluster create request to secondary site workspace successfully")
+    print ("---------------------------------------------------------")
+
+#delete the temp file if exists
+if os.path.exists(strCurrentClusterFile) :
+    os.remove(strCurrentClusterFile)
+
+print ("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
+print ("All done")
 ```
 
 6. **Migrate the jobs configuration**
````

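The updated script prints the old-to-new cluster ID mapping as JSON when it finishes. As a rough sanity check after a run, a minimal sketch along the following lines could confirm that each newly created cluster shows up in the secondary workspace; the mapping literal and cluster IDs are placeholders, and it assumes the same `secondary` Databricks CLI profile used by the script above.

```python
from subprocess import check_output

IMPORT_PROFILE = "secondary"

# Placeholder mapping: in practice, paste the dictionary printed by the
# migration script ("Cluster mappings: ...").
cluster_old_new_mappings = {"<old-cluster-id>": "<new-cluster-id>"}

# List clusters in the secondary workspace and collect their ids
# (the first whitespace-separated field of each line, as in the script above).
clusters_out = check_output(["databricks", "clusters", "list", "--profile", IMPORT_PROFILE])
secondary_ids = set()
for line in clusters_out.decode("utf-8").splitlines():
    if line.strip():
        secondary_ids.add(line.split(None, 1)[0])

# Report whether each migrated cluster appears in the secondary workspace listing.
for old_id, new_id in cluster_old_new_mappings.items():
    status = "found" if new_id in secondary_ids else "missing"
    print(new_id + " (migrated from " + old_id + "): " + status)
```

This reuses the same `databricks clusters list` call and first-field parsing as the migration script, so it needs no additional dependencies beyond the configured CLI profiles.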