Copy the following Python script, save it to a file, and run it from a command line where the Databricks CLI is installed, for example with `python scriptname.py`. The script expects CLI connection profiles for both workspaces; a configuration sketch follows the script.
``` python
from subprocess import call, check_output
import json, os

# Databricks CLI connection profiles for the old (primary) and new (secondary) workspaces
EXPORT_PROFILE = "primary"
IMPORT_PROFILE = "secondary"

# Get all clusters info from the old workspace
clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE]).decode("utf-8")
clusters_info_list = clusters_out.splitlines()

# Create a list of all cluster ids (the id is the first column of each output line)
clusters_list = []
for cluster_info in clusters_info_list:
    clusters_list.append(cluster_info.split(None, 1)[0])

# Optionally filter cluster ids out manually, so as to create only the required ones in the new workspace

# Create a list of mandatory / optional create request elements
cluster_req_elems = ["num_workers", "autoscale", "cluster_name", "spark_version", "spark_conf",
                     "node_type_id", "driver_node_type_id", "custom_tags", "cluster_log_conf",
                     "spark_env_vars", "autotermination_minutes", "enable_elastic_disk"]

print(str(len(clusters_list)) + " clusters found in the primary site")
print("---------------------------------------------------------")

# Temp file used to pass each cluster's pruned config to the CLI as JSON
strCurrentClusterFile = "tmp_cluster_info.json"

# Try creating all / selected clusters in the new workspace with the same config as in the old one
cluster_old_new_mappings = {}
i = 0
for cluster in clusters_list:
    i += 1
    print("Trying to migrate cluster " + str(i) + "/" + str(len(clusters_list)) + ": " + cluster)

    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE]).decode("utf-8")
    print("Got cluster config from old workspace")

    # Remove extra content from the config, as we need to build the create request with allowed elements only
    cluster_req_json = json.loads(cluster_get_out)
    for key in list(cluster_req_json.keys()):
        if key not in cluster_req_elems:
            cluster_req_json.pop(key, None)

    # Delete the temp file if it already exists, then write the pruned config to it
    if os.path.exists(strCurrentClusterFile):
        os.remove(strCurrentClusterFile)

    fClusterJSONtmp = open(strCurrentClusterFile, "w+")
    fClusterJSONtmp.write(json.dumps(cluster_req_json))
    fClusterJSONtmp.close()

    # Create the cluster in the new workspace, and store the mapping from old to new cluster ids
    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE]).decode("utf-8")
    cluster_create_out_json = json.loads(cluster_create_out)
    cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']

    print("Sent cluster create request to secondary site workspace successfully")
    print("---------------------------------------------------------")

# Delete the temp file if it exists
if os.path.exists(strCurrentClusterFile):
    os.remove(strCurrentClusterFile)

print("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
print("All done")
```
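The script assumes that two Databricks CLI connection profiles named `primary` and `secondary` already exist, one for each workspace (for example, created with `databricks configure --token --profile primary`). As a rough sketch, and assuming the legacy Databricks CLI that stores profiles in `~/.databrickscfg`, the file would contain entries along these lines; the hosts and tokens shown are placeholders:

```ini
[primary]
host = https://<primary-workspace-url>
token = <personal-access-token-for-primary>

[secondary]
host = https://<secondary-workspace-url>
token = <personal-access-token-for-secondary>
```

The `Cluster mappings` line printed at the end records which new cluster id corresponds to each old cluster id, which is useful when repointing jobs in the next step.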
6. **Migrate the jobs configuration**