To create your own regional disaster recovery topology, follow these requirements:

Copy and save the following Python script to a file, and run it from your command line, for example `python scriptname.py`. The script expects the Databricks CLI to be installed and configured with two connection profiles: `primary` (the old workspace) and `secondary` (the new workspace).

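Before running it, you can do a quick pre-flight check that both CLI profiles respond. This is a minimal sketch, assuming the legacy Databricks CLI is on your `PATH`; the profile names match those used by the migration script:

```python
# Pre-flight sketch: fail fast if either CLI profile is misconfigured.
from subprocess import check_output

for profile in ("primary", "secondary"):
    # A cheap read-only call; check_output raises CalledProcessError on failure
    check_output(["databricks", "workspace", "ls", "/", "--profile", profile])
    print(profile + " profile OK")
```

The migration script itself follows.
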
```python
from subprocess import check_output
import json
import os

EXPORT_PROFILE = "primary"
IMPORT_PROFILE = "secondary"

# Get all cluster info from the old workspace
clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE]).decode("utf-8")
clusters_info_list = clusters_out.splitlines()

# Create a list of all cluster ids (the first column of each non-empty line)
clusters_list = []
for cluster_info in clusters_info_list:
    if cluster_info.strip():
        clusters_list.append(cluster_info.split(None, 1)[0])

# Optionally filter cluster ids out manually, so as to create only required ones in the new workspace

# The mandatory / optional elements allowed in a cluster create request
cluster_req_elems = ["num_workers", "autoscale", "cluster_name", "spark_version",
                     "spark_conf", "node_type_id", "driver_node_type_id", "custom_tags",
                     "cluster_log_conf", "spark_env_vars", "autotermination_minutes",
                     "enable_elastic_disk"]

print(str(len(clusters_list)) + " clusters found in the primary site")
print("---------------------------------------------------------")

# Temp file used to hand each cluster's pruned config to the CLI as JSON
strCurrentClusterFile = "tmp_cluster_info.json"

# Try creating all / selected clusters in the new workspace with the same config as in the old one
cluster_old_new_mappings = {}
for i, cluster in enumerate(clusters_list, 1):
    print("Checking cluster " + str(i) + "/" + str(len(clusters_list)) + " : " + cluster)
    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE]).decode("utf-8")
    print("Got cluster config from old workspace")

    # Remove extra content from the config, as the create request accepts the allowed elements only
    cluster_req_json = json.loads(cluster_get_out)

    # Don't migrate job clusters
    if cluster_req_json.get("cluster_source") == "JOB":
        print("Skipping this cluster as it is a job cluster: " + cluster_req_json["cluster_id"])
        print("---------------------------------------------------------")
        continue

    # Copy the key list first, so the dict is not mutated while being iterated
    for key in list(cluster_req_json.keys()):
        if key not in cluster_req_elems:
            cluster_req_json.pop(key, None)

    # Write the pruned config to the temp file; passing --json-file avoids the
    # shell-quoting issues that can arise with an inline --json payload
    with open(strCurrentClusterFile, "w") as fClusterJSONtmp:
        fClusterJSONtmp.write(json.dumps(cluster_req_json))

    # Create the cluster, and store the mapping from old to new cluster ids
    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE]).decode("utf-8")
    cluster_create_out_json = json.loads(cluster_create_out)
    cluster_old_new_mappings[cluster] = cluster_create_out_json["cluster_id"]

    print("Cluster create request sent to secondary site workspace successfully")
    print("---------------------------------------------------------")

# Delete the temp file if it exists
if os.path.exists(strCurrentClusterFile):
    os.remove(strCurrentClusterFile)

print("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
print("All done")
print("Note: all the new clusters in your secondary site are being started now!")
print("If you won't use them right away, remember to terminate them to avoid charges.")
```
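The `cluster_old_new_mappings` dictionary printed at the end is what connects this step to the jobs migration below: any migrated job that ran on an existing cluster needs its `existing_cluster_id` repointed at the corresponding new cluster. Here is a minimal sketch of persisting and reusing the map; the file name `cluster_mappings.json` and the `remap_job_cluster` helper are illustrative, not part of the script above:

```python
import json

# At the end of the migration script, the map could be persisted with:
#     with open("cluster_mappings.json", "w") as f:
#         json.dump(cluster_old_new_mappings, f, indent=2)

# Later, during the jobs migration, load it back...
with open("cluster_mappings.json") as f:
    cluster_old_new_mappings = json.load(f)

# ...and repoint each job's cluster reference before re-creating the job
def remap_job_cluster(job_settings):
    """Replace an old existing_cluster_id with its new-workspace counterpart, if mapped."""
    old_id = job_settings.get("existing_cluster_id")
    if old_id in cluster_old_new_mappings:
        job_settings["existing_cluster_id"] = cluster_old_new_mappings[old_id]
    return job_settings
```
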
6. **Migrate the jobs configuration**