@@ -132,69 +132,82 @@ To create your own regional disaster recovery topology, follow these requirement
132
132
``` python
133
133
# Migrate interactive-cluster configurations from a primary Databricks
# workspace to a secondary (disaster-recovery) workspace, using the
# `databricks` CLI with two pre-configured connection profiles.
from subprocess import call, check_output
import json, os

# CLI profile names for the source (primary) and target (secondary) workspaces.
EXPORT_PROFILE = "primary"
IMPORT_PROFILE = "secondary"

# Get all clusters info from old workspace.
# check_output returns bytes; decode so all later string handling is str.
clusters_out = check_output(["databricks", "clusters", "list", "--profile", EXPORT_PROFILE])
clusters_info_list = clusters_out.decode("utf-8").splitlines()

# Create a list of all cluster ids (first whitespace-separated token of each
# non-blank line of the CLI table output).
clusters_list = []
for cluster_info in clusters_info_list:
    if cluster_info.strip():
        clusters_list.append(cluster_info.split(None, 1)[0])

# Optionally filter cluster ids out manually, so as to create only required ones in new workspace

# Create a list of mandatory / optional create request elements: only these
# keys are allowed in a `clusters create` request body.
cluster_req_elems = ["num_workers", "autoscale", "cluster_name", "spark_version",
                     "spark_conf", "node_type_id", "driver_node_type_id", "custom_tags",
                     "cluster_log_conf", "spark_env_vars", "autotermination_minutes",
                     "enable_elastic_disk"]

print(str(len(clusters_list)) + " clusters found in the primary site")
print("---------------------------------------------------------")

# Try creating all / selected clusters in new workspace with same config as in old one.
cluster_old_new_mappings = {}  # old cluster id -> new cluster id
i = 0
for cluster in clusters_list:
    i += 1
    print("Checking cluster " + str(i) + "/" + str(len(clusters_list)) + " : " + cluster)
    cluster_get_out = check_output(["databricks", "clusters", "get", "--cluster-id", cluster, "--profile", EXPORT_PROFILE])
    print("Got cluster config from old workspace")

    # Remove extra content from the config, as we need to build create request with allowed elements only
    cluster_req_json = json.loads(cluster_get_out)
    # Snapshot the keys: popping while iterating the live keys view raises
    # RuntimeError on Python 3.
    cluster_json_keys = list(cluster_req_json.keys())

    # Don't migrate Job clusters
    if cluster_req_json['cluster_source'] == 'JOB':
        print("Skipping this cluster as it is a Job cluster : " + cluster_req_json['cluster_id'])
        print("---------------------------------------------------------")
        continue

    for key in cluster_json_keys:
        if key not in cluster_req_elems:
            cluster_req_json.pop(key, None)

    # Create the cluster, and store the mapping from old to new cluster ids.

    # Create a temp file to store the current cluster info as JSON
    strCurrentClusterFile = "tmp_cluster_info.json"

    # delete the temp file if exists
    if os.path.exists(strCurrentClusterFile):
        os.remove(strCurrentClusterFile)

    fClusterJSONtmp = open(strCurrentClusterFile, "w+")
    fClusterJSONtmp.write(json.dumps(cluster_req_json))
    fClusterJSONtmp.close()

    # cluster_create_out = check_output(["databricks", "clusters", "create", "--json", json.dumps(cluster_req_json), "--profile", IMPORT_PROFILE])
    cluster_create_out = check_output(["databricks", "clusters", "create", "--json-file", strCurrentClusterFile, "--profile", IMPORT_PROFILE])
    cluster_create_out_json = json.loads(cluster_create_out)
    cluster_old_new_mappings[cluster] = cluster_create_out_json['cluster_id']

    print("Cluster create request sent to secondary site workspace successfully")
    print("---------------------------------------------------------")

    # delete the temp file if exists
    if os.path.exists(strCurrentClusterFile):
        os.remove(strCurrentClusterFile)

print("Cluster mappings: " + json.dumps(cluster_old_new_mappings))
print("All done")
print("P.S. : Please note that all the new clusters in your secondary site are being started now!")
print("If you won't use those new clusters at the moment, please don't forget terminating your new clusters to avoid charges")
198
211
```
199
212
200
213
6. **Migrate the jobs configuration**
0 commit comments