forked from AppScale/gts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdjinn.rb
3505 lines (2793 loc) · 110 KB
/
djinn.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/ruby -w
# Imports within Ruby's standard libraries
require 'monitor'
require 'net/http'
require 'openssl'
require 'socket'
require 'soap/rpc/driver'
require 'syslog'
require 'yaml'
# Imports for RubyGems
require 'rubygems'
require 'httparty'
require 'json'
require 'right_aws'
require 'zookeeper'
# Imports for AppController libraries
$:.unshift File.join(File.dirname(__FILE__), "lib")
require 'helperfunctions'
require 'cron_helper'
require 'haproxy'
require 'collectd'
require 'nginx'
require 'pbserver'
require 'blobstore'
require 'rabbitmq'
require 'app_controller_client'
require 'user_app_client'
require 'ejabberd'
require 'repo'
require 'zkinterface'
require 'godinterface'
require 'infrastructure_manager_client'
require 'neptune_manager_client'
# The exception type used for AppScale-specific failures.
# NOTE: this derives directly from Exception (not StandardError), so a bare
# `rescue` clause will not catch it - callers must rescue it by name.
class AppScaleException < Exception; end
# A pair of named boolean flags. Presumably passed to helpers that run
# commands, to say whether the caller wants the output back - TODO confirm
# against the callers, which are not visible here.
WANT_OUTPUT = true
NO_OUTPUT = false
# A list of App Engine apps that the AppController will start and control
# outside of the normal start_appengine method.
RESTRICTED_APPS = ["sisyphus"]
$:.unshift File.join(File.dirname(__FILE__), "..", "AppDB", "zkappscale")
require "zookeeper_helper"
# A HTTP client that assumes that responses returned are JSON, and automatically
# loads them, returning the result. Raises a NoMethodError if the host/URL is
# down or otherwise unreachable.
class JSONClient
  include HTTParty

  # Install a custom HTTParty parser that ignores the format argument it is
  # handed and always decodes the response body as JSON.
  parser(Proc.new { |body, _format| JSON.load(body) })
end
# The string returned to the caller of a publicly exposed SOAP method when
# the secret they provide is incorrect.
BAD_SECRET_MSG = "false: bad secret"
# The location on the local file system where we store information about
# where ZooKeeper clients are located, used to back up and restore
# AppController information.
ZK_LOCATIONS_FILE = "/etc/appscale/zookeeper_locations.json"
# Djinn (interchangeably known as 'the AppController') automatically
# configures and deploys all services for a single node. It relies on other
# Djinns or the AppScale Tools to tell it what services (roles) it should
# be hosting, and exposes these methods via a SOAP interface (as is provided
# in DjinnServer).
class Djinn
# An Array of DjinnJobData objects, each of which containing information about
# a node in the currently running AppScale deployment.
attr_accessor :nodes
# A Hash containing all the parameters needed to configure any service
# on any node. At a minimum, this is all the information from the AppScale
# Tools, including information about database parameters and the roles
# for all nodes.
attr_accessor :creds
# An Array of Strings, each of which corresponding to the name of an App
# Engine app that should be loaded.
attr_accessor :app_names
# An Array of Strings, each of which corresponding to the name of an App
# Engine app that has been loaded on this node.
attr_accessor :apps_loaded
# A boolean that is used to let remote callers know when this AppController
# is done initializing itself, but not necessarily done starting or
# stopping roles.
attr_accessor :done_initializing
# A boolean that is used to let remote callers know when this AppController
# is done starting all the services it is responsible for.
attr_accessor :done_loading
# The port that nginx will listen to for the next App Engine app that is
# uploaded into the system.
attr_accessor :nginx_port
# The port that haproxy will listen to for the next App Engine app that is
# uploaded into the system.
attr_accessor :haproxy_port
# The public IP address (or FQDN) that the UserAppServer can be found at,
# initially set to a dummy value to tell callers not to use it until a real
# value is set.
attr_accessor :userappserver_public_ip
# The private IP address (or FQDN) that the UserAppServer can be found at,
# initially set to a dummy value to tell callers not to use it until a real
# value is set.
attr_accessor :userappserver_private_ip
# The human-readable state that this AppController is in.
attr_accessor :state
# A boolean that is used to let remote callers start the shutdown process
# on this AppController, which will cleanly shut down and terminate all
# services on this node.
attr_accessor :kill_sig_received
# An Integer that indexes into @nodes, to return information about this node.
attr_accessor :my_index
# The number of nodes that are running in this AppScale deployment.
# TODO(cgb): It would seem like we could always calculate this with
# @nodes.length, so replace it accordingly.
attr_accessor :total_boxes
# The number of dev_appservers that should run for every App Engine
# application.
attr_accessor :num_appengines
# A boolean that indicates if we are done restoring state from a previously
# running AppScale deployment.
attr_accessor :restored
# A Hash that maps information about each successfully completed Neptune
# job to information about the job, that will one day be used to provide
# hints to future jobs about how to schedule them optimally.
attr_accessor :neptune_jobs
# An Array of DjinnJobData objects that correspond to nodes used for
# Neptune computation. Nodes are reclaimed every hour if they are not in
# use (to avoid being charged for them for another hour).
attr_accessor :neptune_nodes
# A Hash that lists the status of each Google App Engine API made available
# within AppScale. Keys are the names of the APIs (e.g., memcache), and
# values are the statuses of those APIs (e.g., running).
attr_accessor :api_status
# For Babel jobs via Neptune, we keep a list of queues that may have tasks
# stored for execution, as well as the parameters needed to execute them
# (e.g., input location, output location, cloud credentials).
attr_accessor :queues_to_read
# Each component that writes log data to Sisyphus must register itself
# first, so this boolean ensures that we only register ourselves once.
attr_accessor :registered_with_sisyphus
# An integer timestamp that corresponds to the last time this AppController
# has updated @nodes, which we use to compare with a similar timestamp in
# ZooKeeper to see when data in @nodes has changed on other nodes.
attr_accessor :last_updated
# The port that the AppController runs on by default
SERVER_PORT = 17443
# The port that SSH connections are hosted over, by default.
SSH_PORT = 22
# A boolean that should be used when we are waiting for a specific port
# to open, and only if that port needs SSL to talk over it.
USE_SSL = true
# A boolean that indicates whether or not we should turn the firewall on,
# and continuously keep it on. Should definitely be on for releases, and
# on whenever possible.
FIREWALL_IS_ON = false
# The location on the local filesystem where the AppController writes
# information about Neptune jobs that have finished. One day this information
# may be used to more intelligently schedule jobs on the fly.
NEPTUNE_INFO = "/etc/appscale/neptune_info.txt"
# The location on the local filesystem where the AppController writes
# information about the status of App Engine APIs, which the AppLoadBalancer
# will read and display to users.
HEALTH_FILE = "/etc/appscale/health.json"
# The location on the local filesystem where the AppController periodically
# writes its state to, and recovers its state from if it crashes.
STATE_FILE = "/etc/appscale/appcontroller-state.json"
APPSCALE_HOME = ENV['APPSCALE_HOME']
# The message that we display to the user if they call a SOAP-accessible
# function with a malformed input (e.g., of the wrong class or format).
BAD_INPUT_MSG = JSON.dump({'success' => false, 'message' => 'bad input'})
# The message that we display to the user if they want to scale up services
# in a Xen/KVM deployment but don't have enough open nodes to do so.
NOT_ENOUGH_OPEN_NODES = JSON.dump({'success' => false,
'message' => 'not enough open nodes'})
# The options that should be used when invoking wget, so that the
# AppController can automatically probe a site to see if it's up.
WGET_OPTIONS = "--tries=1000 --no-check-certificate -q -O /dev/null"
# How often we should attempt to increase the number of AppServers on a
# given node.
SCALEUP_TIME_THRESHOLD = 60 # seconds
# How often we should attempt to decrease the number of AppServers on a
# given node.
SCALEDOWN_TIME_THRESHOLD = 300 # seconds
# The size of the rotating buffers that we use to keep information on
# the request rate and number of enqueued requests.
NUM_DATA_POINTS = 10
# The minimum number of AppServers (for all applications) that should be run
# on this node.
MIN_APPSERVERS_ON_THIS_NODE = 1
# The maximum number of AppServers (for all applications) that should be run
# on this node.
MAX_APPSERVERS_ON_THIS_NODE = 10
# The position in the haproxy profiling information where the name of
# the service (e.g., the frontend or backend) is specified.
SERVICE_NAME_INDEX = 1
# The position in the haproxy profiling information where the number of
# enqueued requests is specified.
REQ_IN_QUEUE_INDEX = 2
# The position in the haproxy profiling information where the request rate
# is specified.
REQ_RATE_INDEX = 46
# Scales up the number of AppServers used to host an application if the
# request rate rises above this value.
SCALEUP_REQUEST_RATE_THRESHOLD = 5
# Scales down the number of AppServers used to host an application if the
# request rate falls below this value.
SCALEDOWN_REQUEST_RATE_THRESHOLD = 2
# The minimum number of requests that have to sit in haproxy's wait queue for
# an App Engine application before we will scale up the number of AppServers
# that serve that application.
SCALEUP_QUEUE_SIZE_THRESHOLD = 5
# The path to the file where we will store information about AppServer
# scaling decisions.
AUTOSCALE_LOG_FILE = "/var/log/appscale/autoscale.log"
# CPU limits that determine when to stop adding AppServers on a node. Because
# AppServers in different languages consume different amounts of CPU, set
# different limits per language.
MAX_CPU_FOR_APPSERVERS = {'python' => 80.00, 'java' => 75.00, 'go' => 70.00}
# Memory limits that determine when to stop adding AppServers on a node.
# Because AppServers in different languages consume different amounts of
# memory, set different limits per language.
MAX_MEM_FOR_APPSERVERS = {'python' => 90.00, 'java' => 95.00, 'go' => 90.00}
# Creates a new Djinn, which holds all the information needed to configure
# and deploy all the services on this node.
def initialize
  # The password, or secret phrase, that callers must supply to use the
  # methods exposed via SOAP.
  @@secret = HelperFunctions.get_secret

  # AppController logs (see self.log_debug) go to stdout right away and are
  # also queued here so they can be sent to Sisyphus later for web viewing.
  @@log_buffer = Queue.new

  # Deployment layout, credentials and the apps this node knows about.
  @nodes, @my_index, @creds = [], nil, {}
  @app_names, @apps_loaded = [], []

  # Lifecycle flags that remote callers poll or set.
  @kill_sig_received, @done_initializing, @done_loading = false, false, false

  # Ports handed out to App Engine apps as they are uploaded.
  @nginx_port = Nginx::START_PORT
  @haproxy_port = HAProxy::START_PORT
  @appengine_port = 20000

  # Dummy locations that tell callers the UserAppServer isn't usable yet.
  @userappserver_public_ip = "not-up-yet"
  @userappserver_private_ip = "not-up-yet"

  @state = "AppController just started"
  @total_boxes, @num_appengines = 0, 1
  @restored = false

  # Neptune bookkeeping.
  @neptune_jobs, @neptune_nodes = {}, []

  @api_status = {}
  @queues_to_read = []
  @registered_with_sisyphus = false
  @last_updated = 0

  # State consulted when deciding how many AppServers to run per app.
  @app_info_map = {}
  @scaling_in_progress = false
  @last_decision = {}
  @initialized_apps = {}
  @req_rate, @req_in_queue = {}, {}
end
# A SOAP-exposed method that callers can use to determine if this node
# has received information from another node and is starting up.
#
# Returns the current value of @done_initializing, or BAD_SECRET_MSG if the
# given secret is not valid.
def is_done_initializing(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)
  @done_initializing
end
# A SOAP-exposed method that callers use to determine if this node has
# finished starting all the roles it should run when it initially starts.
#
# Returns the current value of @done_loading, or BAD_SECRET_MSG if the given
# secret is not valid.
def is_done_loading(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)
  @done_loading
end
# A SOAP-exposed method that callers can use to get information about what
# roles each node in the AppScale deployment are running.
#
# Returns an Array holding the to_hash form of every entry in @nodes, or
# BAD_SECRET_MSG if the given secret is not valid.
def get_role_info(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)
  @nodes.map { |node| node.to_hash }
end
# A SOAP-exposed method that shuts this AppController down. It sets
# @kill_sig_received (the flag that ends the heartbeat loop in job_start) and
# then, depending on the deployment type, either schedules termination of
# the deployment's VMs (hybrid cloud and cloud) or stops the services that
# run on this node (everything else). In all cases god is shut down and the
# saved AppController state file is removed.
#
# Args:
#   secret: The secret phrase used to authenticate the caller.
# Returns:
#   "OK" once shutdown has been started, or BAD_SECRET_MSG if the secret was
#   not valid.
def kill(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  @kill_sig_received = true

  if is_hybrid_cloud?
    # Terminate from a background thread after a short pause - presumably so
    # this call can return before the machines disappear.
    Thread.new {
      Kernel.sleep(5)
      HelperFunctions.terminate_hybrid_vms(creds)
    }
  elsif is_cloud?
    Thread.new {
      Kernel.sleep(5)
      infrastructure = creds["infrastructure"]
      keyname = creds["keyname"]
      HelperFunctions.terminate_all_vms(infrastructure, keyname)
    }
  else
    # in xen/kvm deployments we actually want to keep the boxes
    # turned on since that was the state they started in
    stop_ejabberd if my_node.is_login?
    stop_sisyphus if my_node.is_appengine?
    Repo.stop if my_node.is_shadow? or my_node.is_appengine?

    # Maps each role name to the name of the method that stops it.
    commands = {
      "load_balancer" => "stop_load_balancer",
      "appengine" => "stop_appengine",
      "db_master" => "stop_db_master",
      "db_slave" => "stop_db_slave",
      "zookeeper" => "stop_zookeeper"
    }

    my_node.jobs.each { |job|
      if commands.include?(job)
        Djinn.log_debug("About to run [#{commands[job]}]")
        send(commands[job].to_sym)
      else
        Djinn.log_debug("Unable to find command for job #{job}. Skipping it.")
      end
    }

    if has_soap_server?(my_node)
      stop_soap_server
      stop_pbserver
    end

    stop_neptune_manager
    stop_infrastructure_manager
  end

  GodInterface.shutdown
  FileUtils.rm_rf(STATE_FILE)
  return "OK"
end
# Validates and sets the instance variables that Djinn needs before it can
# begin configuring and deploying services on a given node (and if it is the
# first Djinn, starting up the other Djinns).
#
# Returns "OK" on success, BAD_SECRET_MSG if the secret is not valid, or a
# String beginning with "Error:" that describes the first problem found.
def set_parameters(djinn_locations, database_credentials, app_names, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  Djinn.log_debug("Djinn locations class: #{djinn_locations.class}")
  Djinn.log_debug("DB Credentials class: #{database_credentials.class}")
  Djinn.log_debug("Apps to load class: #{app_names.class}")

  # Every input has to be an Array; log and report the first one that isn't.
  [
    ["djinn_locations", djinn_locations],
    ["database_credentials", database_credentials],
    ["app_names", app_names]
  ].each do |label, value|
    next if value.class == Array
    type_error = "Error: #{label} wasn't an Array, but was a " + value.class.to_s
    Djinn.log_debug(type_error)
    return type_error
  end

  # The credentials Array alternates keys and values, e.g. ['foo', 'bar']
  # becomes {'foo' => 'bar'}, so it can only be converted to a Hash when it
  # holds an even number of items.
  if database_credentials.length.odd?
    length_error = "Error: DB Credentials wasn't of even length: Len = " +
      "#{database_credentials.length}"
    Djinn.log_debug(length_error)
    return length_error
  end

  possible_credentials = Hash[*database_credentials]
  unless valid_format_for_credentials(possible_credentials)
    return "Error: Credential format wrong"
  end
  Djinn.log_debug("Parameters were valid")

  @nodes = Djinn.convert_location_array_to_class(djinn_locations,
    possible_credentials["keyname"])
  @creds = possible_credentials
  @app_names = app_names
  convert_fqdns_to_ips
  @creds = sanitize_credentials

  Djinn.log_debug("(set_parameters) locations: #{@nodes.join(', ')}")
  Djinn.log_debug("(set_parameters) DB Credentials: #{HelperFunctions.obscure_creds(@creds).inspect}")
  Djinn.log_debug("Apps to load: #{@app_names.join(', ')}")

  find_me_in_locations
  return "Error: Couldn't find me in the node map" if @my_index.nil?
  Djinn.log_debug("(set_parameters) My index = #{@my_index}")

  ENV['EC2_URL'] = @creds['ec2_url']
  "OK"
end
# Validates and sets the list of applications that should be loaded on this
# node.
#
# Returns a String naming the apps now stored in @app_names, a String
# describing the problem if app_names is not an Array, or BAD_SECRET_MSG if
# the secret is not valid.
def set_apps(app_names, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)
  unless app_names.class == Array
    return "app names was not an Array but was a #{app_names.class}"
  end

  @app_names = app_names
  "App names is now #{@app_names.join(', ')}"
end
# A SOAP-exposed method that returns a human-readable summary of this node,
# built from the Hash returned by get_stats: CPU, memory and disk usage, the
# roles it runs, the database location, cloud and current state. On nodes
# that run the appengine role it also lists the hosted apps and, for each
# loaded app that has AppServer information in @app_info_map, how many
# AppServers are recorded for it.
#
# Args:
#   secret: The secret phrase used to authenticate the caller.
# Returns:
#   The summary as a String, or BAD_SECRET_MSG if the secret was not valid.
def status(secret)
  if !valid_secret?(secret)
    return BAD_SECRET_MSG
  end

  stats = get_stats(secret)

  stats_str = <<-STATUS
Currently using #{stats['cpu']} Percent CPU and #{stats['memory']} Percent Memory
Hard disk is #{stats['disk']} Percent full
Is currently: #{stats['roles'].join(', ')}
Database is at #{stats['db_location']}
Is in cloud: #{stats['cloud']}
Current State: #{stats['state']}
  STATUS

  if my_node.is_appengine?
    stats_str << "    Hosting the following apps: #{stats['apps'].keys.join(', ')}\n"

    stats['apps'].each { |app_name, is_loaded|
      next if !is_loaded

      # An app can be marked as loaded without having an entry here (e.g.,
      # the "none" placeholder that stop_app leaves behind), so skip those
      # instead of calling [] on nil.
      app_info = @app_info_map[app_name]
      next if app_info.nil? || app_info[:appengine].nil?

      stats_str << "    The number of AppServers for app #{app_name} is: " +
        "#{app_info[:appengine].length}\n"
    }
  end

  return stats_str
end
# A SOAP-exposed method that reports on this node: its public IP, the CPU,
# memory and disk figures returned by HelperFunctions.get_usage, the roles it
# runs, the UserAppServer location, its cloud, its current state, and which
# of the apps in @app_names are present in @apps_loaded.
#
# Args:
#   secret: The secret phrase used to authenticate the caller.
# Returns:
#   A Hash with the keys 'ip', 'cpu', 'memory', 'disk', 'roles',
#   'db_location', 'cloud', 'state' and 'apps' (itself a Hash mapping each
#   app name to a boolean), or BAD_SECRET_MSG if the secret was not valid.
def get_stats(secret)
  if !valid_secret?(secret)
    return BAD_SECRET_MSG
  end

  usage = HelperFunctions.get_usage
  mem = sprintf("%3.2f", usage['mem'])

  # Use || rather than 'or' here: 'or' binds more loosely than '=', which
  # would leave jobs as nil instead of falling back to ["none"].
  jobs = my_node.jobs || ["none"]

  # don't use an actual % below, or it will cause a string format exception
  stats = {
    'ip' => my_node.public_ip,
    'cpu' => usage['cpu'],
    'memory' => mem,
    'disk' => usage['disk'],
    'roles' => jobs,
    'db_location' => @userappserver_public_ip,
    'cloud' => my_node.cloud,
    'state' => @state
  }

  stats['apps'] = {}
  @app_names.each { |name|
    stats['apps'][name] = @apps_loaded.include?(name)
  }
  return stats
end
# A SOAP-exposed method that removes an App Engine application. The app's
# directory under /var/apps is deleted right away; the rest of the teardown
# runs in a background thread and, depending on this node's roles, forwards
# the request to other nodes, deletes the app via the UserAppClient, kills
# the app's XMPP listener, and removes its god, nginx, collectd, haproxy and
# ZooKeeper entries. The app is finally dropped from @apps_loaded and
# @app_names (each of which gets a "none" placeholder if left empty).
#
# Args:
#   app_name: The name of the application to stop. Any character other than
#     a word character or '-' is stripped before use; the caller's String is
#     left unmodified.
#   secret: The secret phrase used to authenticate the caller.
# Returns:
#   "true" as soon as the background thread has been started, or
#   BAD_SECRET_MSG if the secret was not valid.
def stop_app(app_name, secret)
  if !valid_secret?(secret)
    return BAD_SECRET_MSG
  end

  # Sanitize a copy rather than using gsub!, which would alter the caller's
  # object and raise if that String is frozen. The name is interpolated into
  # shell commands below, so it must be stripped before anything else.
  app_name = app_name.gsub(/[^\w\d\-]/, "")
  Djinn.log_debug("(stop_app): Shutting down app named [#{app_name}]")
  result = ""
  Djinn.log_run("rm -rf /var/apps/#{app_name}")

  # app shutdown process can take more than 30 seconds
  # so run it in a new thread to avoid 'execution expired'
  # error messages and have the tools poll it
  Thread.new {
    # Tell other nodes to shutdown this application
    if @app_names.include?(app_name) and !my_node.is_appengine?
      @nodes.each { |node|
        next if node.private_ip == my_node.private_ip
        if node.is_appengine? or node.is_login?
          ip = node.private_ip
          acc = AppControllerClient.new(ip, @@secret)
          result = acc.stop_app(app_name)
          Djinn.log_debug("(stop_app): Removing application #{app_name} --- #{ip} returned #{result} (#{result.class})")
        end
      }
    end

    # Contact the soap server and remove the application
    if (@app_names.include?(app_name) and !my_node.is_appengine?) or @nodes.length == 1
      ip = HelperFunctions.read_file("#{APPSCALE_HOME}/.appscale/masters")
      uac = UserAppClient.new(ip, @@secret)
      result = uac.delete_app(app_name)
      Djinn.log_debug("(stop_app) Delete app: #{ip} returned #{result} (#{result.class})")
    end

    # may need to stop XMPP listener
    if my_node.is_login?
      pid_files = `ls #{APPSCALE_HOME}/.appscale/xmpp-#{app_name}.pid`.split
      unless pid_files.nil? # not an error here - XMPP is optional
        pid_files.each { |pid_file|
          pid = HelperFunctions.read_file(pid_file)
          Djinn.log_run("kill -9 #{pid}")
        }

        result = "true"
      end
    end

    if my_node.is_appengine?
      GodInterface.stop(app_name)
      GodInterface.remove(app_name)
      Nginx.remove_app(app_name)
      Collectd.remove_app(app_name)
      HAProxy.remove_app(app_name)
      Nginx.reload
      Collectd.restart
      ZKInterface.remove_app_entry(app_name)

      # If this node has any information about AppServers for this app,
      # clear that information out.
      if !@app_info_map[app_name].nil?
        @app_info_map.delete(app_name)
      end

      # TODO: God does not shut down the application, so kill it here as a
      # temporary fix.
      Djinn.log_run("ps -ef | grep dev_appserver | grep #{app_name} | grep -v grep | grep cookie_secret | awk '{print $2}' | xargs kill -9")
      result = "true"
    end

    @apps_loaded = @apps_loaded - [app_name]
    @app_names = @app_names - [app_name]

    if @apps_loaded.empty?
      @apps_loaded << "none"
    end

    if @app_names.empty?
      @app_names << "none"
    end
  }

  return "true"
end
# A SOAP-exposed method that tells every node in @nodes (via set_apps on an
# AppControllerClient) to use the combination of the apps already in
# @app_names and the given app_names, then drops the "none" placeholder from
# this node's @app_names.
#
# Returns "OK", or BAD_SECRET_MSG if the secret is not valid.
def update(app_names, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  apps = @app_names - app_names + app_names

  @nodes.each do |node|
    ip = node.private_ip
    result = AppControllerClient.new(ip, @@secret).set_apps(apps)
    Djinn.log_debug("Update #{ip} returned #{result} (#{result.class})")
    @everyone_else_is_done = false unless result
  end

  # now that another app is running we can take out 'none' from the list
  # if it was there (e.g., run-instances with no app given)
  @app_names = @app_names - ["none"]
  "OK"
end
# A SOAP-exposed method that returns the public IP of every node in @nodes,
# in order, logging the list as it goes.
#
# Returns an Array of the nodes' public_ip values, or BAD_SECRET_MSG if the
# secret is not valid.
def get_all_public_ips(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  public_ips = @nodes.map { |node| node.public_ip }
  Djinn.log_debug("All public ips are [#{public_ips.join(', ')}]")
  public_ips
end
# A SOAP-exposed method that brings this node up and then keeps it running.
# It starts the InfrastructureManager, restores saved AppController state if
# possible (otherwise it waits for data and calls change_job), starts the
# NeptuneManager, and then loops until @kill_sig_received is set, sleeping
# 20 seconds between passes. Each pass rewrites local info files, syncs
# with other nodes, checks that roles are running, and starts and scales
# App Engine apps.
#
# Returns BAD_SECRET_MSG if the secret is not valid, and nil once the loop
# has ended.
def job_start(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  start_infrastructure_manager

  # A successful restore means we already have our data and our jobs, so
  # only a fresh start has to wait for data and call change_job.
  restored_from_backup = restore_appcontroller_state
  wait_for_data unless restored_from_backup
  parse_creds
  change_job unless restored_from_backup

  start_neptune_manager

  @done_loading = true
  write_our_node_info
  wait_for_nodes_to_finish_loading(@nodes)

  until @kill_sig_received
    @state = "Done starting up AppScale, now in heartbeat mode"
    write_database_info
    write_zookeeper_locations
    write_neptune_info
    update_api_status
    send_logs_to_sisyphus
    update_local_nodes

    if my_node.is_shadow?
      Djinn.log_debug("my node is #{my_node}")
      # Since we now backup state to ZK, don't make everyone do it.
      # The Shadow has the most up-to-date info, so let it handle this
      backup_appcontroller_state
    end

    # Login nodes host the AppLoadBalancer app, which has links to each
    # of the apps running in AppScale. Update the files it reads to
    # reflect the most up-to-date info.
    @nodes.each { |node| get_status(node) } if my_node.is_login?

    ensure_all_roles_are_running

    # TODO: consider only calling this if new apps are found
    start_appengine
    scale_appservers
    Kernel.sleep(20)
  end
end
# Starts the InfrastructureManager service on this machine, which exposes
# a SOAP interface by which we can dynamically add and remove nodes in this
# AppScale deployment.
#
# Does nothing (beyond logging) if something is already listening on the
# InfrastructureManager's port on localhost; otherwise asks god to start and
# watch the server.
def start_infrastructure_manager
  manager_port = InfrastructureManagerClient::SERVER_PORT

  if HelperFunctions.is_port_open?("localhost", manager_port,
    HelperFunctions::USE_SSL)
    Djinn.log_debug("InfrastructureManager is already running locally - " +
      "don't start it again.")
    return
  end

  GodInterface.start(
    :iaas_manager,
    "ruby #{APPSCALE_HOME}/InfrastructureManager/infrastructure_manager_server.rb",
    "pkill -9 infrastructure_manager_server",
    [manager_port]
  )
  Djinn.log_debug("Started InfrastructureManager successfully!")
end
# Logs that the InfrastructureManager is being stopped and asks god to stop
# the :iaas_manager watch, returning whatever GodInterface.stop returns.
def stop_infrastructure_manager
  Djinn.log_debug "Stopping InfrastructureManager"
  GodInterface.stop :iaas_manager
end
# Starts the NeptuneManager service on this machine, which exposes
# a SOAP interface by which we can run programs in arbitrary languages
# in this AppScale deployment.
#
# Always rewrites the cloud info file first. Then does nothing (beyond
# logging) if something is already listening on the NeptuneManager's port on
# localhost; otherwise asks god to start and watch the server, passing
# APPSCALE_HOME and the database in use via environment variables.
def start_neptune_manager
  write_cloud_info

  neptune_port = NeptuneManagerClient::SERVER_PORT

  if HelperFunctions.is_port_open?("localhost", neptune_port,
    HelperFunctions::USE_SSL)
    Djinn.log_debug("NeptuneManager is already running locally - " +
      "don't start it again.")
    return
  end

  GodInterface.start(
    :neptune_manager,
    "ruby #{APPSCALE_HOME}/Neptune/neptune_manager_server.rb",
    "pkill -9 neptune_manager_server",
    [neptune_port],
    {
      'APPSCALE_HOME' => APPSCALE_HOME,
      'DATABASE_USED' => @creds['table']
    }
  )
  Djinn.log_debug("Started NeptuneManager successfully!")
end
# Writes a JSON file at /etc/appscale/cloud_info.json recording whether this
# deployment is a cloud deployment and whether it is a hybrid cloud
# deployment.
def write_cloud_info
  HelperFunctions.write_json_file("/etc/appscale/cloud_info.json", {
    'is_cloud?' => is_cloud?,
    'is_hybrid_cloud?' => is_hybrid_cloud?
  })
end
# Logs that the NeptuneManager is being stopped and asks god to stop the
# :neptune_manager watch, returning whatever GodInterface.stop returns.
def stop_neptune_manager
  Djinn.log_debug "Stopping NeptuneManager"
  GodInterface.stop :neptune_manager
end
# A SOAP-exposed method that lists the users currently connected to
# ejabberd. It runs 'ejabberdctl connected-users' over SSH on the node
# returned by get_login and keeps the part of each output line that comes
# before the first '/'.
#
# Returns an Array with one entry per output line, or BAD_SECRET_MSG if the
# secret is not valid.
def get_online_users_list(secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  login_node = get_login
  ip = login_node.public_ip
  key = login_node.ssh_key

  raw_list = `ssh -i #{key} -o StrictHostkeyChecking=no root@#{ip} 'ejabberdctl connected-users'`
  raw_list.split("\n").map { |userdata| userdata.split("/")[0] }
end
# A SOAP-exposed method that callers use to report that an app has been
# placed on this node's file system. If something exists at the given
# location, the app is registered in ZooKeeper along with this node's
# serialized information.
#
# Args:
#   appname: The name of the application that was uploaded.
#   location: The path on the local file system where the app was placed.
#   secret: The secret phrase used to authenticate the caller.
# Returns:
#   "success" if the app was found and registered, a String explaining that
#   the app was not found otherwise, or BAD_SECRET_MSG if the secret was not
#   valid.
def done_uploading(appname, location, secret)
  if !valid_secret?(secret)
    return BAD_SECRET_MSG
  end

  # File.exist? rather than File.exists?: the latter is a deprecated alias
  # that newer Ruby versions no longer provide.
  if File.exist?(location)
    ZKInterface.add_app_entry(appname, my_node.serialize, location)
    result = "success"
  else
    result = "The #{appname} app was not found at #{location}."
  end

  Djinn.log_debug(result)
  return result
end
# A SOAP-exposed method that reports whether an app is running, which is
# taken to mean that at least one of the nodes ZooKeeper lists as hosting
# the app has the appengine role.
#
# Returns true or false, or BAD_SECRET_MSG if the secret is not valid.
def is_app_running(appname, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  appengine_hosters = ZKInterface.get_app_hosters(appname).select { |node|
    node.is_appengine?
  }
  app_running = !appengine_hosters.empty?
  Djinn.log_debug("Is app #{appname} running? #{app_running}")
  app_running
end
# A SOAP-exposed method that adds one or more roles (given as a single
# colon-separated String) to this node and starts each one by calling the
# matching start_<role> method. Roles this node already runs are skipped.
# NOTE(review): the method name passed to send is built from the
# caller-supplied role, so any start_* method can be reached by a caller
# that knows the secret.
#
# Returns "OK" right away (the roles are started in a background thread), or
# BAD_SECRET_MSG if the secret is not valid.
def add_role(new_role, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  # new roles may run indefinitely in the background, so don't block
  # on them - just fire and forget
  Thread.new do
    new_role.split(":").each do |role|
      # only start roles that we aren't already running
      # e.g., don't start_appengine if we already are, as this
      # will create two threads loading apps
      if my_node.jobs.include?(role)
        Djinn.log_debug("Already running role #{role}, not invoking again")
        next
      end

      Djinn.log_debug("Adding and starting role #{role}")
      my_node.add_roles(role)
      send("start_#{role}".to_sym)
    end
  end

  "OK"
end
# A SOAP-exposed method that removes one or more roles (given as a single
# colon-separated String) from this node and stops each one by calling the
# matching stop_<role> method.
#
# Returns "OK" once every role has been stopped, or BAD_SECRET_MSG if the
# secret is not valid.
def remove_role(old_role, secret)
  return BAD_SECRET_MSG unless valid_secret?(secret)

  my_node.remove_roles(old_role)

  old_role.split(":").each do |role|
    Djinn.log_debug("Removing and stopping role #{role}")
    send("stop_#{role}".to_sym)
  end

  "OK"
end
# Starts the given roles by using open nodes, spawning new nodes, or some
# combination of the two. 'nodes_needed' should be an Array, where each
# item is an Array of the roles to start on each node.
def start_new_roles_on_nodes(nodes_needed, instance_type, secret)
if !valid_secret?(secret)
return BAD_SECRET_MSG
end
if nodes_needed.class != Array
Djinn.log_debug("Was expecting nodes_needed to be an Array, not " +
"a #{nodes_needed.class}")
return BAD_INPUT_MSG
end
Djinn.log_debug("Received a request to acquire nodes with roles " +
"#{nodes_needed.join(', ')}, with instance type #{instance_type} for " +
"new nodes")
vms_to_use = []
ZKInterface.lock_and_run {
num_of_vms_needed = nodes_needed.length