diff --git a/CCconfig.toml b/CCconfig.toml index ea8614b..09527e2 100644 --- a/CCconfig.toml +++ b/CCconfig.toml @@ -1,3 +1,4 @@ +# CloudClusters.jl's default configuration [defaults] @@ -17,9 +18,10 @@ mpiflags = "" [ec2] -imageid = "ami-0ce455c31c5fec661" -#placement_group = "automatic" -security_group_id = "sg-039de945285a31f89" -subnet_id = "subnet-3fca5558" +imageid = "ami-0ce455c31c5fec661" # found at us-east-1 (North Virginia). To use in other regions, copy it. + +# placement_group = "pg-XXXXXXXXXXXX" or "automatic" +# security_group_id = "sg-XXXXXXXXXXXX" or "automatic" +# subnet_id = "subnet-XXXXXXXXXXXX" [gcp] \ No newline at end of file diff --git a/Project.toml b/Project.toml index e2ca431..2bc444b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,13 +1,14 @@ name = "CloudClusters" uuid = "4ca6f12b-c8f1-4945-b50f-6bb73234c039" authors = ["Francisco Heron de Carvalho Junior e João Marcelo Uchôa de Alencar "] -version = "0.1.0" +version = "0.1.1" [deps] AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MPIClusterManagers = "e7922434-ae4b-11e9-05c5-9780451d2c66" @@ -22,18 +23,19 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" [compat] -AWS = "1.92.0" +AWS = "1" Base64 = "1.10.4" Distributed = "1.10.4" FilePathsBase = "0.9.21" -JSON = "0.21.4" +Downloads = "1.6.0" +JSON = "0.21" MPIClusterManagers = "0.2.4" PlatformAware = "0.6.1" Random = "1.10.4" -Reexport = "1.2.2" +Reexport = "1" Serialization = "1.10.4" Sockets = "1.10.4" TOML = "1.0.3" Test = "1.11.0" -YAML = "0.4.12" +YAML = "0.4" julia = "1" diff --git a/README.md b/README.md index b8ad829..8580c4e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ 
![CloudClusters.jl](https://raw.githubusercontent.com/PlatformAwareProgramming/CloudClusters.jl/refs/heads/main/docs/src/assets/logo-text.svg) +[![TagBot](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/TagBot.yml/badge.svg)](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/TagBot.yml) +[![CompatHelper](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/CompatHelper.yml/badge.svg)](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/CompatHelper.yml) + _A package for creating, using, and managing clusters of virtual machine (VM) instances deployed with IaaS cloud providers._ > [!NOTE] @@ -26,15 +29,16 @@ Creating clusters with _CloudClusters.jl_ requires specifying some configuration * a path pointed by the CLOUD_CLUSTERS_CONFIG environment variable, if it exists; * the current path. -Section [Configuration parameters](https://github.com/PlatformAwareProgramming/CloudClusters.jl#configuration-parameters) describes configuration parameters and how they can be overridden in programs. - -Default configuration parameters can be overridden in programs. +Section [Configuration parameters](https://github.com/PlatformAwareProgramming/CloudClusters.jl#configuration-parameters) describes default configuration parameters and how they can be overridden in programs. A [_CCconfig.toml_](https://raw.githubusercontent.com/PlatformAwareProgramming/CloudClusters.jl/refs/heads/main/CCconfig.toml) file is provided in the repository's top-level directory. It is configured to create clusters using prebuilt virtual machine images for each supported cloud provider. These images are based on the latest version of Ubuntu and include a Julia installation of a recent stable version with all the packages needed to instantiate the clusters added and precompiled. 
Users can create customized images, possibly derived from the provided image, using their preferred version of Julia and adding the packages they need. -> [!NOTE] +> [!WARNING] > The version of Julia on the host computer using _CloudClusters.jl_ must be the same version as the image used to deploy the clusters. +> [!NOTE] +> The current prebuilt image for EC2 is located in the _us-east-1_ (North Virginia) region. To deploy a cluster in another region, users must create a copy of the image for that region in their account and assign the copy's id to the ```imageid``` parameter of _CCconfig.toml_. + ### The _PlatformAware.jl_ package _CloudClusters.jl_ relies on an experimental package called [_PlatformAware.jl_](https://github.com/PlatformAwareProgramming/PlatformAware.jl) for the specification of _platform types_, aimed at specifying assumptions about architectural features of virtual machines instances. Indeed, _PlatformAware.jl_ may be used with _CloudClusters.jl_ to write functions specifically tuned according to the features of VM instances that comprise the clusters. This is called _platform-aware programming_. The users of _CloudClusters.jl_, particularly package developers, are invited to explore and use the ideas behind _PlatformAware.jl_. 
diff --git a/docs/src/index.md b/docs/src/index.md index b8ad829..8580c4e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,5 +1,8 @@ ![CloudClusters.jl](https://raw.githubusercontent.com/PlatformAwareProgramming/CloudClusters.jl/refs/heads/main/docs/src/assets/logo-text.svg) +[![TagBot](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/TagBot.yml/badge.svg)](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/TagBot.yml) +[![CompatHelper](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/CompatHelper.yml/badge.svg)](https://github.com/PlatformAwareProgramming/CloudClusters.jl/actions/workflows/CompatHelper.yml) + _A package for creating, using, and managing clusters of virtual machine (VM) instances deployed with IaaS cloud providers._ > [!NOTE] @@ -26,15 +29,16 @@ Creating clusters with _CloudClusters.jl_ requires specifying some configuration * a path pointed by the CLOUD_CLUSTERS_CONFIG environment variable, if it exists; * the current path. -Section [Configuration parameters](https://github.com/PlatformAwareProgramming/CloudClusters.jl#configuration-parameters) describes configuration parameters and how they can be overridden in programs. - -Default configuration parameters can be overridden in programs. +Section [Configuration parameters](https://github.com/PlatformAwareProgramming/CloudClusters.jl#configuration-parameters) describes default configuration parameters and how they can be overridden in programs. A [_CCconfig.toml_](https://raw.githubusercontent.com/PlatformAwareProgramming/CloudClusters.jl/refs/heads/main/CCconfig.toml) file is provided in the repository's top-level directory. It is configured to create clusters using prebuilt virtual machine images for each supported cloud provider. 
These images are based on the latest version of Ubuntu and include a Julia installation of a recent stable version with all the packages needed to instantiate the clusters added and precompiled. Users can create customized images, possibly derived from the provided image, using their preferred version of Julia and adding the packages they need. -> [!NOTE] +> [!WARNING] > The version of Julia on the host computer using _CloudClusters.jl_ must be the same version as the image used to deploy the clusters. +> [!NOTE] +> The current prebuilt image for EC2 is located in the _us-east-1_ (North Virginia) region. To deploy a cluster in another region, users must create a copy of the image for that region in their account and assign the copy's id to the ```imageid``` parameter of _CCconfig.toml_. + ### The _PlatformAware.jl_ package _CloudClusters.jl_ relies on an experimental package called [_PlatformAware.jl_](https://github.com/PlatformAwareProgramming/PlatformAware.jl) for the specification of _platform types_, aimed at specifying assumptions about architectural features of virtual machines instances. Indeed, _PlatformAware.jl_ may be used with _CloudClusters.jl_ to write functions specifically tuned according to the features of VM instances that comprise the clusters. This is called _platform-aware programming_. The users of _CloudClusters.jl_, particularly package developers, are invited to explore and use the ideas behind _PlatformAware.jl_. 
diff --git a/src/cluster_providers/ec2/ec2_backend.jl b/src/cluster_providers/ec2/ec2_backend.jl index f15f05d..d712884 100644 --- a/src/cluster_providers/ec2/ec2_backend.jl +++ b/src/cluster_providers/ec2/ec2_backend.jl @@ -47,7 +47,7 @@ mutable struct EC2PeerWorkers <: PeerWorkers # Cluster subnet_id::Union{String, Nothing} placement_group::Union{String, Nothing} auto_pg::Bool - security_group_id::Union{String,Nothing} + security_group_id::Union{String, Nothing} auto_sg::Bool environment::Union{SharedFSInfo, Nothing} cluster_nodes::Union{Dict{Symbol, String}, Nothing} @@ -63,7 +63,7 @@ mutable struct EC2PeerWorkersMPI <: PeerWorkersMPI # Cluster subnet_id::Union{String, Nothing} placement_group::Union{String, Nothing} auto_pg::Bool - security_group_id::Union{String,Nothing} + security_group_id::Union{String, Nothing} auto_sg::Bool environment::Union{SharedFSInfo, Nothing} cluster_nodes::Union{Dict{Symbol, String}, Nothing} diff --git a/src/cluster_providers/ec2/ec2_deploy.jl b/src/cluster_providers/ec2/ec2_deploy.jl index 2ce651d..476c107 100644 --- a/src/cluster_providers/ec2/ec2_deploy.jl +++ b/src/cluster_providers/ec2/ec2_deploy.jl @@ -61,7 +61,7 @@ function deploy_cluster(_::Type{AmazonEC2}, auto_sg, security_group_id = security_group_id == "automatic" ? 
(true, ec2_create_security_group(string("sgroup_", cluster_handle), "")) : (false, security_group_id) cluster = ec2_build_clusterobj(cluster_type, string(cluster_handle), instance_type, count, imageid, - subnet_id, placement_group, auto_pg, security_group_id, auto_sg, cluster_features) + subnet_id, placement_group, auto_pg, security_group_id, auto_sg, cluster_features) ec2_create_cluster(cluster) diff --git a/src/config/configs.jl b/src/config/configs.jl index b805dd0..88e9cbf 100644 --- a/src/config/configs.jl +++ b/src/config/configs.jl @@ -1,3 +1,4 @@ +using Downloads abstract type Localhost <: OnPremises end @@ -17,32 +18,30 @@ function readCCConfig(config_file::String) close(io) contents catch - default_location = "/etc/$config_file" + # system wide location try - # defaul system location + default_location = "/etc/$config_file" io = open(default_location) contents = read(io,String) close(io) contents catch - @info "The configuration file ($config_file) was not found." - - # dpf_path = @get_scratch!("default_platform_path") - # dpf_url = "https://raw.githubusercontent.com/PlatformAwareProgramming/PlatformAware.jl/manager/src/features/default/Platform.toml" - # dpf_fname = joinpath(dpf_path, basename(dpf_url)) - # try_download(dpf_url, dpf_fname) - - # read(dpf_fname,String) + # NOTHING TO DO end end - #@info "=====> $ccconfig_toml" if isnothing(ccconfig_toml) - @error "The configuration file ($config_file) was not found." - return nothing + @warn "A configuration file ($config_file) was not found. A default $config_file will be downloaded and copied to the current directory." 
+ fetch_default_configuration_file(config_file) + return readCCConfig(config_file) + else + return TOML.parse(ccconfig_toml) end +end - TOML.parse(ccconfig_toml) +function fetch_default_configuration_file(config_file) + url = "https://raw.githubusercontent.com/PlatformAwareProgramming/CloudClusters.jl/refs/heads/main/CCconfig.toml" + Downloads.download(url, config_file) end function loadDefaults(_::Type{Provider}, ccconfig_dict) @@ -61,18 +60,15 @@ function loadDefaults(_::Type{Provider}, ccconfig_dict) return defaults_dict end -_providers = [Provider, Localhost, AmazonEC2, GoogleCloud] +_providers = [(Provider,"defaults"), (Localhost,"local"), (AmazonEC2, "ec2"), (GoogleCloud, "gcp")] defaults_dict = Dict() function load!() ccconfig_dict = readCCConfig("CCconfig.toml") - for provider_type in _providers - if !isnothing(ccconfig_dict) - defaults_dict[provider_type] = loadDefaults(provider_type, ccconfig_dict) - else - @error "Default configuration of $provider_type is empty" - end + for (provider_type, provider_key) in _providers + isempty(ccconfig_dict[provider_key]) && @warn "Default configuration of $provider_type ($provider_key) is empty" + defaults_dict[provider_type] = loadDefaults(provider_type, ccconfig_dict) end end diff --git a/src/deploy.jl b/src/deploy.jl index 82ec880..7244a4f 100644 --- a/src/deploy.jl +++ b/src/deploy.jl @@ -63,7 +63,6 @@ function cluster_deploy(contract_handle, config_args...) 
if !isnothing(pids) cluster_deploy_info[cluster_handle][:pids] = pids - @info "pids: $(cluster_deploy_info[cluster_handle][:pids])" return cluster_handle else @warn "error launching processes -- cluster will be terminated" @@ -200,6 +199,8 @@ function launch_processes_ssh(cluster_features, _::Type{<:ManagerWorkers}, ips) ntries += 1 end + @info "the entry process of this MW cluster has pid $(first(master_id))" + return master_id end @@ -246,6 +247,8 @@ function launch_processes_local(cluster_features, _::Type{<:ManagerWorkers}, ips ntries += 1 end + @info "the entry process of this MW cluster has pid $master_id" + return master_id end @@ -298,6 +301,8 @@ function launch_processes_ssh(cluster_features, _::Type{<:PeerWorkers}, ips) ntries += 1 end + @info "the worker processes of this PW cluster have pids $peer_ids" + return peer_ids end @@ -322,6 +327,8 @@ function launch_processes_local(cluster_features, _::Type{<:PeerWorkers}, ips) ntries += 1 end + @info "the worker processes of this PW local cluster have pids $peer_ids" + return peer_ids end @@ -356,6 +363,8 @@ function launch_processes_mpi(cluster_features, _::Type{<:PeerWorkersMPI}, ips) ntries += 1 end + @info "the worker processes of this PW-MPI cluster (local) have pids $peer_ids" + return peer_ids end @@ -436,7 +445,7 @@ function kill_processes(cluster_handle, _::Type{<:ManagerWorkers}, cluster_featu rmprocs(pids) end empty!(cluster_deploy_info[cluster_handle][:pids]) - @info "pids $pids removed (manager)" + !isempty(pids) && @info "pids $pids removed (manager)" end function kill_processes(cluster_handle, _::Type{<:PeerWorkers}, cluster_features) @@ -445,7 +454,7 @@ function kill_processes(cluster_handle, _::Type{<:PeerWorkers}, cluster_features rmprocs(pids) empty!(cluster_deploy_info[cluster_handle][:pids]) end - @info "pids $pids removed (peers)" + !isempty(pids) && @info "pids $pids removed (peers)" end diff --git a/src/macros.jl b/src/macros.jl index 79a4b62..117d9ae 100644 --- a/src/macros.jl +++ 
b/src/macros.jl @@ -7,13 +7,6 @@ function readFeatures(features) for f in features @assert f.head == :call @assert f.args[1] == :(=>) - #if arg.args[2] == :cluster_type - # cluster_type = arg.args[3] - # push!(common_features, Expr(:call, :(=>), :(:cluster_type), cluster_type) #= :cluster_type => cluster_type=#) - #elseif arg.args[2] == :node_provider - # cloud_provider = arg.args[3] - # push!(common_features, Expr(:call, :(=>), :(:node_provider), cloud_provider) #=:node_provider => cloud_provider=#) - #else if isa(f.args[2], Expr) @assert f.args[2].head == :. which_node = f.args[2].args[1] diff --git a/src/resolve.jl b/src/resolve.jl index ce8cae7..39c137e 100644 --- a/src/resolve.jl +++ b/src/resolve.jl @@ -7,6 +7,7 @@ function cluster_resolve(contract_handle) cluster_type, cluster_features = cluster_contract[contract_handle] cluster_resolve(cluster_type, cluster_features, contract_handle) + catch e println(e) return :fail @@ -16,8 +17,8 @@ end function cluster_resolve(_::Type{<:ManagerWorkers}, cluster_features, contract_handle) - !haskey(cluster_features, :manager_features) && @warn ":manager_features not specified" - !haskey(cluster_features, :worker_features) && @warn ":worker_features not specified" + !haskey(cluster_features, :manager_features) && @warn ":manager_features not explicitly specified" + !haskey(cluster_features, :worker_features) && @warn ":worker_features not explicitly specified" manager_features = Dict{Symbol,Any}(get(cluster_features, :manager_features, cluster_features)) worker_features = Dict{Symbol,Any}(get(cluster_features, :worker_features, cluster_features)) @@ -36,6 +37,9 @@ function cluster_resolve(_::Type{<:ManagerWorkers}, cluster_features, contract_h cluster_contract_resolved[contract_handle] = (instance_type_manager, instance_type_worker) + @info "$instance_type_manager of $node_provider selected for the manager node" + @info "$instance_type_worker of $node_provider selected for the worker nodes" + :manager_instance_type => 
instance_type_manager, :worker_instance_type => instance_type_worker end @@ -47,6 +51,8 @@ function cluster_resolve(_::Type{<:PeerWorkers}, cluster_features, contract_hand cluster_contract_resolved[contract_handle] = instance_type + @info "$instance_type of $node_provider selected for the peer nodes" + :instance_type => instance_type end @@ -65,8 +71,7 @@ function call_resolve(features) end end - str = resolve(resolve_args...) - return str + return resolve(resolve_args...) end #function resolve(provider::Type{<:EC2Cluster}, node_machinetype, node_memory_size, #=node_ecu_count,=# node_vcpus_count, accelerator_count, accelerator_type, accelerator_arch, accelerator, processor, processor_arch, storage_type, storage_size, interconnection_bandwidth) diff --git a/test/CCconfig.toml b/test/CCconfig.toml deleted file mode 100644 index ea8614b..0000000 --- a/test/CCconfig.toml +++ /dev/null @@ -1,25 +0,0 @@ - -[defaults] - -user = "ubuntu" -sshflags = "" -exename = "/home/ubuntu/.juliaup/bin/julia" -directory = "/home/ubuntu" -exeflags = "--optimize=3" -tunneled = true -threadlevel = "multiple" -mpiflags = "--map-by node --hostfile /home/ubuntu/hostfile" - -[local] - -directory = "." -mpiflags = "" - -[ec2] - -imageid = "ami-0ce455c31c5fec661" -#placement_group = "automatic" -security_group_id = "sg-039de945285a31f89" -subnet_id = "subnet-3fca5558" - -[gcp] \ No newline at end of file